diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 7f212ac48e..bde36aa896 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -303,6 +303,7 @@ include(Packages/USER-QUIP) include(Packages/USER-QMMM) include(Packages/USER-VTK) include(Packages/KIM) +include(Packages/LATTE) include(Packages/MESSAGE) include(Packages/MSCG) include(Packages/COMPRESS) diff --git a/doc/src/Commands_all.txt b/doc/src/Commands_all.txt index d4762e0f76..d0d7657c07 100644 --- a/doc/src/Commands_all.txt +++ b/doc/src/Commands_all.txt @@ -50,11 +50,11 @@ An alphabetic list of all general LAMMPS commands. "dump"_dump.html, "dump adios"_dump_adios.html, "dump image"_dump_image.html, -"dump_modify"_dump_modify.html, "dump movie"_dump_image.html, "dump netcdf"_dump_netcdf.html, "dump netcdf/mpiio"_dump_netcdf.html, "dump vtk"_dump_vtk.html, +"dump_modify"_dump_modify.html, "dynamical_matrix"_dynamical_matrix.html, "echo"_echo.html, "fix"_fix.html, diff --git a/doc/src/Commands_pair.txt b/doc/src/Commands_pair.txt index fea085b4ed..6077fad8ec 100644 --- a/doc/src/Commands_pair.txt +++ b/doc/src/Commands_pair.txt @@ -222,6 +222,8 @@ OPT. "sph/rhosum"_pair_sph_rhosum.html, "sph/taitwater"_pair_sph_taitwater.html, "sph/taitwater/morris"_pair_sph_taitwater_morris.html, +"spin/dipole/cut"_pair_spin_dipole.html, +"spin/dipole/long"_pair_spin_dipole.html, "spin/dmi"_pair_spin_dmi.html, "spin/exchange"_pair_spin_exchange.html, "spin/magelec"_pair_spin_magelec.html, diff --git a/doc/src/Eqs/angle_class2_p6.tex b/doc/src/Eqs/angle_class2_p6.tex new file mode 100644 index 0000000000..37fd87e9ec --- /dev/null +++ b/doc/src/Eqs/angle_class2_p6.tex @@ -0,0 +1,15 @@ +\documentclass[12pt]{article} + +\pagestyle{empty} +\begin{document} + +$$ + E_{a} = K_2\left(\theta - \theta_0\right)^2 + K_3\left(\theta - \theta_0\right)^3 + K_4\left(\theta - \theta_0\right)^4 + K_5\left(\theta - \theta_0\right)^5 + K_6\left(\theta - \theta_0\right)^6 +$$ + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff --git a/doc/src/Eqs/angle_cosine_buck6d.tex b/doc/src/Eqs/angle_cosine_buck6d.tex new file mode 100644 index 0000000000..49be2fc8c2 --- /dev/null +++ b/doc/src/Eqs/angle_cosine_buck6d.tex @@ -0,0 +1,15 @@ +\documentclass[12pt]{article} + +\pagestyle{empty} +\begin{document} + +$$ + E = K \left[ 1 + \cos(n\theta - \theta_0)\right] +$$ + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff --git a/doc/src/Eqs/improper_inversion_harmonic.tex b/doc/src/Eqs/improper_inversion_harmonic.tex new file mode 100644 index 0000000000..a1607a1149 --- /dev/null +++ b/doc/src/Eqs/improper_inversion_harmonic.tex @@ -0,0 +1,15 @@ +\documentclass[12pt]{article} + +\pagestyle{empty} +\begin{document} + +$$ + E = K \left(\theta - \theta_0\right)^2 +$$ + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff --git a/doc/src/Eqs/pair_agni.tex b/doc/src/Eqs/pair_agni.tex new file mode 100644 index 0000000000..b9aa7882fc --- /dev/null +++ b/doc/src/Eqs/pair_agni.tex @@ -0,0 +1,18 @@ +\documentclass[12pt]{article} + +\pagestyle{empty} +\begin{document} + +\begin{eqnarray*} + F_i^u & = & \sum_t^{N_t}\alpha_t \cdot \exp\left[-\frac{\left(d_{i,t}^u\right)^2}{2l^2}\right] \\ + d_{i,t}^u & = & \left|\left| V_i^u(\eta) - V_t^u(\eta) \right|\right| \\ + V_i^u(\eta) & = & \sum_{j \neq i}\frac{r^u_{ij}}{r_{ij}} \cdot e^{-\left(\frac{r_{ij}}{\eta} \right)^2} \cdot f_d\left(r_{ij}\right) \\ + f_d\left(r_{ij}\right) & = & \frac{1}{2} \left[\cos\left(\frac{\pi r_{ij}}{R_c}\right) + 1 \right] +\end{eqnarray*} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff --git a/doc/src/Eqs/pair_buck6d.txt b/doc/src/Eqs/pair_buck6d.tex similarity index 91% rename from doc/src/Eqs/pair_buck6d.txt rename to doc/src/Eqs/pair_buck6d.tex index 4888444d8c..903c0685be 100644 --- a/doc/src/Eqs/pair_buck6d.txt +++ b/doc/src/Eqs/pair_buck6d.tex @@ -1,6 +1,7 @@ \documentclass[12pt]{article} \begin{document} +\pagestyle{empty} \begin{eqnarray*} E = A e^{-\kappa r} - \frac{C}{r^6} \cdot \frac{1}{1 + D r^{14}} \qquad r < r_c \\ diff --git a/doc/src/Eqs/pair_coul_gauss.tex b/doc/src/Eqs/pair_coul_gauss.tex new file mode 100644 index 0000000000..1eb9c05a6f --- /dev/null +++ b/doc/src/Eqs/pair_coul_gauss.tex @@ -0,0 +1,15 @@ +\documentclass[12pt]{article} + +\pagestyle{empty} +\begin{document} + +$$ + E = \frac{C_{q_i q_j}}{\epsilon r_{ij}}\,\, \textrm{erf}\left(\alpha_{ij} r_{ij}\right)\quad\quad\quad r < r_c +$$ + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff --git a/doc/src/Install_linux.txt b/doc/src/Install_linux.txt index ec063e7a95..9aebd30c05 100644 --- a/doc/src/Install_linux.txt +++ b/doc/src/Install_linux.txt @@ -15,7 +15,8 @@ Binaries are available for different versions of Linux: "Pre-built Fedora Linux executables"_#fedora "Pre-built EPEL Linux executables (RHEL, CentOS)"_#epel "Pre-built OpenSuse Linux executables"_#opensuse -"Gentoo Linux executable"_#gentoo :all(b) +"Gentoo Linux executable"_#gentoo +"Arch Linux build-script"_#arch :all(b) :line @@ -168,3 +169,31 @@ for details. Thanks to Nicolas Bock and Christoph Junghans (LANL) for setting up this Gentoo capability. + +:line + +Archlinux build-script :h4,link(arch) + +LAMMPS is available via Arch's unofficial Arch User repository (AUR). + +There are three scripts available, named lammps, lammps-beta and lammps-git. +They respectively package the stable, patch and git releases. + +To install, you will need to have the git package installed. You may use +any of the above names in-place of lammps. + +$ git clone https://aur.archlinux.org/lammps.git :pre +$ cd lammps :pre +$ makepkg -s :pre +# makepkg -i :pre + +To update, you may repeat the above, or change into the cloned directory, +and execute the following, after which, if there are any changes, you may +use makepkg as above. + +$ git pull :pre + +Alternatively, you may use an AUR helper to install these packages. + +Note that the AUR provides build-scripts that download the source and +the build the package on your machine. diff --git a/doc/src/Packages_details.txt b/doc/src/Packages_details.txt index b24584e540..43274a9606 100644 --- a/doc/src/Packages_details.txt +++ b/doc/src/Packages_details.txt @@ -927,6 +927,8 @@ the usual manner via MD. Various pair, fix, and compute styles. src/SPIN: filenames -> commands "Howto spins"_Howto_spins.html +"pair_style spin/dipole/cut"_pair_spin_dipole.html +"pair_style spin/dipole/long"_pair_spin_dipole.html "pair_style spin/dmi"_pair_spin_dmi.html "pair_style spin/exchange"_pair_spin_exchange.html "pair_style spin/magelec"_pair_spin_magelec.html diff --git a/doc/src/lammps.book b/doc/src/lammps.book index ef6be9dd36..79b410c783 100644 --- a/doc/src/lammps.book +++ b/doc/src/lammps.book @@ -648,6 +648,7 @@ pair_sph_lj.html pair_sph_rhosum.html pair_sph_taitwater.html pair_sph_taitwater_morris.html +pair_spin_dipole.html pair_spin_dmi.html pair_spin_exchange.html pair_spin_magelec.html diff --git a/doc/src/pair_class2.txt b/doc/src/pair_class2.txt index 2d6b325fed..9e25560071 100644 --- a/doc/src/pair_class2.txt +++ b/doc/src/pair_class2.txt @@ -155,7 +155,7 @@ All of the lj/class2 pair styles write their information to "binary restart files"_restart.html, so pair_style and pair_coeff commands do not need to be specified in an input script that reads a restart file. -Only the {lj/class2} pair style support the use of the +Only the {lj/class2} and {lj/class2/coul/long} pair styles support the use of the {inner}, {middle}, and {outer} keywords of the "run_style respa"_run_style.html command, meaning the pairwise forces can be partitioned by distance at different levels of the rRESPA hierarchy. diff --git a/doc/src/pair_modify.txt b/doc/src/pair_modify.txt index 4824a3d83e..c446aa29d0 100644 --- a/doc/src/pair_modify.txt +++ b/doc/src/pair_modify.txt @@ -13,7 +13,8 @@ pair_modify command :h3 pair_modify keyword values ... :pre one or more keyword/value pairs may be listed :ulb,l -keyword = {pair} or {shift} or {mix} or {table} or {table/disp} or {tabinner} or {tabinner/disp} or {tail} or {compute} :l +keyword = {pair} or {shift} or {mix} or {table} or {table/disp} or {tabinner} +or {tabinner/disp} or {tail} or {compute} or {nofdotr} :l {pair} values = sub-style N {special} which wt1 wt2 wt3 or sub-style N {compute/tally} flag sub-style = sub-style of "pair hybrid"_pair_hybrid.html @@ -33,7 +34,8 @@ keyword = {pair} or {shift} or {mix} or {table} or {table/disp} or {tabinner} or {tabinner/disp} value = cutoff cutoff = inner cutoff at which to begin table (distance units) {tail} value = {yes} or {no} - {compute} value = {yes} or {no} :pre + {compute} value = {yes} or {no} + {nofdotr} :pre :ule [Examples:] @@ -212,6 +214,10 @@ a pair style will not work, because the "kspace_style"_kspace_style.html command requires a Kspace-compatible pair style be defined. +The {nofdotr} keyword allows to disable an optimization that computes +the global stress tensor from the total forces and atom positions rather +than from summing forces between individual pairs of atoms. + :line The {special} keyword allows to override the 1-2, 1-3, and 1-4 diff --git a/doc/src/pair_style.txt b/doc/src/pair_style.txt index e305bc705d..8a35e5a467 100644 --- a/doc/src/pair_style.txt +++ b/doc/src/pair_style.txt @@ -284,6 +284,8 @@ accelerated styles exist. "sph/rhosum"_pair_sph_rhosum.html - "sph/taitwater"_pair_sph_taitwater.html - "sph/taitwater/morris"_pair_sph_taitwater_morris.html - +"spin/dipole/cut"_pair_spin_dipole.html - +"spin/dipole/long"_pair_spin_dipole.html - "spin/dmi"_pair_spin_dmi.html - "spin/exchange"_pair_spin_exchange.html - "spin/magelec"_pair_spin_magelec.html - diff --git a/doc/src/pairs.txt b/doc/src/pairs.txt index babdd2d1cc..2f63f18bad 100644 --- a/doc/src/pairs.txt +++ b/doc/src/pairs.txt @@ -105,6 +105,7 @@ Pair Styles :h1 pair_sph_rhosum pair_sph_taitwater pair_sph_taitwater_morris + pair_spin_dipole pair_spin_dmi pair_spin_exchange pair_spin_magelec diff --git a/doc/utils/sphinx-config/_themes/lammps_theme/static/css/theme.css b/doc/utils/sphinx-config/_themes/lammps_theme/static/css/theme.css index 58c5bec697..5e0d43b128 100644 --- a/doc/utils/sphinx-config/_themes/lammps_theme/static/css/theme.css +++ b/doc/utils/sphinx-config/_themes/lammps_theme/static/css/theme.css @@ -5092,4 +5092,17 @@ span[id*='MathJax-Span'] { src: local("Roboto Slab Bold"), local("RobotoSlab-Bold"), url(../fonts/RobotoSlab-Bold.ttf) format("truetype"); } +.codeblock, pre.literal-block, .rst-content .literal-block, .rst-content pre.literal-block, div[class^='highlight'] { + font-size: 12px; + line-height: 1.5; + display: block; + overflow: auto; + color: #404040; + padding: 12px 12px; +} + +.codeblock,div[class^='highlight'] { + padding: 0; +} + /*# sourceMappingURL=theme.css.map */ diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index bc50d4ca09..e168235565 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -101,6 +101,7 @@ api Appl Apu arccos +Archlinux arcsin arg args @@ -1531,6 +1532,7 @@ Makefile makefiles Makefiles makelist +makepkg Makse malloc Malolepsza diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 5534d32e5f..9410cc5250 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -17,6 +17,7 @@ #include "lal_precision.h" #include #include +#include #ifdef _OPENMP #include #endif diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index 9d503663ae..8d196e2c35 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,39 @@ # Change Log +## [2.9.00](https://github.com/kokkos/kokkos/tree/2.9.00) (2019-06-24) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.8.00...2.9.00) + +**Implemented enhancements:** + +- Capability: CUDA Streams [\#1723](https://github.com/kokkos/kokkos/issues/1723) +- Capability: CUDA Stream support for parallel\_reduce [\#2061](https://github.com/kokkos/kokkos/issues/2061) +- Capability: Feature Request: TeamVectorRange [\#713](https://github.com/kokkos/kokkos/issues/713) +- Capability: Adding HPX backend [\#2080](https://github.com/kokkos/kokkos/issues/2080) +- Capability: TaskScheduler to have multiple queues [\#565](https://github.com/kokkos/kokkos/issues/565) +- Capability: Support for additional reductions in ScatterView [\#1674](https://github.com/kokkos/kokkos/issues/1674) +- Capability: Request: deep\_copy within parallel regions [\#689](https://github.com/kokkos/kokkos/issues/689) +- Capability: Feature Request: `create\_mirror\_view\_without\_initializing` [\#1765](https://github.com/kokkos/kokkos/issues/1765) +- View: Use SFINAE to restrict possible View type conversions [\#2127](https://github.com/kokkos/kokkos/issues/2127) +- Deprecation: Deprecate ExecutionSpace::fence\(\) as static function and make it non-static [\#2140](https://github.com/kokkos/kokkos/issues/2140) +- Deprecation: Deprecate LayoutTileLeft [\#2122](https://github.com/kokkos/kokkos/issues/2122) +- Macros: KOKKOS\_RESTRICT defined for non-Intel compilers [\#2038](https://github.com/kokkos/kokkos/issues/2038) + +**Fixed bugs:** + +- Cuda: TeamThreadRange loop count on device is passed by reference to host static constexpr [\#1733](https://github.com/kokkos/kokkos/issues/1733) +- Cuda: Build error with relocatable device code with CUDA 10.1 GCC 7.3 [\#2134](https://github.com/kokkos/kokkos/issues/2134) +- Cuda: cudaFuncSetCacheConfig is setting CachePreferShared too often [\#2066](https://github.com/kokkos/kokkos/issues/2066) +- Cuda: TeamPolicy doesn't throw then created with non-viable vector length and also doesn't backscale to viable one [\#2020](https://github.com/kokkos/kokkos/issues/2020) +- Cuda: cudaMemcpy error for large league sizes on V100 [\#1991](https://github.com/kokkos/kokkos/issues/1991) +- Cuda: illegal warp sync in parallel\_reduce by functor on Turing 75 [\#1958](https://github.com/kokkos/kokkos/issues/1958) +- TeamThreadRange: Inconsistent results from TeamThreadRange reduction [\#1905](https://github.com/kokkos/kokkos/issues/1905) +- Atomics: atomic\_fetch\_oper & atomic\_oper\_fetch don't build for complex\ [\#1964](https://github.com/kokkos/kokkos/issues/1964) +- Views: Kokkos randomread Views leak memory [\#2155](https://github.com/kokkos/kokkos/issues/2155) +- ScatterView: LayoutLeft overload currently non-functional [\#2165](https://github.com/kokkos/kokkos/issues/2165) +- KNL: With intel 17.2.174 illegal instruction in random number test [\#2078](https://github.com/kokkos/kokkos/issues/2078) +- Bitset: Enable copy constructor on device [\#2094](https://github.com/kokkos/kokkos/issues/2094) +- Examples: do not compile due to template deduction error \(multi\_fem\) [\#1928](https://github.com/kokkos/kokkos/issues/1928) + ## [2.8.00](https://github.com/kokkos/kokkos/tree/2.8.00) (2019-02-05) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.7.24...2.8.00) diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index a90e86b9f8..e9ad57f0ae 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -23,7 +23,7 @@ KOKKOS_DEBUG ?= "no" KOKKOS_USE_TPLS ?= "" # Options: c++11,c++14,c++1y,c++17,c++1z,c++2a KOKKOS_CXX_STANDARD ?= "c++11" -# Options: aggressive_vectorization,disable_profiling,disable_deprecated_code,enable_large_mem_tests +# Options: aggressive_vectorization,disable_profiling,enable_deprecated_code,disable_deprecated_code,enable_large_mem_tests KOKKOS_OPTIONS ?= "" # Option for setting ETI path KOKKOS_ETI_PATH ?= ${KOKKOS_PATH}/core/src/eti @@ -33,11 +33,19 @@ KOKKOS_CMAKE ?= "no" # Options: force_uvm,use_ldg,rdc,enable_lambda KOKKOS_CUDA_OPTIONS ?= "enable_lambda" +# Default settings specific options. +# Options: enable_async_dispatch +KOKKOS_HPX_OPTIONS ?= "" + # Return a 1 if a string contains a substring and 0 if not # Note the search string should be without '"' # Example: $(call kokkos_has_string,"hwloc,librt",hwloc) # Will return a 1 kokkos_has_string=$(if $(findstring $2,$1),1,0) +# Returns 1 if the path exists, 0 otherwise +# Example: $(call kokkos_path_exists,/path/to/file) +# Will return a 1 if /path/to/file exists +kokkos_path_exists=$(if $(wildcard $1),1,0) # Check for general settings. KOKKOS_INTERNAL_ENABLE_DEBUG := $(call kokkos_has_string,$(KOKKOS_DEBUG),yes) @@ -58,6 +66,7 @@ KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OP KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization) KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_profiling) KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code) +KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecated_code) KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check) KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print) KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_large_mem_tests) @@ -65,6 +74,7 @@ KOKKOS_INTERNAL_CUDA_USE_LDG := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS), KOKKOS_INTERNAL_CUDA_USE_UVM := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),force_uvm) KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc) KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda) +KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_eti) @@ -72,12 +82,15 @@ KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_ KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP) KOKKOS_INTERNAL_USE_PTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Pthread) KOKKOS_INTERNAL_USE_QTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Qthreads) +KOKKOS_INTERNAL_USE_HPX := $(call kokkos_has_string,$(KOKKOS_DEVICES),HPX) KOKKOS_INTERNAL_USE_SERIAL := $(call kokkos_has_string,$(KOKKOS_DEVICES),Serial) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0) - KOKKOS_INTERNAL_USE_SERIAL := 1 + ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0) + KOKKOS_INTERNAL_USE_SERIAL := 1 + endif endif endif endif @@ -112,7 +125,7 @@ KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2 KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)) KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep nvcc | wc -l)) KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang) -KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),apple-darwin) +KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple LLVM) KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) # Check Host Compiler if using NVCC through nvcc_wrapper @@ -283,9 +296,9 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLE + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ - + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \ - + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \ - + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \ + + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \ + + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \ + + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53)) @@ -300,19 +313,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ - + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \ - + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \ - + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \ + + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \ + + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \ + + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53)) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1) - ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) - CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) + CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) + ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) KOKKOS_INTERNAL_OPENMPTARGET_FLAG := $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) --cuda-path=$(CUDA_PATH) endif endif @@ -441,6 +454,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_QTHREADS") endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX") +endif + ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_SERIAL") endif @@ -559,9 +576,15 @@ ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING") endif -ifeq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 0) - tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE") +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0) + ifeq ($(KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE), 1) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE") + endif + ifeq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 0) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE") + endif endif + ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_ETI") endif @@ -593,8 +616,13 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE") - KOKKOS_CXXFLAGS += --relocatable-device-code=true - KOKKOS_LDFLAGS += --relocatable-device-code=true + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_CXXFLAGS += -fcuda-rdc + KOKKOS_LDFLAGS += -fcuda-rdc + else + KOKKOS_CXXFLAGS += --relocatable-device-code=true + KOKKOS_LDFLAGS += --relocatable-device-code=true + endif endif ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) @@ -625,6 +653,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + ifeq ($(KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH), 1) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX_ASYNC_DISPATCH") + endif +endif + # Add Architecture flags. ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) @@ -908,7 +942,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch KOKKOS_CXXFLAGS += -x cuda else - $(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang) + $(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang (got version string $(KOKKOS_CXX_VERSION)) ) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) @@ -1058,10 +1092,18 @@ endif ifneq ($(KOKKOS_CMAKE), yes) KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include endif - KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 - KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib64 + ifeq ($(call kokkos_path_exists,$(CUDA_PATH)/lib64), 1) + KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 + KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib64 + KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib64 + else ifeq ($(call kokkos_path_exists,$(CUDA_PATH)/lib), 1) + KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib + KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib + KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib + else + $(error Can't find CUDA library directory: no lib64 or lib directory in $(CUDA_PATH)) + endif KOKKOS_TPL_INCLUDE_DIRS += $(CUDA_PATH)/include - KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib64 ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH) endif @@ -1124,6 +1166,33 @@ ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) KOKKOS_TPL_LIBRARY_NAMES += qthread endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp) + ifneq ($(HPX_PATH),) + ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) + KOKKOS_CXXFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --cflags hpx_application_debug) + KOKKOS_CXXLDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application_debug) + KOKKOS_LDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application_debug) + else + KOKKOS_CXXFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --cflags hpx_application) + KOKKOS_CXXLDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application) + KOKKOS_LDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application) + endif + else + ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) + KOKKOS_CXXFLAGS += $(shell pkg-config --cflags hpx_application_debug) + KOKKOS_CXXLDFLAGS += $(shell pkg-config --libs hpx_application_debug) + KOKKOS_LDFLAGS += $(shell pkg-config --libs hpx_application_debug) + else + KOKKOS_CXXFLAGS += $(shell pkg-config --cflags hpx_application) + KOKKOS_CXXLDFLAGS += $(shell pkg-config --libs hpx_application) + KOKKOS_LDFLAGS += $(shell pkg-config --libs hpx_application) + endif + endif + KOKKOS_TPL_LIBRARY_NAMES += hpx +endif + # Explicitly set the GCC Toolchain for Clang. ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_INTERNAL_GCC_PATH = $(shell which g++) diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index 44da1e082a..e7d5a3c907 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -30,6 +30,8 @@ Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp +Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1) @@ -38,8 +40,8 @@ endif endif ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) -Kokkos_Cuda_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp +Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp @@ -92,6 +94,13 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) +Kokkos_HPX.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX.cpp +Kokkos_HPX_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX_Task.cpp +endif + ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) Kokkos_OpenMPTarget_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp diff --git a/lib/kokkos/algorithms/cmake/Dependencies.cmake b/lib/kokkos/algorithms/cmake/Dependencies.cmake index c36b62523f..1b41310681 100644 --- a/lib/kokkos/algorithms/cmake/Dependencies.cmake +++ b/lib/kokkos/algorithms/cmake/Dependencies.cmake @@ -1,5 +1,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( LIB_REQUIRED_PACKAGES KokkosCore KokkosContainers - LIB_OPTIONAL_TPLS Pthread CUDA HWLOC + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC HPX TEST_OPTIONAL_TPLS CUSPARSE ) diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp index 8bdd876723..7fb8505fe5 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -328,6 +328,8 @@ public: parallel_for("Kokkos::Sort::Copy", Kokkos::RangePolicy(0,len),functor); } + + Kokkos::fence(); } template diff --git a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt index f5aa24e9be..e238b37c8e 100644 --- a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt +++ b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt @@ -42,6 +42,12 @@ IF(Kokkos_ENABLE_OpenMP) ) ENDIF() +IF(Kokkos_ENABLE_HPX) + LIST( APPEND SOURCES + TestHPX.cpp + ) +ENDIF() + IF(Kokkos_ENABLE_Serial) LIST( APPEND SOURCES TestSerial.cpp diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile index b5848c451e..3c862d03dc 100644 --- a/lib/kokkos/algorithms/unit_tests/Makefile +++ b/lib/kokkos/algorithms/unit_tests/Makefile @@ -49,6 +49,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) TEST_TARGETS += test-openmp endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + OBJ_HPX = TestHPX.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_HPX + TEST_TARGETS += test-hpx +endif + ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o TARGETS += KokkosAlgorithms_UnitTest_Serial @@ -67,6 +73,9 @@ KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_OpenMP +KokkosAlgorithms_UnitTest_HPX: $(OBJ_HPX) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) $(OBJ_HPX) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_HPX + KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Serial @@ -82,6 +91,9 @@ test-threads: KokkosAlgorithms_UnitTest_Threads test-openmp: KokkosAlgorithms_UnitTest_OpenMP ./KokkosAlgorithms_UnitTest_OpenMP +test-hpx: KokkosAlgorithms_UnitTest_HPX + ./KokkosAlgorithms_UnitTest_HPX + test-serial: KokkosAlgorithms_UnitTest_Serial ./KokkosAlgorithms_UnitTest_Serial diff --git a/lib/kokkos/algorithms/unit_tests/TestHPX.cpp b/lib/kokkos/algorithms/unit_tests/TestHPX.cpp new file mode 100644 index 0000000000..e5b7dbdb7a --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestHPX.cpp @@ -0,0 +1,96 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#include +#ifdef KOKKOS_ENABLE_HPX + +#include +#include + +//---------------------------------------------------------------------------- +#include +#include +#include + +namespace Test { + +class hpx : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + } + + static void TearDownTestCase() + { + } +}; + +#define HPX_RANDOM_XORSHIFT64( num_draws ) \ + TEST_F( hpx, Random_XorShift64 ) { \ + Impl::test_random >(num_draws); \ + } + +#define HPX_RANDOM_XORSHIFT1024( num_draws ) \ + TEST_F( hpx, Random_XorShift1024 ) { \ + Impl::test_random >(num_draws); \ + } + +#define HPX_SORT_UNSIGNED( size ) \ + TEST_F( hpx, SortUnsigned ) { \ + Impl::test_sort< Kokkos::Experimental::HPX, unsigned >(size); \ + } + +HPX_RANDOM_XORSHIFT64( 10240000 ) +HPX_RANDOM_XORSHIFT1024( 10130144 ) +HPX_SORT_UNSIGNED(171) + +#undef HPX_RANDOM_XORSHIFT64 +#undef HPX_RANDOM_XORSHIFT1024 +#undef HPX_SORT_UNSIGNED +} // namespace test +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTHPX_PREVENT_LINK_ERROR() {} +#endif + diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp index e0c646c199..5fd7f09b50 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -225,9 +225,9 @@ void test_dynamic_view_sort(unsigned int n ) Kokkos::Random_XorShift64_Pool g(1931); Kokkos::fill_random(keys_view,g,Kokkos::Random_XorShift64_Pool::generator_type::MAX_URAND); - ExecutionSpace::fence(); + ExecutionSpace().fence(); Kokkos::deep_copy(keys,keys_view); - //ExecutionSpace::fence(); + //ExecutionSpace().fence(); double sum_before = 0.0; double sum_after = 0.0; @@ -237,9 +237,9 @@ void test_dynamic_view_sort(unsigned int n ) Kokkos::sort(keys, 0 /* begin */ , n /* end */ ); - ExecutionSpace::fence(); // Need this fence to prevent BusError with Cuda + ExecutionSpace().fence(); // Need this fence to prevent BusError with Cuda Kokkos::deep_copy( keys_view , keys ); - //ExecutionSpace::fence(); + //ExecutionSpace().fence(); Kokkos::parallel_reduce(n,sum(keys_view),sum_after); Kokkos::parallel_reduce(n-1,is_sorted_struct(keys_view),sort_fails); diff --git a/lib/kokkos/cmake/kokkos_build.cmake b/lib/kokkos/cmake/kokkos_build.cmake index 8178483d01..f9b995baae 100644 --- a/lib/kokkos/cmake/kokkos_build.cmake +++ b/lib/kokkos/cmake/kokkos_build.cmake @@ -76,8 +76,20 @@ IF(KOKKOS_SEPARATE_LIBS) ) foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES) - if ("${lib}" STREQUAL "cuda") + if (("${lib}" STREQUAL "cuda") AND (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) set(LIB_cuda "-lcuda") + elseif ("${lib}" STREQUAL "hpx") + find_package(HPX REQUIRED) + if(${HPX_FOUND}) + target_link_libraries(kokkoscore PUBLIC ${HPX_LIBRARIES}) + target_link_libraries(kokkoscontainers PUBLIC ${HPX_LIBRARIES}) + target_link_libraries(kokkosalgorithms PUBLIC ${HPX_LIBRARIES}) + target_include_directories(kokkoscore PUBLIC ${HPX_INCLUDE_DIRS}) + target_include_directories(kokkoscontainers PUBLIC ${HPX_INCLUDE_DIRS}) + target_include_directories(kokkosalgorithms PUBLIC ${HPX_INCLUDE_DIRS}) + else() + message(ERROR "HPX not found. Check the value of HPX_DIR (= ${HPX_DIR}) or CMAKE_PREFIX_PATH (= ${CMAKE_PREFIX_PATH}).") + endif() else() find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS}) endif() @@ -158,8 +170,16 @@ ELSE() ) foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES) - if ("${lib}" STREQUAL "cuda") + if (("${lib}" STREQUAL "cuda") AND (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) set(LIB_cuda "-lcuda") + elseif ("${lib}" STREQUAL "hpx") + find_package(HPX REQUIRED) + if(${HPX_FOUND}) + target_link_libraries(kokkos PUBLIC ${HPX_LIBRARIES}) + target_include_directories(kokkos PUBLIC ${HPX_INCLUDE_DIRS}) + else() + message(ERROR "HPX not found. Check the value of HPX_DIR (= ${HPX_DIR}) or CMAKE_PREFIX_PATH (= ${CMAKE_PREFIX_PATH}).") + endif() else() find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS}) endif() diff --git a/lib/kokkos/cmake/kokkos_functions.cmake b/lib/kokkos/cmake/kokkos_functions.cmake index bc490115af..616618753b 100644 --- a/lib/kokkos/cmake/kokkos_functions.cmake +++ b/lib/kokkos/cmake/kokkos_functions.cmake @@ -95,7 +95,7 @@ function(set_kokkos_cxx_compiler) message(FATAL_ERROR "Compiling CUDA code directly with Clang requires version 4.0.0 or higher.") endif() elseif(NOT INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA) - message(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang.") + message(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang, but compiler ID was ${INTERNAL_CXX_COMPILER_ID}") endif() endif() diff --git a/lib/kokkos/cmake/kokkos_options.cmake b/lib/kokkos/cmake/kokkos_options.cmake index be494e5df0..e730a94664 100644 --- a/lib/kokkos/cmake/kokkos_options.cmake +++ b/lib/kokkos/cmake/kokkos_options.cmake @@ -14,6 +14,7 @@ list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST OpenMP Pthread Qthread + HPX Cuda ROCm HWLOC @@ -23,6 +24,7 @@ list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST Cuda_Relocatable_Device_Code Cuda_UVM Cuda_LDG_Intrinsic + HPX_ASYNC_DISPATCH Debug Debug_DualView_Modify_Check Debug_Bounds_Check @@ -116,6 +118,7 @@ list(APPEND KOKKOS_DEVICES_LIST OpenMP # OpenMP Pthread # pthread Qthreads # qthreads + HPX # HPX Serial # serial ROCm # Relocatable device code ) @@ -173,6 +176,19 @@ set(KOKKOS_INTERNAL_RELOCATABLE_DEVICE_CODE rdc) set(KOKKOS_INTERNAL_LAMBDA enable_lambda) +#------------------------------------------------------------------------------- +# List of possible Options for HPX +#------------------------------------------------------------------------------- +# From Makefile.kokkos: Options: enable_async_dispatch +set(KOKKOS_HPX_OPTIONS_LIST) +list(APPEND KOKKOS_HPX_OPTIONS_LIST + ASYNC_DISPATCH # enable_async_dispatch + ) + +# Map of cmake variables to Makefile variables +set(KOKKOS_INTERNAL_ENABLE_ASYNC_DISPATCH enable_async_dispatch) + + #------------------------------------------------------------------------------- #------------------------------- Create doc strings ---------------------------- #------------------------------------------------------------------------------- @@ -202,6 +218,11 @@ set(KOKKOS_SEPARATE_LIBS OFF CACHE BOOL "OFF = kokkos. ON = kokkoscore, kokkosc # Qthreads options. set(KOKKOS_QTHREADS_DIR "" CACHE PATH "Location of Qthreads library.") +# HPX options. +set(KOKKOS_HPX_DIR "" CACHE PATH "Location of HPX library.") + +# Whether to build separate libraries or now +set(KOKKOS_SEPARATE_TESTS OFF CACHE BOOL "Provide unit test targets with finer granularity.") #------------------------------------------------------------------------------- #------------------------------- KOKKOS_DEVICES -------------------------------- @@ -215,6 +236,11 @@ IF(Trilinos_ENABLE_Kokkos) ELSE() set_kokkos_default_default(QTHREADS OFF) ENDIF() + IF(TPL_ENABLE_HPX) + set_kokkos_default_default(HPX ON) + ELSE() + set_kokkos_default_default(HPX OFF) + ENDIF() IF(Trilinos_ENABLE_OpenMP) set_kokkos_default_default(OPENMP ${Trilinos_ENABLE_OpenMP}) ELSE() @@ -231,6 +257,7 @@ ELSE() set_kokkos_default_default(OPENMP OFF) set_kokkos_default_default(PTHREAD OFF) set_kokkos_default_default(QTHREAD OFF) + set_kokkos_default_default(HPX OFF) set_kokkos_default_default(CUDA OFF) set_kokkos_default_default(ROCM OFF) ENDIF() @@ -241,6 +268,7 @@ set(KOKKOS_ENABLE_SERIAL ${KOKKOS_INTERNAL_ENABLE_SERIAL_DEFAULT} CACHE BOOL "Wh set(KOKKOS_ENABLE_OPENMP ${KOKKOS_INTERNAL_ENABLE_OPENMP_DEFAULT} CACHE BOOL "Enable OpenMP support in Kokkos." FORCE) set(KOKKOS_ENABLE_PTHREAD ${KOKKOS_INTERNAL_ENABLE_PTHREAD_DEFAULT} CACHE BOOL "Enable Pthread support in Kokkos.") set(KOKKOS_ENABLE_QTHREADS ${KOKKOS_INTERNAL_ENABLE_QTHREADS_DEFAULT} CACHE BOOL "Enable Qthreads support in Kokkos.") +set(KOKKOS_ENABLE_HPX ${KOKKOS_INTERNAL_ENABLE_HPX_DEFAULT} CACHE BOOL "Enable HPX support in Kokkos.") set(KOKKOS_ENABLE_CUDA ${KOKKOS_INTERNAL_ENABLE_CUDA_DEFAULT} CACHE BOOL "Enable CUDA support in Kokkos.") set(KOKKOS_ENABLE_ROCM ${KOKKOS_INTERNAL_ENABLE_ROCM_DEFAULT} CACHE BOOL "Enable ROCm support in Kokkos.") @@ -343,6 +371,18 @@ set(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE ${KOKKOS_INTERNAL_ENABLE_CUDA_REL set(KOKKOS_ENABLE_CUDA_LAMBDA ${KOKKOS_INTERNAL_ENABLE_CUDA_LAMBDA_DEFAULT} CACHE BOOL "Enable lambdas for CUDA. (cuda option)") +#------------------------------------------------------------------------------- +#------------------------------- KOKKOS_HPX_OPTIONS ---------------------------- +#------------------------------------------------------------------------------- + +# HPX options. +# Set Defaults +set_kokkos_default_default(HPX_ASYNC_DISPATCH OFF) + +# Set actual options +set(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH ${KOKKOS_INTERNAL_ENABLE_HPX_ASYNC_DISPATCH_DEFAULT} CACHE BOOL "Enable HPX async dispatch.") + + #------------------------------------------------------------------------------- #----------------------- HOST ARCH AND LEGACY TRIBITS -------------------------- #------------------------------------------------------------------------------- @@ -376,4 +416,3 @@ foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST}) SET(Kokkos_ENABLE_${opt} ${KOKKOS_ENABLE_${OPT}} CACHE BOOL "CamelCase Compatibility setting for KOKKOS_ENABLE_${OPT}") ENDIF() endforeach() - diff --git a/lib/kokkos/cmake/kokkos_settings.cmake b/lib/kokkos/cmake/kokkos_settings.cmake index 387ced6d52..2c622d0de9 100644 --- a/lib/kokkos/cmake/kokkos_settings.cmake +++ b/lib/kokkos/cmake/kokkos_settings.cmake @@ -198,6 +198,8 @@ if(KOKKOS_CMAKE_VERBOSE) message(STATUS " Host Parallel: Pthread") elseif(KOKKOS_ENABLE_QTHREADS) message(STATUS " Host Parallel: Qthreads") + elseif(KOKKOS_ENABLE_HPX) + message(STATUS " Host Parallel: HPX") else() message(STATUS " Host Parallel: None") endif() @@ -244,6 +246,10 @@ if(KOKKOS_CMAKE_VERBOSE) message(STATUS " KOKKOS_MEMKIND_DIR: ${KOKKOS_MEMKIND_DIR}") endif() + if(KOKKOS_HPX_DIR) + message(STATUS " KOKKOS_HPX_DIR: ${KOKKOS_HPX_DIR}") + endif() + message(STATUS "") message(STATUS "Final kokkos settings variable:") message(STATUS " ${KOKKOS_SETTINGS}") diff --git a/lib/kokkos/cmake/tribits.cmake b/lib/kokkos/cmake/tribits.cmake index f8eebc29f8..1f467f0662 100644 --- a/lib/kokkos/cmake/tribits.cmake +++ b/lib/kokkos/cmake/tribits.cmake @@ -9,6 +9,10 @@ IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP) SET(${PROJECT_NAME}_ENABLE_OpenMP OFF) ENDIF() +IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_HPX) + SET(${PROJECT_NAME}_ENABLE_HPX OFF) +ENDIF() + IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG) SET(${PROJECT_NAME}_ENABLE_DEBUG OFF) ENDIF() @@ -309,6 +313,10 @@ ENDFUNCTION() FUNCTION(TRIBITS_TPL_TENTATIVELY_ENABLE) ENDFUNCTION() +FUNCTION(TRIBITS_ADD_ADVANCED_TEST) + # TODO Write this +ENDFUNCTION() + FUNCTION(TRIBITS_ADD_EXECUTABLE_AND_TEST EXE_NAME) SET(options STANDARD_PASS_OUTPUT WILL_FAIL) diff --git a/lib/kokkos/containers/cmake/Dependencies.cmake b/lib/kokkos/containers/cmake/Dependencies.cmake index 1d71d8af34..5e29157369 100644 --- a/lib/kokkos/containers/cmake/Dependencies.cmake +++ b/lib/kokkos/containers/cmake/Dependencies.cmake @@ -1,5 +1,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( LIB_REQUIRED_PACKAGES KokkosCore - LIB_OPTIONAL_TPLS Pthread CUDA HWLOC + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC HPX TEST_OPTIONAL_TPLS CUSPARSE ) diff --git a/lib/kokkos/containers/performance_tests/CMakeLists.txt b/lib/kokkos/containers/performance_tests/CMakeLists.txt index 1203a8bd81..3c6584bc34 100644 --- a/lib/kokkos/containers/performance_tests/CMakeLists.txt +++ b/lib/kokkos/containers/performance_tests/CMakeLists.txt @@ -24,6 +24,10 @@ IF(Kokkos_ENABLE_OpenMP) LIST( APPEND SOURCES TestOpenMP.cpp) ENDIF() +IF(Kokkos_ENABLE_HPX) + LIST( APPEND SOURCES TestHPX.cpp) +ENDIF() + # Per #374, we always want to build this test, but we only want to run # it as a PERFORMANCE test. That's why we separate building the test # from running the test. diff --git a/lib/kokkos/containers/performance_tests/Makefile b/lib/kokkos/containers/performance_tests/Makefile index ebed75ccd6..f309a220d0 100644 --- a/lib/kokkos/containers/performance_tests/Makefile +++ b/lib/kokkos/containers/performance_tests/Makefile @@ -49,6 +49,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) TEST_TARGETS += test-openmp endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + OBJ_HPX = TestHPX.o TestMain.o gtest-all.o + TARGETS += KokkosContainers_PerformanceTest_HPX + TEST_TARGETS += test-hpx +endif + KokkosContainers_PerformanceTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Cuda @@ -61,6 +67,9 @@ KokkosContainers_PerformanceTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) KokkosContainers_PerformanceTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_OpenMP +KokkosContainers_PerformanceTest_HPX: $(OBJ_HPX) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_HPX) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_HPX + test-cuda: KokkosContainers_PerformanceTest_Cuda ./KokkosContainers_PerformanceTest_Cuda @@ -73,6 +82,9 @@ test-threads: KokkosContainers_PerformanceTest_Threads test-openmp: KokkosContainers_PerformanceTest_OpenMP ./KokkosContainers_PerformanceTest_OpenMP +test-hpx: KokkosContainers_PerformanceTest_HPX + ./KokkosContainers_PerformanceTest_HPX + build_all: $(TARGETS) test: $(TEST_TARGETS) diff --git a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp index 0d2fae32a3..db6274e057 100644 --- a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp +++ b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp @@ -197,7 +197,7 @@ void test_dynrankview_op_perf( const int par_size ) timer.reset(); Kokkos::RangePolicy policy(0,par_size); Kokkos::parallel_for( policy , FunctorType(testview) ); - DeviceType::fence(); + DeviceType().fence(); elapsed_time_view = timer.seconds(); std::cout << " View time (init only): " << elapsed_time_view << std::endl; @@ -205,7 +205,7 @@ void test_dynrankview_op_perf( const int par_size ) timer.reset(); Kokkos::View sumview("sumview",par_size); Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testview, sumview) ); - DeviceType::fence(); + DeviceType().fence(); elapsed_time_compview = timer.seconds(); std::cout << " View sum computation time: " << elapsed_time_view << std::endl; @@ -215,7 +215,7 @@ void test_dynrankview_op_perf( const int par_size ) timer.reset(); Kokkos::parallel_for( policy , FunctorStrideType(teststrideview) ); - DeviceType::fence(); + DeviceType().fence(); elapsed_time_strideview = timer.seconds(); std::cout << " Strided View time (init only): " << elapsed_time_strideview << std::endl; } @@ -226,7 +226,7 @@ void test_dynrankview_op_perf( const int par_size ) timer.reset(); Kokkos::RangePolicy policy(0,par_size); Kokkos::parallel_for( policy , FunctorType(testview) ); - DeviceType::fence(); + DeviceType().fence(); elapsed_time_view_rank7 = timer.seconds(); std::cout << " View Rank7 time (init only): " << elapsed_time_view_rank7 << std::endl; } @@ -237,14 +237,14 @@ void test_dynrankview_op_perf( const int par_size ) timer.reset(); Kokkos::RangePolicy policy(0,par_size); Kokkos::parallel_for( policy , FunctorType(testdrview) ); - DeviceType::fence(); + DeviceType().fence(); elapsed_time_drview = timer.seconds(); std::cout << " DynRankView time (init only): " << elapsed_time_drview << std::endl; timer.reset(); Kokkos::DynRankView sumview("sumview",par_size); Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testdrview, sumview) ); - DeviceType::fence(); + DeviceType().fence(); elapsed_time_compdrview = timer.seconds(); std::cout << " DynRankView sum computation time: " << elapsed_time_compdrview << std::endl; diff --git a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp index dcaca776be..98997b3239 100644 --- a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp +++ b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp @@ -192,7 +192,7 @@ void test_global_to_local_ids(unsigned num_ids) { generate_ids gen(local_2_global); } - Device::fence(); + Device().fence(); // generate elasped_time = timer.seconds(); std::cout << elasped_time << ", "; @@ -201,7 +201,7 @@ void test_global_to_local_ids(unsigned num_ids) { fill_map fill(global_2_local, local_2_global); } - Device::fence(); + Device().fence(); // fill elasped_time = timer.seconds(); @@ -214,7 +214,7 @@ void test_global_to_local_ids(unsigned num_ids) { find_test find(global_2_local, local_2_global,num_errors); } - Device::fence(); + Device().fence(); // find elasped_time = timer.seconds(); diff --git a/lib/kokkos/containers/performance_tests/TestHPX.cpp b/lib/kokkos/containers/performance_tests/TestHPX.cpp new file mode 100644 index 0000000000..0f43377cee --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestHPX.cpp @@ -0,0 +1,130 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#if defined( KOKKOS_ENABLE_HPX ) + +#include + +#include + +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include + + +namespace Performance { + +class hpx : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + Kokkos::initialize(); + Kokkos::print_configuration( std::cout ); + } + + static void TearDownTestCase() + { + Kokkos::finalize(); + } +}; + +TEST_F( hpx, dynrankview_perf ) +{ + std::cout << "HPX" << std::endl; + std::cout << " DynRankView vs View: Initialization Only " << std::endl; + test_dynrankview_op_perf( 8192 ); +} + +TEST_F( hpx, global_2_local) +{ + std::cout << "HPX" << std::endl; + std::cout << "size, create, generate, fill, find" << std::endl; + for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + test_global_to_local_ids(i); +} + +TEST_F( hpx, unordered_map_performance_near) +{ + unsigned num_hpx = 4; + std::ostringstream base_file_name; + base_file_name << "hpx-" << num_hpx << "-near"; + Perf::run_performance_tests(base_file_name.str()); +} + +TEST_F( hpx, unordered_map_performance_far) +{ + unsigned num_hpx = 4; + std::ostringstream base_file_name; + base_file_name << "hpx-" << num_hpx << "-far"; + Perf::run_performance_tests(base_file_name.str()); +} + +TEST_F( hpx, scatter_view) +{ + std::cout << "ScatterView data-duplicated test:\n"; + Perf::test_scatter_view(10, 1000 * 1000); +//std::cout << "ScatterView atomics test:\n"; +//Perf::test_scatter_view(10, 1000 * 1000); +} + +} // namespace test +#else +void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTHPX_PREVENT_EMPTY_LINK_ERROR() {} +#endif + diff --git a/lib/kokkos/containers/performance_tests/TestScatterView.hpp b/lib/kokkos/containers/performance_tests/TestScatterView.hpp index 03129d2b09..bd9121bb82 100644 --- a/lib/kokkos/containers/performance_tests/TestScatterView.hpp +++ b/lib/kokkos/containers/performance_tests/TestScatterView.hpp @@ -83,6 +83,7 @@ void test_scatter_view(int m, int n) for (int k = 0; k < m; ++k) { Kokkos::parallel_for(policy, f2, "hand_coded_duplicate_scatter_view_test"); } + Kokkos::fence(); auto t = timer.seconds(); std::cout << "hand-coded test took " << t << " seconds\n"; } @@ -101,6 +102,7 @@ void test_scatter_view(int m, int n) for (int k = 0; k < m; ++k) { Kokkos::parallel_for(policy, f, "scatter_view_test"); } + Kokkos::fence(); auto t = timer.seconds(); std::cout << "test took " << t << " seconds\n"; } diff --git a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp index e8734b259d..8d09281ed3 100644 --- a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp +++ b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp @@ -108,7 +108,7 @@ struct UnorderedMapTest std::cout << std::setprecision(2) << std::fixed << std::setw(5) << (1e9*(seconds/(inserts))) << "; " << std::flush; histogram.calculate(); - Device::fence(); + Device().fence(); } void print(std::ostream & metrics_out, std::ostream & length_out, std::ostream & distance_out, std::ostream & block_distance_out) @@ -236,7 +236,7 @@ void run_performance_tests(std::string const & base_file_name) uint32_t inserts = static_cast(test_ratios[j]*(capacity)); std::cout << capacity << std::flush; UnorderedMapTest test(capacity, inserts*collisions[i], collisions[i]); - Device::fence(); + Device().fence(); test.print(metrics_out, length_out, distance_out, block_distance_out); } std::cout << "\b\b " << std::endl; diff --git a/lib/kokkos/containers/src/Kokkos_Bitset.hpp b/lib/kokkos/containers/src/Kokkos_Bitset.hpp index bfe8080f3b..4d78430fc6 100644 --- a/lib/kokkos/containers/src/Kokkos_Bitset.hpp +++ b/lib/kokkos/containers/src/Kokkos_Bitset.hpp @@ -107,22 +107,20 @@ public: } } - /// assignment - Bitset & operator = (Bitset const & rhs) - { - this->m_size = rhs.m_size; - this->m_last_block_mask = rhs.m_last_block_mask; - this->m_blocks = rhs.m_blocks; + KOKKOS_INLINE_FUNCTION + Bitset (const Bitset&) = default; - return *this; - } + KOKKOS_INLINE_FUNCTION + Bitset& operator= (const Bitset&) = default; - /// copy constructor - Bitset( Bitset const & rhs) - : m_size( rhs.m_size ) - , m_last_block_mask( rhs.m_last_block_mask ) - , m_blocks( rhs.m_blocks ) - {} + KOKKOS_INLINE_FUNCTION + Bitset (Bitset&&) = default; + + KOKKOS_INLINE_FUNCTION + Bitset& operator= (Bitset&&) = default; + + KOKKOS_INLINE_FUNCTION + ~Bitset () = default; /// number of bits in the set /// can be call from the host or the device diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp index f6631a4149..d9b14d67a2 100644 --- a/lib/kokkos/containers/src/Kokkos_DualView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp @@ -484,8 +484,8 @@ public: } } if(std::is_same::value) { - t_dev::execution_space::fence(); - t_host::execution_space::fence(); + typename t_dev::execution_space().fence(); + typename t_host::execution_space().fence(); } } diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp index 3f284e6a8d..d1e6704a57 100644 --- a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -75,7 +75,7 @@ struct DynRankDimTraits { , const size_t N4 , const size_t N5 , const size_t N6 - , const size_t N7 ) + , const size_t /* N7 */) { return ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified && N1 == unspecified && N0 == unspecified) ? 0 @@ -106,7 +106,7 @@ struct DynRankDimTraits { // Extra overload to match that for specialize types v2 template KOKKOS_INLINE_FUNCTION - static size_t computeRank( const Kokkos::Impl::ViewCtorProp& prop, const Layout& layout ) + static size_t computeRank( const Kokkos::Impl::ViewCtorProp& /* prop */, const Layout& layout ) { return computeRank(layout); } @@ -155,7 +155,7 @@ struct DynRankDimTraits { // Extra overload to match that for specialize types template KOKKOS_INLINE_FUNCTION - static typename std::enable_if< (std::is_same::value || std::is_same::value || std::is_same::value) , typename Traits::array_layout >::type createLayout( const Kokkos::Impl::ViewCtorProp& prop, const typename Traits::array_layout& layout ) + static typename std::enable_if< (std::is_same::value || std::is_same::value || std::is_same::value) , typename Traits::array_layout >::type createLayout( const Kokkos::Impl::ViewCtorProp& /* prop */, const typename Traits::array_layout& layout ) { return createLayout( layout ); } @@ -655,7 +655,7 @@ public: const size_t dim_scalar = m_map.dimension_scalar(); const size_t bytes = this->span() / dim_scalar; - typedef Kokkos::View > tmp_view_type; + typedef Kokkos::View > tmp_view_type; tmp_view_type rankone_view(this->data(), bytes, dim_scalar); return rankone_view(i0); } @@ -1060,7 +1060,7 @@ public: } // Copy the input allocation properties with possibly defaulted properties - alloc_prop prop( arg_prop ); + alloc_prop prop_copy( arg_prop ); //------------------------------------------------------------ #if defined( KOKKOS_ENABLE_CUDA ) @@ -1070,18 +1070,18 @@ public: // Fence using the trait's executon space (which will be Kokkos::Cuda) // to avoid incomplete type errors from usng Kokkos::Cuda directly. if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - traits::device_type::memory_space::execution_space::fence(); + typename traits::device_type::memory_space::execution_space().fence(); } #endif //------------------------------------------------------------ Kokkos::Impl::SharedAllocationRecord<> * - record = m_map.allocate_shared( prop , Impl::DynRankDimTraits::template createLayout(arg_prop, arg_layout) ); + record = m_map.allocate_shared( prop_copy, Impl::DynRankDimTraits::template createLayout(arg_prop, arg_layout) ); //------------------------------------------------------------ #if defined( KOKKOS_ENABLE_CUDA ) if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - traits::device_type::memory_space::execution_space::fence(); + typename traits::device_type::memory_space::execution_space().fence(); } #endif //------------------------------------------------------------ @@ -1609,7 +1609,7 @@ struct DynRankViewFill { closure.execute(); - execution_space::fence(); + execution_space().fence(); } }; @@ -1650,6 +1650,7 @@ struct DynRankViewRemap { typedef Kokkos::RangePolicy< ExecSpace > Policy ; const Kokkos::Impl::ParallelFor< DynRankViewRemap , Policy > closure( *this , Policy( 0 , n0 ) ); closure.execute(); + // Kokkos::fence(); // ?? } KOKKOS_INLINE_FUNCTION diff --git a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp index ab782a82ad..37d56e7cfb 100644 --- a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp @@ -288,8 +288,8 @@ public: >::type resize_serial( IntType const & n ) { - typedef typename traits::value_type value_type ; - typedef value_type * value_pointer_type ; + typedef typename traits::value_type local_value_type ; + typedef local_value_type * value_pointer_type ; const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ; // New total number of chunks needed for resize @@ -304,8 +304,8 @@ public: if ( *pc < NC ) { while ( *pc < NC ) { m_chunks[*pc] = reinterpret_cast - ( - typename traits::memory_space().allocate( sizeof(value_type) << m_chunk_shift ) + ( + typename traits::memory_space().allocate( sizeof(local_value_type) << m_chunk_shift ) ); ++*pc ; } @@ -314,7 +314,7 @@ public: while ( NC + 1 <= *pc ) { --*pc ; typename traits::memory_space().deallocate( m_chunks[*pc] - , sizeof(value_type) << m_chunk_shift ); + , sizeof(local_value_type) << m_chunk_shift ); m_chunks[*pc] = 0 ; } } @@ -376,8 +376,8 @@ public: closure.execute(); - traits::execution_space::fence(); - //Impl::ChunkArraySpace< typename traits::memory_space >::memory_space::execution_space::fence(); + typename traits::execution_space().fence(); + //Impl::ChunkArraySpace< typename traits::memory_space >::memory_space::execution_space().fence(); } void construct_shared_allocation() diff --git a/lib/kokkos/containers/src/Kokkos_OffsetView.hpp b/lib/kokkos/containers/src/Kokkos_OffsetView.hpp index b614764ee7..4ce1f4d84f 100644 --- a/lib/kokkos/containers/src/Kokkos_OffsetView.hpp +++ b/lib/kokkos/containers/src/Kokkos_OffsetView.hpp @@ -202,8 +202,8 @@ namespace Kokkos { template ::value, iType>::type = 0> KOKKOS_INLINE_FUNCTION - int64_t begin(const iType dimension) const { - return dimension < Rank ? m_begins[dimension] : 0; + int64_t begin(const iType local_dimension) const { + return local_dimension < Rank ? m_begins[local_dimension] : 0; } KOKKOS_INLINE_FUNCTION @@ -211,7 +211,9 @@ namespace Kokkos { template ::value, iType>::type = 0> KOKKOS_INLINE_FUNCTION - int64_t end(const iType dimension) const {return begin(dimension) + m_map.extent(dimension);} + int64_t end(const iType local_dimension) const { + return begin(local_dimension) + m_map.extent(local_dimension); + } private: @@ -1068,7 +1070,7 @@ namespace Kokkos { } // Copy the input allocation properties with possibly defaulted properties - alloc_prop prop( arg_prop ); + alloc_prop prop_copy( arg_prop ); //------------------------------------------------------------ #if defined( KOKKOS_ENABLE_CUDA ) @@ -1078,18 +1080,18 @@ namespace Kokkos { // Fence using the trait's executon space (which will be Kokkos::Cuda) // to avoid incomplete type errors from usng Kokkos::Cuda directly. if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - traits::device_type::memory_space::execution_space::fence(); + typename traits::device_type::memory_space::execution_space().fence(); } #endif //------------------------------------------------------------ Kokkos::Impl::SharedAllocationRecord<> * - record = m_map.allocate_shared( prop , arg_layout ); + record = m_map.allocate_shared( prop_copy , arg_layout ); //------------------------------------------------------------ #if defined( KOKKOS_ENABLE_CUDA ) if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - traits::device_type::memory_space::execution_space::fence(); + typename traits::device_type::memory_space::execution_space().fence(); } #endif //------------------------------------------------------------ diff --git a/lib/kokkos/containers/src/Kokkos_ScatterView.hpp b/lib/kokkos/containers/src/Kokkos_ScatterView.hpp index 8e56857887..a8c05e3f36 100644 --- a/lib/kokkos/containers/src/Kokkos_ScatterView.hpp +++ b/lib/kokkos/containers/src/Kokkos_ScatterView.hpp @@ -57,9 +57,16 @@ namespace Kokkos { namespace Experimental { -//TODO: replace this enum with the Kokkos::Sum, etc reducers for parallel_reduce +/* + * Reduction Type list + * - These corresponds to subset of the reducers in parallel_reduce + * - See Implementations of ScatterValue for details. + */ enum : int { ScatterSum, + ScatterProd, + ScatterMax, + ScatterMin, }; enum : int { @@ -114,6 +121,21 @@ struct DefaultContribution +struct DefaultDuplication { + enum : int { value = Kokkos::Experimental::ScatterDuplicated }; +}; +template <> +struct DefaultContribution { + enum : int { value = Kokkos::Experimental::ScatterAtomic }; +}; +template <> +struct DefaultContribution { + enum : int { value = Kokkos::Experimental::ScatterNonAtomic }; +}; +#endif + #ifdef KOKKOS_ENABLE_THREADS template <> struct DefaultDuplication { @@ -144,39 +166,277 @@ struct DefaultContribution is the object returned by the access operator() of ScatterAccess, + This class inherits from the Sum<> reducer and it wraps join(dest, src) with convenient operator+=, etc. + Note the addition of update(ValueType const& rhs) and reset() so that all reducers can have common functions + See ReduceDuplicates and ResetDuplicates ) */ template struct ScatterValue; template -struct ScatterValue { +struct ScatterValue : + Sum { public: - KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : value( value_in ) {} - KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : value( other.value ) {} + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Sum(value_in) + {} + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : + Sum(other.reference()) + {} KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) { - value += rhs; + this->join( this->reference(), rhs ); } KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) { - value -= rhs; + this->join( this->reference(), -rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); } - private: - ValueType& value; }; +/* ScatterValue is the object returned by the access operator() + * of ScatterAccess, similar to that returned by an Atomic View, it wraps Kokkos::atomic_add with convenient + operator+=, etc. This version also has the update(rhs) and reset() functions. */ template -struct ScatterValue { +struct ScatterValue : + Sum { public: - KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : value( value_in ) {} + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Sum(value_in) + {} + KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) { - Kokkos::atomic_add(&value, rhs); + this->join(this->reference(), rhs); } KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) { - Kokkos::atomic_add(&value, -rhs); + this->join(this->reference(), -rhs); } - private: - ValueType& value; + + KOKKOS_INLINE_FUNCTION + void join(ValueType& dest, const ValueType& src) const { + Kokkos::atomic_add(&dest, src); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile ValueType& dest, const volatile ValueType& src) const { + Kokkos::atomic_add(&dest, src); + } + + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); + } +}; + +/* ScatterValue is the object returned by the access operator() of ScatterAccess, + This class inherits from the Prod<> reducer and it wraps join(dest, src) with convenient operator*=, etc. + Note the addition of update(ValueType const& rhs) and reset() so that all reducers can have common functions + See ReduceDuplicates and ResetDuplicates ) */ +template +struct ScatterValue : + Prod { + public: + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Prod(value_in) + {} + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : + Prod(other.reference()) + {} + KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) { + this->join( this->reference(), static_cast(1)/rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); + } +}; + +/* ScatterValue is the object returned by the access operator() + * of ScatterAccess, similar to that returned by an Atomic View, it wraps and atomic_prod with convenient + operator*=, etc. atomic_prod uses the atomic_compare_exchange. This version also has the update(rhs) and reset() functions. */ +template +struct ScatterValue : + Prod { + public: + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Prod(value_in) + {} + + KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) { + this->join(this->reference(), rhs); + } + KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) { + this->join(this->reference(), static_cast(1)/rhs); + } + + KOKKOS_FORCEINLINE_FUNCTION + void atomic_prod(ValueType & dest, const ValueType& src) const { + + bool success = false; + while(!success) { + ValueType dest_old = dest; + ValueType dest_new = dest_old * src; + dest_new = Kokkos::atomic_compare_exchange(&dest,dest_old,dest_new); + success = ( (dest_new - dest_old)/dest_old <= 1e-15 ); + } + } + + KOKKOS_INLINE_FUNCTION + void join(ValueType& dest, const ValueType& src) const { + atomic_prod(dest, src); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile ValueType& dest, const volatile ValueType& src) const { + atomic_prod(dest, src); + } + + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); + } + +}; + +/* ScatterValue is the object returned by the access operator() of ScatterAccess, + This class inherits from the Min<> reducer and it wraps join(dest, src) with convenient update(rhs). + Note the addition of update(ValueType const& rhs) and reset() are so that all reducers can have a common update function + See ReduceDuplicates and ResetDuplicates ) */ +template +struct ScatterValue : + Min { + public: + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Min(value_in) + {} + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : + Min(other.reference()) + {} + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); + } +}; + +/* ScatterValue is the object returned by the access operator() + * of ScatterAccess, similar to that returned by an Atomic View, it wraps and atomic_min with the update(rhs) + function. atomic_min uses the atomic_compare_exchange. This version also has the reset() function */ +template +struct ScatterValue : + Min { + public: + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Min(value_in) + {} + + KOKKOS_FORCEINLINE_FUNCTION + void atomic_min(ValueType & dest, const ValueType& src) const { + + bool success = false; + while(!success) { + ValueType dest_old = dest; + ValueType dest_new = ( dest_old > src ) ? src : dest_old; + dest_new = Kokkos::atomic_compare_exchange(&dest,dest_old,dest_new); + success = ( (dest_new - dest_old)/dest_old <= 1e-15 ); + } + } + + KOKKOS_INLINE_FUNCTION + void join(ValueType& dest, const ValueType& src) const { + atomic_min(dest, src); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile ValueType& dest, const volatile ValueType& src) const { + atomic_min(dest, src); + } + + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); + } + +}; + +/* ScatterValue is the object returned by the access operataor() of ScatterAccess, + This class inherits from the Max<> reducer and it wraps join(dest, src) with convenient update(rhs). + Note the addition of update(ValueType const& rhs) and reset() are so that all reducers can have a common update function + See ReduceDuplicates and ResetDuplicates ) */ +template +struct ScatterValue : + Max { + public: + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Max(value_in) + {} + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : + Max(other.reference()) + {} + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); + } +}; + +/* ScatterValue is the object returned by the access operator() + * of ScatterAccess, similar to that returned by an Atomic View, it wraps and atomic_max with the update(rhs) + function. atomic_max uses the atomic_compare_exchange. This version also has the reset() function */ +template +struct ScatterValue : + Max { + public: + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Max(value_in) + {} + + KOKKOS_FORCEINLINE_FUNCTION + void atomic_max(ValueType & dest, const ValueType& src) const { + + bool success = false; + while(!success) { + ValueType dest_old = dest; + ValueType dest_new = ( dest_old < src ) ? src : dest_old; + dest_new = Kokkos::atomic_compare_exchange(&dest,dest_old,dest_new); + success = ( (dest_new - dest_old)/dest_old <= 1e-15 ); + } + } + + KOKKOS_INLINE_FUNCTION + void join(ValueType& dest, const ValueType& src) const { + atomic_max(dest, src); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile ValueType& dest, const volatile ValueType& src) const { + atomic_max(dest, src); + } + + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); + } + }; /* DuplicatedDataType, given a View DataType, will create a new DataType @@ -226,6 +486,18 @@ struct DuplicatedDataType { typedef typename DuplicatedDataType::value_type* value_type; }; +/* Insert integer argument pack into array */ + +template +void args_to_array(size_t* array, int pos, T dim0) { + array[pos] = dim0; +} +template +void args_to_array(size_t* array, int pos, T dim0, Dims ... dims) { + array[pos] = dim0; + args_to_array(array,pos+1,dims...); +} + /* Slice is just responsible for stuffing the correct number of Kokkos::ALL arguments on the correct side of the index in a call to subview() to get a subview where the index specified is the largest-stride one. */ @@ -304,21 +576,26 @@ struct ReduceDuplicatesBase { } }; -template -struct ReduceDuplicates : - public ReduceDuplicatesBase +/* ReduceDuplicates -- Perform reduction on destination array using strided source + * Use ScatterValue<> specific to operation to wrap destination array so that + * the reduction operation can be accessed via the update(rhs) function */ +template +struct ReduceDuplicates : + public ReduceDuplicatesBase { - typedef ReduceDuplicatesBase Base; + typedef ReduceDuplicatesBase Base; ReduceDuplicates(ValueType const* src_in, ValueType* dst_in, size_t stride_in, size_t start_in, size_t n_in, std::string const& name): Base(src_in, dst_in, stride_in, start_in, n_in, name) {} KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const { for (size_t j = Base::start; j < Base::n; ++j) { - Base::dst[i] += Base::src[i + Base::stride * j]; + ScatterValue sv(Base::dst[i]); + sv.update( Base::src[i + Base::stride * j] ); } } }; + template struct ResetDuplicates; @@ -347,19 +624,24 @@ struct ResetDuplicatesBase { } }; -template -struct ResetDuplicates : - public ResetDuplicatesBase +/* ResetDuplicates -- Perform reset on destination array + * Use ScatterValue<> specific to operation to wrap destination array so that + * the reset operation can be accessed via the reset() function */ +template +struct ResetDuplicates : + public ResetDuplicatesBase { - typedef ResetDuplicatesBase Base; + typedef ResetDuplicatesBase Base; ResetDuplicates(ValueType* data_in, size_t size_in, std::string const& name): Base(data_in, size_in, name) {} KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const { - Base::data[i] = Kokkos::reduction_identity::sum(); + ScatterValue sv(Base::data[i]); + sv.reset(); } }; + }}} // Kokkos::Impl::Experimental namespace Kokkos { @@ -519,12 +801,22 @@ public: typedef Kokkos::Impl::Experimental::ScatterValue< original_value_type, Op, override_contribution> value_type; + KOKKOS_INLINE_FUNCTION + ScatterAccess() : + view(view_type()) { + } + KOKKOS_INLINE_FUNCTION ScatterAccess(view_type const& view_in) : view(view_in) { } + KOKKOS_INLINE_FUNCTION + ~ScatterAccess() + { + } + template KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args ... args) const { @@ -608,7 +900,7 @@ public: } template - inline + KOKKOS_FORCEINLINE_FUNCTION ScatterAccess access() const { return ScatterAccess{*this}; @@ -729,14 +1021,14 @@ public: : unique_token() { size_t arg_N[8] = { - original_view.extent(0), - original_view.extent(1), - original_view.extent(2), - original_view.extent(3), - original_view.extent(4), - original_view.extent(5), - original_view.extent(6), - 0 + original_view.rank>0?original_view.extent(0):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>1?original_view.extent(1):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>2?original_view.extent(2):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>3?original_view.extent(3):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>4?original_view.extent(4):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>5?original_view.extent(5):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>6?original_view.extent(6):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + KOKKOS_IMPL_CTOR_DEFAULT_ARG }; arg_N[internal_view_type::rank - 1] = unique_token.size(); internal_view = internal_view_type( @@ -748,14 +1040,28 @@ public: } template - ScatterView(std::string const& name, Dims ... dims) - : internal_view(Kokkos::ViewAllocateWithoutInitializing(name), dims ..., unique_token.size()) - { + ScatterView(std::string const& name, Dims ... dims) { + original_view_type original_view; + size_t arg_N[8] = { + original_view.rank>0?original_view.static_extent(0):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>1?original_view.static_extent(1):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>2?original_view.static_extent(2):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>3?original_view.static_extent(3):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>4?original_view.static_extent(4):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>5?original_view.static_extent(5):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>6?original_view.static_extent(6):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + KOKKOS_IMPL_CTOR_DEFAULT_ARG + }; + Kokkos::Impl::Experimental::args_to_array(arg_N,0,dims ...); + arg_N[internal_view_type::rank - 1] = unique_token.size(); + internal_view = internal_view_type(Kokkos::ViewAllocateWithoutInitializing(name), + arg_N[0], arg_N[1], arg_N[2], arg_N[3], + arg_N[4], arg_N[5], arg_N[6], arg_N[7]); reset(); } template - inline + KOKKOS_FORCEINLINE_FUNCTION ScatterAccess access() const { return ScatterAccess{*this}; @@ -770,9 +1076,13 @@ public: } template - void contribute_into(View const& dest) const + void contribute_into(View const& dest) const { - typedef View dest_type; + typedef View dest_type; + static_assert(std::is_same< + typename dest_type::value_type, + typename original_view_type::non_const_value_type>::value, + "ScatterView deep_copy destination has wrong value_type"); static_assert(std::is_same< typename dest_type::array_layout, Kokkos::LayoutLeft>::value, @@ -891,12 +1201,14 @@ public: typedef Kokkos::Impl::Experimental::ScatterValue< original_value_type, Op, override_contribution> value_type; - inline ScatterAccess(view_type const& view_in) + KOKKOS_FORCEINLINE_FUNCTION + ScatterAccess(view_type const& view_in) : view(view_in) , thread_id(view_in.unique_token.acquire()) { } - inline ~ScatterAccess() { + KOKKOS_FORCEINLINE_FUNCTION + ~ScatterAccess() { if (thread_id != ~thread_id_type(0)) view.unique_token.release(thread_id); } @@ -926,8 +1238,9 @@ private: public: // do need to allow moves though, for the common // auto b = a.access(); - // that assignments turns into a move constructor call - inline ScatterAccess(ScatterAccess&& other) + // that assignments turns into a move constructor call + KOKKOS_FORCEINLINE_FUNCTION + ScatterAccess(ScatterAccess&& other) : view(other.view) , thread_id(other.thread_id) { diff --git a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp index 64601e6b59..aed723288f 100644 --- a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp +++ b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp @@ -437,9 +437,9 @@ public: { bool result = !erasable(); if (is_insertable_map && result) { - execution_space::fence(); + execution_space().fence(); set_flag(erasable_idx); - execution_space::fence(); + execution_space().fence(); } return result; } @@ -448,10 +448,10 @@ public: { bool result = erasable(); if (is_insertable_map && result) { - execution_space::fence(); + execution_space().fence(); Impl::UnorderedMapErase f(*this); f.apply(); - execution_space::fence(); + execution_space().fence(); reset_flag(erasable_idx); } return result; diff --git a/lib/kokkos/containers/src/Kokkos_Vector.hpp b/lib/kokkos/containers/src/Kokkos_Vector.hpp index 76c515941e..9b151d9505 100644 --- a/lib/kokkos/containers/src/Kokkos_Vector.hpp +++ b/lib/kokkos/containers/src/Kokkos_Vector.hpp @@ -121,12 +121,12 @@ public: if( DV::template need_sync() ) { set_functor_host f(DV::h_view,val); parallel_for(n,f); - DV::t_host::execution_space::fence(); + typename DV::t_host::execution_space().fence(); DV::template modify(); } else { set_functor f(DV::d_view,val); parallel_for(n,f); - DV::t_dev::execution_space::fence(); + typename DV::t_dev::execution_space().fence(); DV::template modify(); } } diff --git a/lib/kokkos/containers/unit_tests/CMakeLists.txt b/lib/kokkos/containers/unit_tests/CMakeLists.txt index 0f94afec8c..8564bd9ddd 100644 --- a/lib/kokkos/containers/unit_tests/CMakeLists.txt +++ b/lib/kokkos/containers/unit_tests/CMakeLists.txt @@ -86,6 +86,31 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( ) ENDIF() +IF(Kokkos_ENABLE_HPX) +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_HPX + SOURCES + UnitTestMain.cpp + hpx/TestHPX_BitSet.cpp + hpx/TestHPX_DualView.cpp + hpx/TestHPX_DynamicView.cpp + hpx/TestHPX_DynRankViewAPI_generic.cpp + hpx/TestHPX_DynRankViewAPI_rank12345.cpp + hpx/TestHPX_DynRankViewAPI_rank67.cpp + hpx/TestHPX_ErrorReporter.cpp + hpx/TestHPX_OffsetView.cpp + hpx/TestHPX_ScatterView.cpp + hpx/TestHPX_StaticCrsGraph.cpp + hpx/TestHPX_UnorderedMap.cpp + hpx/TestHPX_Vector.cpp + hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) +ENDIF() + IF(Kokkos_ENABLE_Cuda) TRIBITS_ADD_EXECUTABLE_AND_TEST( UnitTest_Cuda diff --git a/lib/kokkos/containers/unit_tests/Makefile b/lib/kokkos/containers/unit_tests/Makefile index c0e5d2820c..a7e0233f8a 100644 --- a/lib/kokkos/containers/unit_tests/Makefile +++ b/lib/kokkos/containers/unit_tests/Makefile @@ -4,6 +4,7 @@ GTEST_PATH = ../../TPL/gtest vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/openmp +vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/hpx vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/serial vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/threads vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/rocm @@ -106,6 +107,25 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) TEST_TARGETS += test-openmp endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + OBJ_HPX = UnitTestMain.o gtest-all.o + OBJ_HPX += TestHPX_BitSet.o + OBJ_HPX += TestHPX_DualView.o + OBJ_HPX += TestHPX_DynamicView.o + OBJ_HPX += TestHPX_DynRankViewAPI_generic.o + OBJ_HPX += TestHPX_DynRankViewAPI_rank12345.o + OBJ_HPX += TestHPX_DynRankViewAPI_rank67.o + OBJ_HPX += TestHPX_ErrorReporter.o + OBJ_HPX += TestHPX_OffsetView.o + OBJ_HPX += TestHPX_ScatterView.o + OBJ_HPX += TestHPX_StaticCrsGraph.o + OBJ_HPX += TestHPX_UnorderedMap.o + OBJ_HPX += TestHPX_Vector.o + OBJ_HPX += TestHPX_ViewCtorPropEmbeddedDim.o + TARGETS += KokkosContainers_UnitTest_HPX + TEST_TARGETS += test-hpx +endif + ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) OBJ_SERIAL = UnitTestMain.o gtest-all.o OBJ_SERIAL += TestSerial_BitSet.o @@ -137,6 +157,9 @@ KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) KokkosContainers_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_OpenMP +KokkosContainers_UnitTest_HPX: $(OBJ_HPX) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) $(OBJ_HPX) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_HPX + KokkosContainers_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Serial @@ -152,6 +175,9 @@ test-threads: KokkosContainers_UnitTest_Threads test-openmp: KokkosContainers_UnitTest_OpenMP ./KokkosContainers_UnitTest_OpenMP +test-hpx: KokkosContainers_UnitTest_HPX + ./KokkosContainers_UnitTest_HPX + test-serial: KokkosContainers_UnitTest_Serial ./KokkosContainers_UnitTest_Serial diff --git a/lib/kokkos/containers/unit_tests/TestBitset.hpp b/lib/kokkos/containers/unit_tests/TestBitset.hpp index 6200124644..371c0288b1 100644 --- a/lib/kokkos/containers/unit_tests/TestBitset.hpp +++ b/lib/kokkos/containers/unit_tests/TestBitset.hpp @@ -66,7 +66,7 @@ struct TestBitset unsigned testit(unsigned collisions) { - execution_space::fence(); + execution_space().fence(); unsigned count = 0; Kokkos::parallel_reduce( m_bitset.size()*collisions, *this, count); @@ -114,7 +114,7 @@ struct TestBitsetTest unsigned testit() { - execution_space::fence(); + execution_space().fence(); unsigned count = 0; Kokkos::parallel_reduce( m_bitset.size(), *this, count); @@ -151,7 +151,7 @@ struct TestBitsetAny unsigned testit() { - execution_space::fence(); + execution_space().fence(); unsigned count = 0; Kokkos::parallel_reduce( m_bitset.size(), *this, count); diff --git a/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp b/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp index 6684a55452..13e56c9f8d 100644 --- a/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp +++ b/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp @@ -1276,6 +1276,7 @@ public: Kokkos::deep_copy( dx , hx ); Kokkos::deep_copy( dy , dx ); Kokkos::deep_copy( hy , dy ); + Kokkos::fence(); for ( size_t ip = 0 ; ip < N0 ; ++ip ) { for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { @@ -1286,6 +1287,7 @@ public: Kokkos::deep_copy( dx , T(0) ); Kokkos::deep_copy( hx , dx ); + Kokkos::fence(); for ( size_t ip = 0 ; ip < N0 ; ++ip ) { for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { diff --git a/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp b/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp index ecb7542232..7e48089b43 100644 --- a/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp +++ b/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp @@ -162,6 +162,7 @@ struct ErrorReporterDriver : public ErrorReporterDriverBase void execute(int reporter_capacity, int test_size) { Kokkos::parallel_for(Kokkos::RangePolicy(0,test_size), *this); + Kokkos::fence(); driver_base::check_expectations(reporter_capacity, test_size); } @@ -194,6 +195,7 @@ struct ErrorReporterDriverUseLambda : public ErrorReporterDriverBase driver_base::m_errorReporter.add_report(work_idx, report); } }); + Kokkos::fence(); driver_base::check_expectations(reporter_capacity, test_size); } diff --git a/lib/kokkos/containers/unit_tests/TestScatterView.hpp b/lib/kokkos/containers/unit_tests/TestScatterView.hpp index d402a91b9f..a9d97b32f3 100644 --- a/lib/kokkos/containers/unit_tests/TestScatterView.hpp +++ b/lib/kokkos/containers/unit_tests/TestScatterView.hpp @@ -48,79 +48,387 @@ namespace Test { +template +struct test_scatter_view_impl_cls; + template -void test_scatter_view_config(int n) +struct test_scatter_view_impl_cls { - Kokkos::View original_view("original_view", n); - { - auto scatter_view = Kokkos::Experimental::create_scatter_view - < Kokkos::Experimental::ScatterSum - , duplication - , contribution - > (original_view); -#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) - auto policy = Kokkos::RangePolicy(0, n); - auto f = KOKKOS_LAMBDA(int i) { +public: + + typedef Kokkos::Experimental::ScatterView + < double*[3] + , Layout + , ExecSpace + , Kokkos::Experimental::ScatterSum + , duplication + , contribution + > scatter_view_type; + + typedef Kokkos::View orig_view_type; + + + scatter_view_type scatter_view; + int scatterSize; + + test_scatter_view_impl_cls(const scatter_view_type& view){ + scatter_view = view; + scatterSize = 0; + } + + void initialize(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + host_view(i, 0) = 0.0; + host_view(i, 1) = 0.0; + host_view(i, 2) = 0.0; + } + Kokkos::fence(); + Kokkos::deep_copy(orig, host_view); + } + + void run_parallel(int n) { + scatterSize = n; + auto policy = Kokkos::RangePolicy(0, n); + Kokkos::parallel_for(policy, *this, "scatter_view_test: Sum"); + } + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { auto scatter_access = scatter_view.access(); auto scatter_access_atomic = scatter_view.template access(); for (int j = 0; j < 10; ++j) { - auto k = (i + j) % n; + auto k = (i + j) % scatterSize; scatter_access(k, 0) += 4.2; scatter_access_atomic(k, 1) += 2.0; scatter_access(k, 2) += 1.0; } - }; - Kokkos::parallel_for(policy, f, "scatter_view_test"); -#endif - Kokkos::Experimental::contribute(original_view, scatter_view); - scatter_view.reset_except(original_view); -#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) - Kokkos::parallel_for(policy, f, "scatter_view_test"); -#endif - Kokkos::Experimental::contribute(original_view, scatter_view); - } -#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) - Kokkos::fence(); - auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), original_view); - Kokkos::fence(); - for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { - auto val0 = host_view(i, 0); - auto val1 = host_view(i, 1); - auto val2 = host_view(i, 2); - EXPECT_TRUE(std::fabs((val0 - 84.0) / 84.0) < 1e-15); - EXPECT_TRUE(std::fabs((val1 - 40.0) / 40.0) < 1e-15); - EXPECT_TRUE(std::fabs((val2 - 20.0) / 20.0) < 1e-15); - } -#endif - { - Kokkos::Experimental::ScatterView - < double*[3] - , Layout - , ExecSpace - , Kokkos::Experimental::ScatterSum - , duplication - , contribution - > - persistent_view("persistent", n); - auto result_view = persistent_view.subview(); - contribute(result_view, persistent_view); - } -} + } -template + void validateResults(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + auto val0 = host_view(i, 0); + auto val1 = host_view(i, 1); + auto val2 = host_view(i, 2); + EXPECT_TRUE(std::fabs((val0 - 84.0) / 84.0) < 1e-14); + EXPECT_TRUE(std::fabs((val1 - 40.0) / 40.0) < 1e-14); + EXPECT_TRUE(std::fabs((val2 - 20.0) / 20.0) < 1e-14); + } + } +}; + + +template +struct test_scatter_view_impl_cls +{ +public: + + typedef Kokkos::Experimental::ScatterView + < double*[3] + , Layout + , ExecSpace + , Kokkos::Experimental::ScatterProd + , duplication + , contribution + > scatter_view_type; + + typedef Kokkos::View orig_view_type; + + + scatter_view_type scatter_view; + int scatterSize; + + test_scatter_view_impl_cls(const scatter_view_type& view){ + scatter_view = view; + scatterSize = 0; + } + + void initialize(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + host_view(i, 0) = 1.0; + host_view(i, 1) = 1.0; + host_view(i, 2) = 1.0; + } + Kokkos::fence(); + Kokkos::deep_copy(orig, host_view); + } + + void run_parallel(int n) { + scatterSize = n; + auto policy = Kokkos::RangePolicy(0, n); + Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); + } + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + auto scatter_access = scatter_view.access(); + auto scatter_access_atomic = scatter_view.template access(); + for (int j = 0; j < 4; ++j) { + auto k = (i + j) % scatterSize; + scatter_access(k, 0) *= 4.0; + scatter_access_atomic(k, 1) *= 2.0; + scatter_access(k, 2) *= 1.0; + } + } + + void validateResults(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + auto val0 = host_view(i, 0); + auto val1 = host_view(i, 1); + auto val2 = host_view(i, 2); + EXPECT_TRUE(std::fabs((val0 - 65536.0) / 65536.0) < 1e-14); + EXPECT_TRUE(std::fabs((val1 - 256.0) / 256.0) < 1e-14); + EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14); + } + } +}; + + +template +struct test_scatter_view_impl_cls +{ +public: + + typedef Kokkos::Experimental::ScatterView + < double*[3] + , Layout + , ExecSpace + , Kokkos::Experimental::ScatterMin + , duplication + , contribution + > scatter_view_type; + + typedef Kokkos::View orig_view_type; + + + scatter_view_type scatter_view; + int scatterSize; + + test_scatter_view_impl_cls(const scatter_view_type& view){ + scatter_view = view; + scatterSize = 0; + } + + void initialize(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + host_view(i, 0) = 999999.0; + host_view(i, 1) = 999999.0; + host_view(i, 2) = 999999.0; + } + Kokkos::fence(); + Kokkos::deep_copy(orig, host_view); + } + + void run_parallel(int n) { + scatterSize = n; + auto policy = Kokkos::RangePolicy(0, n); + Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); + } + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + auto scatter_access = scatter_view.access(); + auto scatter_access_atomic = scatter_view.template access(); + for (int j = 0; j < 4; ++j) { + auto k = (i + j) % scatterSize; + scatter_access(k, 0).update((double)(j+1)*4); + scatter_access_atomic(k, 1).update((double)(j+1)*2.0); + scatter_access(k, 2).update((double)(j+1)*1.0); + } + } + + void validateResults(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + auto val0 = host_view(i, 0); + auto val1 = host_view(i, 1); + auto val2 = host_view(i, 2); + EXPECT_TRUE(std::fabs((val0 - 4.0) / 4.0) < 1e-14); + EXPECT_TRUE(std::fabs((val1 - 2.0) / 2.0) < 1e-14); + EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14); + } + } +}; + + +template +struct test_scatter_view_impl_cls +{ +public: + + typedef Kokkos::Experimental::ScatterView + < double*[3] + , Layout + , ExecSpace + , Kokkos::Experimental::ScatterMax + , duplication + , contribution + > scatter_view_type; + + typedef Kokkos::View orig_view_type; + + + scatter_view_type scatter_view; + int scatterSize; + + test_scatter_view_impl_cls(const scatter_view_type& view){ + scatter_view = view; + scatterSize = 0; + } + + void initialize(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + host_view(i, 0) = 0.0; + host_view(i, 1) = 0.0; + host_view(i, 2) = 0.0; + } + Kokkos::fence(); + Kokkos::deep_copy(orig, host_view); + } + + void run_parallel(int n) { + scatterSize = n; + auto policy = Kokkos::RangePolicy(0, n); + Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); + } + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + auto scatter_access = scatter_view.access(); + auto scatter_access_atomic = scatter_view.template access(); + for (int j = 0; j < 4; ++j) { + auto k = (i + j) % scatterSize; + scatter_access(k, 0).update((double)(j+1)*4); + scatter_access_atomic(k, 1).update((double)(j+1)*2.0); + scatter_access(k, 2).update((double)(j+1)*1.0); + } + } + + void validateResults(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + auto val0 = host_view(i, 0); + auto val1 = host_view(i, 1); + auto val2 = host_view(i, 2); + EXPECT_TRUE(std::fabs((val0 - 16.0) / 16.0) < 1e-14); + EXPECT_TRUE(std::fabs((val1 - 8.0) / 8.0) < 1e-14); + EXPECT_TRUE(std::fabs((val2 - 4.0) / 4.0) < 1e-14); + } + } +}; + + + +template +struct test_scatter_view_config +{ + public: + typedef typename test_scatter_view_impl_cls::scatter_view_type scatter_view_def; + typedef typename test_scatter_view_impl_cls::orig_view_type orig_view_def; + + test_scatter_view_config() { + } + + void run_test(int n) + { + //Test creation via create_scatter_view + { + orig_view_def original_view("original_view", n); + scatter_view_def scatter_view = Kokkos::Experimental::create_scatter_view + < op + , duplication + , contribution + > (original_view); + + test_scatter_view_impl_cls scatter_view_test_impl(scatter_view); + scatter_view_test_impl.initialize(original_view); + scatter_view_test_impl.run_parallel(n); + + Kokkos::Experimental::contribute(original_view, scatter_view); + scatter_view.reset_except(original_view); + + scatter_view_test_impl.run_parallel(n); + + Kokkos::Experimental::contribute(original_view, scatter_view); + Kokkos::fence(); + + scatter_view_test_impl.validateResults(original_view); + + { + scatter_view_def persistent_view("persistent", n); + auto result_view = persistent_view.subview(); + contribute(result_view, persistent_view); + Kokkos::fence(); + } + } + //Test creation via constructor + { + orig_view_def original_view("original_view", n); + scatter_view_def scatter_view(original_view); + + test_scatter_view_impl_cls scatter_view_test_impl(scatter_view); + scatter_view_test_impl.initialize(original_view); + scatter_view_test_impl.run_parallel(n); + + Kokkos::Experimental::contribute(original_view, scatter_view); + scatter_view.reset_except(original_view); + + scatter_view_test_impl.run_parallel(n); + + Kokkos::Experimental::contribute(original_view, scatter_view); + Kokkos::fence(); + + scatter_view_test_impl.validateResults(original_view); + + { + scatter_view_def persistent_view("persistent", n); + auto result_view = persistent_view.subview(); + contribute(result_view, persistent_view); + Kokkos::fence(); + } + } + } + +}; + + +template struct TestDuplicatedScatterView { TestDuplicatedScatterView(int n) { + // ScatterSum test test_scatter_view_config(n); + Kokkos::Experimental::ScatterNonAtomic, + ScatterType> test_sv_right_config; + test_sv_right_config.run_test(n); + test_scatter_view_config test_sv_left_config; + test_sv_left_config.run_test(n); } }; #ifdef KOKKOS_ENABLE_CUDA // disable duplicated instantiation with CUDA until // UniqueToken can support it -template <> -struct TestDuplicatedScatterView { +template +struct TestDuplicatedScatterView { TestDuplicatedScatterView(int) { } }; @@ -129,14 +437,14 @@ struct TestDuplicatedScatterView { #ifdef KOKKOS_ENABLE_ROCM // disable duplicated instantiation with ROCm until // UniqueToken can support it -template <> -struct TestDuplicatedScatterView { +template +struct TestDuplicatedScatterView { TestDuplicatedScatterView(int) { } }; #endif -template +template void test_scatter_view(int n) { // all of these configurations should compile okay, but only some of them are @@ -149,29 +457,47 @@ void test_scatter_view(int n) if (unique_token.size() == 1) { test_scatter_view_config(n); + Kokkos::Experimental::ScatterNonAtomic, + ScatterType> test_sv_config; + test_sv_config.run_test(n); } #ifdef KOKKOS_ENABLE_SERIAL if (!std::is_same::value) { #endif test_scatter_view_config(n); + Kokkos::Experimental::ScatterAtomic, + ScatterType> test_sv_config; + test_sv_config.run_test(n); #ifdef KOKKOS_ENABLE_SERIAL } #endif - - TestDuplicatedScatterView duptest(n); + // with hundreds of threads we were running out of memory. + // limit (n) so that duplication doesn't exceed 8GB + constexpr std::size_t maximum_allowed_total_bytes = 8ull * 1024ull * 1024ull * 1024ull; + std::size_t const maximum_allowed_copy_bytes = maximum_allowed_total_bytes / std::size_t(unique_token.size()); + constexpr std::size_t bytes_per_value = sizeof(double) * 3; + std::size_t const maximum_allowed_copy_values = maximum_allowed_copy_bytes / bytes_per_value; + n = std::min(n, int(maximum_allowed_copy_values)); + TestDuplicatedScatterView duptest(n); } TEST_F( TEST_CATEGORY, scatterview) { #ifndef KOKKOS_ENABLE_ROCM - test_scatter_view(10); + test_scatter_view(10); + test_scatter_view(10); + test_scatter_view(10); + test_scatter_view(10); + // tests were timing out in DEBUG mode, reduce the amount of work #ifdef KOKKOS_ENABLE_DEBUG - test_scatter_view(100000); + int big_n = 100 * 1000; #else - test_scatter_view(10000000); + int big_n = 10 * 1000 * 1000; #endif + test_scatter_view(big_n); + test_scatter_view(big_n); + test_scatter_view(big_n); + test_scatter_view(big_n); #endif } diff --git a/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp b/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp index 426db1dbf0..2d34267df3 100644 --- a/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp +++ b/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp @@ -69,7 +69,7 @@ struct TestInsert void testit( bool rehash_on_fail = true ) { - execution_space::fence(); + execution_space().fence(); uint32_t failed_count = 0; do { @@ -82,7 +82,7 @@ struct TestInsert } } while (rehash_on_fail && failed_count > 0u); - execution_space::fence(); + execution_space().fence(); } @@ -122,9 +122,9 @@ struct TestInsert void testit() { - execution_space::fence(); + execution_space().fence(); Kokkos::parallel_for(m_num_erase, *this); - execution_space::fence(); + execution_space().fence(); } KOKKOS_INLINE_FUNCTION @@ -161,9 +161,9 @@ struct TestInsert void testit(value_type &errors) { - execution_space::execution_space::fence(); + execution_space().fence(); Kokkos::parallel_reduce(m_map.capacity(), *this, errors); - execution_space::execution_space::fence(); + execution_space().fence(); } KOKKOS_INLINE_FUNCTION @@ -247,7 +247,7 @@ void test_failed_insert( uint32_t num_nodes) map_type map(num_nodes); Impl::TestInsert test_insert(map, 2u*num_nodes, 1u); test_insert.testit(false /*don't rehash on fail*/); - Device::execution_space::fence(); + typename Device::execution_space().fence(); EXPECT_TRUE( map.failed_insert() ); } diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_BitSet.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_BitSet.cpp new file mode 100644 index 0000000000..cec24e00c7 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_BitSet.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_Category.hpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_Category.hpp new file mode 100644 index 0000000000..358b42d1aa --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_Category.hpp @@ -0,0 +1,65 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_HPX_HPP +#define KOKKOS_TEST_HPX_HPP + +#include + +namespace Test { + +class hpx : public ::testing::Test { +protected: + static void SetUpTestCase() { + } + + static void TearDownTestCase() { + } +}; + +} // namespace Test + +#define TEST_CATEGORY hpx +#define TEST_EXECSPACE Kokkos::Experimental::HPX + +#endif diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_DualView.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DualView.cpp new file mode 100644 index 0000000000..80af9dc33a --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DualView.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_generic.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_generic.cpp new file mode 100644 index 0000000000..95d49c8acf --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_generic.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank12345.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank12345.cpp new file mode 100644 index 0000000000..72e0bc6616 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank12345.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank67.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank67.cpp new file mode 100644 index 0000000000..5a104f0de2 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank67.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynamicView.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynamicView.cpp new file mode 100644 index 0000000000..718b322684 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynamicView.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_ErrorReporter.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_ErrorReporter.cpp new file mode 100644 index 0000000000..ea819ae343 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_ErrorReporter.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_OffsetView.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_OffsetView.cpp new file mode 100644 index 0000000000..4d3684923f --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_OffsetView.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_ScatterView.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_ScatterView.cpp new file mode 100644 index 0000000000..6a871cc121 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_ScatterView.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_StaticCrsGraph.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_StaticCrsGraph.cpp new file mode 100644 index 0000000000..fbb70a762b --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_StaticCrsGraph.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_UnorderedMap.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_UnorderedMap.cpp new file mode 100644 index 0000000000..7e7aad309f --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_UnorderedMap.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_Vector.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_Vector.cpp new file mode 100644 index 0000000000..5fb3664197 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_Vector.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp new file mode 100644 index 0000000000..fb9c263c83 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/core/cmake/Dependencies.cmake b/lib/kokkos/core/cmake/Dependencies.cmake index 8d9872725e..9ad7660bdf 100644 --- a/lib/kokkos/core/cmake/Dependencies.cmake +++ b/lib/kokkos/core/cmake/Dependencies.cmake @@ -1,5 +1,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( - LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREADS DLlib + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREADS DLlib HPX TEST_OPTIONAL_TPLS CUSPARSE ) diff --git a/lib/kokkos/core/perf_test/CMakeLists.txt b/lib/kokkos/core/perf_test/CMakeLists.txt index d9c0f89413..d92462a357 100644 --- a/lib/kokkos/core/perf_test/CMakeLists.txt +++ b/lib/kokkos/core/perf_test/CMakeLists.txt @@ -47,6 +47,7 @@ TRIBITS_ADD_EXECUTABLE( PerformanceTest_TaskDAG SOURCES test_taskdag.cpp COMM serial mpi + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) TRIBITS_ADD_TEST( diff --git a/lib/kokkos/core/perf_test/Makefile b/lib/kokkos/core/perf_test/Makefile index a0ce1e2c31..ca98ca6dde 100644 --- a/lib/kokkos/core/perf_test/Makefile +++ b/lib/kokkos/core/perf_test/Makefile @@ -30,6 +30,7 @@ TARGETS = # OBJ_PERF = PerfTestMain.o gtest-all.o +OBJ_PERF += PerfTest_ExecSpacePartitioning.o OBJ_PERF += PerfTestGramSchmidt.o OBJ_PERF += PerfTestHexGrad.o OBJ_PERF += PerfTest_CustomReduction.o diff --git a/lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp b/lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp index bb2fb5fce5..ff9bf5a91b 100644 --- a/lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp +++ b/lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp @@ -44,6 +44,8 @@ #ifndef KOKKOS_BLAS_KERNELS_HPP #define KOKKOS_BLAS_KERNELS_HPP +#include + namespace Kokkos { template< class ConstVectorType , @@ -123,15 +125,10 @@ struct Dot { typedef typename Device::execution_space execution_space ; - typedef typename - Impl::StaticAssertSame< Impl::unsigned_< 1 > , - Impl::unsigned_< Type::Rank > >::type ok_rank ; + static_assert( static_cast(Type::Rank) == static_cast(1), + "Dot static_assert Fail: Rank != 1"); -/* typedef typename - Impl::StaticAssertSame< execution_space , - typename Type::execution_space >::type ok_device ;*/ - typedef double value_type ; #if 1 @@ -164,13 +161,8 @@ struct DotSingle { typedef typename Device::execution_space execution_space ; - typedef typename - Impl::StaticAssertSame< Impl::unsigned_< 1 > , - Impl::unsigned_< Type::Rank > >::type ok_rank ; - -/* typedef typename - Impl::StaticAssertSame< execution_space , - typename Type::execution_space >::type ok_device ;*/ + static_assert( static_cast(Type::Rank) == static_cast(1), + "DotSingle static_assert Fail: Rank != 1"); typedef double value_type ; @@ -204,25 +196,11 @@ struct Scale { typedef typename Device::execution_space execution_space ; -/* typedef typename - Impl::StaticAssertSame< execution_space , - typename ScalarType::execution_space >::type - ok_scalar_device ; + static_assert( static_cast(ScalarType::Rank) == static_cast(0), + "Scale static_assert Fail: ScalarType::Rank != 0"); - typedef typename - Impl::StaticAssertSame< execution_space , - typename VectorType::execution_space >::type - ok_vector_device ;*/ - - typedef typename - Impl::StaticAssertSame< Impl::unsigned_< 0 > , - Impl::unsigned_< ScalarType::Rank > >::type - ok_scalar_rank ; - - typedef typename - Impl::StaticAssertSame< Impl::unsigned_< 1 > , - Impl::unsigned_< VectorType::Rank > >::type - ok_vector_rank ; + static_assert( static_cast(VectorType::Rank) == static_cast(1), + "Scale static_assert Fail: VectorType::Rank != 1"); #if 1 typename ScalarType::const_type alpha ; @@ -251,35 +229,14 @@ struct AXPBY { typedef typename Device::execution_space execution_space ; -/* typedef typename - Impl::StaticAssertSame< execution_space , - typename ScalarType::execution_space >::type - ok_scalar_device ; + static_assert( static_cast(ScalarType::Rank) == static_cast(0), + "AXPBY static_assert Fail: ScalarType::Rank != 0"); - typedef typename - Impl::StaticAssertSame< execution_space , - typename ConstVectorType::execution_space >::type - ok_const_vector_device ; + static_assert( static_cast(ConstVectorType::Rank) == static_cast(1), + "AXPBY static_assert Fail: ConstVectorType::Rank != 1"); - typedef typename - Impl::StaticAssertSame< execution_space , - typename VectorType::execution_space >::type - ok_vector_device ;*/ - - typedef typename - Impl::StaticAssertSame< Impl::unsigned_< 0 > , - Impl::unsigned_< ScalarType::Rank > >::type - ok_scalar_rank ; - - typedef typename - Impl::StaticAssertSame< Impl::unsigned_< 1 > , - Impl::unsigned_< ConstVectorType::Rank > >::type - ok_const_vector_rank ; - - typedef typename - Impl::StaticAssertSame< Impl::unsigned_< 1 > , - Impl::unsigned_< VectorType::Rank > >::type - ok_vector_rank ; + static_assert( static_cast(VectorType::Rank) == static_cast(1), + "AXPBY static_assert Fail: VectorType::Rank != 1"); #if 1 typename ScalarType::const_type alpha , beta ; diff --git a/lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp b/lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp index b169b02903..d812b16d85 100644 --- a/lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp +++ b/lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp @@ -183,7 +183,7 @@ struct ModifiedGramSchmidt } } - execution_space::fence(); + execution_space().fence(); return timer.seconds(); } diff --git a/lib/kokkos/core/perf_test/PerfTestHexGrad.cpp b/lib/kokkos/core/perf_test/PerfTestHexGrad.cpp index b228dd2e2e..03285a375c 100644 --- a/lib/kokkos/core/perf_test/PerfTestHexGrad.cpp +++ b/lib/kokkos/core/perf_test/PerfTestHexGrad.cpp @@ -253,12 +253,12 @@ struct HexGrad double dt_min = 0 ; Kokkos::parallel_for( count , Init( coord ) ); - execution_space::fence(); + execution_space().fence(); for ( int i = 0 ; i < iter ; ++i ) { Kokkos::Timer timer ; Kokkos::parallel_for( count , HexGrad( coord , grad ) ); - execution_space::fence(); + execution_space().fence(); const double dt = timer.seconds(); if ( 0 == i ) dt_min = dt ; else dt_min = dt < dt_min ? dt : dt_min ; diff --git a/lib/kokkos/core/perf_test/PerfTestMDRange.hpp b/lib/kokkos/core/perf_test/PerfTestMDRange.hpp index 51affa6a2e..f433451f78 100644 --- a/lib/kokkos/core/perf_test/PerfTestMDRange.hpp +++ b/lib/kokkos/core/perf_test/PerfTestMDRange.hpp @@ -125,15 +125,15 @@ struct MultiDimRangePerf3D Kokkos::MDRangePolicy, execution_space > policy(point_type{{0,0,0}},point_type{{icount,jcount,kcount}},tile_type{{Ti,Tj,Tk}} ); Kokkos::parallel_for( policy_initA, Init(Atest, icount, jcount, kcount) ); - execution_space::fence(); + execution_space().fence(); Kokkos::parallel_for( policy_initB, Init(Btest, icount+2, jcount+2, kcount+2) ); - execution_space::fence(); + execution_space().fence(); for (int i = 0; i < iter; ++i) { Kokkos::Timer timer; Kokkos::parallel_for( policy, FunctorType(Atest, Btest, icount, jcount, kcount) ); - execution_space::fence(); + execution_space().fence(); const double dt = timer.seconds(); if ( 0 == i ) dt_min = dt ; else dt_min = dt < dt_min ? dt : dt_min ; @@ -189,15 +189,15 @@ struct MultiDimRangePerf3D Kokkos::MDRangePolicy, execution_space > policy({{0,0,0}},{{icount,jcount,kcount}},{{Ti,Tj,Tk}} ); Kokkos::parallel_for( policy_initA, Init(Atest, icount, jcount, kcount) ); - execution_space::fence(); + execution_space().fence(); Kokkos::parallel_for( policy_initB, Init(Btest, icount+2, jcount+2, kcount+2) ); - execution_space::fence(); + execution_space().fence(); for (int i = 0; i < iter; ++i) { Kokkos::Timer timer; Kokkos::parallel_for( policy, FunctorType(Atest, Btest, icount, jcount, kcount) ); - execution_space::fence(); + execution_space().fence(); const double dt = timer.seconds(); if ( 0 == i ) dt_min = dt ; else dt_min = dt < dt_min ? dt : dt_min ; @@ -368,15 +368,15 @@ struct RangePolicyCollapseTwo double dt_min = 0; Kokkos::parallel_for( policy, Init(Atest,icount,jcount,kcount) ); - execution_space::fence(); + execution_space().fence(); Kokkos::parallel_for( policy_initB, Init(Btest,icount+2,jcount+2,kcount+2) ); - execution_space::fence(); + execution_space().fence(); for (int i = 0; i < iter; ++i) { Kokkos::Timer timer; Kokkos::parallel_for(policy, FunctorType(Atest, Btest, icount, jcount, kcount)); - execution_space::fence(); + execution_space().fence(); const double dt = timer.seconds(); if ( 0 == i ) dt_min = dt ; else dt_min = dt < dt_min ? dt : dt_min ; @@ -513,15 +513,15 @@ struct RangePolicyCollapseAll double dt_min = 0; Kokkos::parallel_for( policy, Init(Atest,icount,jcount,kcount) ); - execution_space::fence(); + execution_space().fence(); Kokkos::parallel_for( policy_initB, Init(Btest,icount+2,jcount+2,kcount+2) ); - execution_space::fence(); + execution_space().fence(); for (int i = 0; i < iter; ++i) { Kokkos::Timer timer; Kokkos::parallel_for(policy, FunctorType(Atest, Btest, icount, jcount, kcount)); - execution_space::fence(); + execution_space().fence(); const double dt = timer.seconds(); if ( 0 == i ) dt_min = dt ; else dt_min = dt < dt_min ? dt : dt_min ; diff --git a/lib/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp b/lib/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp new file mode 100644 index 0000000000..2fc889beed --- /dev/null +++ b/lib/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp @@ -0,0 +1,564 @@ +#include +#include +#include + + +namespace Test { + +namespace { + template + struct SpaceInstance { + static ExecSpace create() { + return ExecSpace(); + } + static void destroy(ExecSpace&) { + } + static bool overlap() { + return false; + } + }; + + #ifndef KOKKOS_ENABLE_DEBUG + #ifdef KOKKOS_ENABLE_CUDA + template<> + struct SpaceInstance { + static Kokkos::Cuda create() { + cudaStream_t stream; + cudaStreamCreate(&stream); + return Kokkos::Cuda(stream); + } + static void destroy(Kokkos::Cuda& space) { + cudaStream_t stream = space.cuda_stream(); + cudaStreamDestroy(stream); + } + static bool overlap() { + bool value = true; + auto local_rank_str = std::getenv("CUDA_LAUNCH_BLOCKING"); + if(local_rank_str) { + value = (std::atoi(local_rank_str)==0); + } + return value; + } + }; + #endif + #endif +} + +struct FunctorRange { + int M,R; + Kokkos::View a; + FunctorRange(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} + KOKKOS_INLINE_FUNCTION + void operator() (const int i) const { + for(int r=0;r a; + FunctorMDRange(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} + KOKKOS_INLINE_FUNCTION + void operator() (const int i, const int) const { + for(int j=0;j a; + FunctorTeam(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} + KOKKOS_INLINE_FUNCTION + void operator() (const Kokkos::TeamPolicy::member_type& team) const { + int i = team.league_rank(); + for(int r=0;r a; + FunctorRangeReduce(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} + KOKKOS_INLINE_FUNCTION + void operator() (const int i, double& tmp) const { + for(int r=0;r a; + FunctorMDRangeReduce(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} + KOKKOS_INLINE_FUNCTION + void operator() (const int i, const int, double& tmp) const { + for(int j=0;j a; + FunctorTeamReduce(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} + KOKKOS_INLINE_FUNCTION + void operator() (const Kokkos::TeamPolicy::member_type& team, double& tmp) const { + int i = team.league_rank(); + for(int r=0;r::create(); + TEST_EXECSPACE space2 = SpaceInstance::create(); + + Kokkos::View a("A",N,M); + FunctorRange f(M,R,a); + FunctorRangeReduce fr(M,R,a); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel0", + Kokkos::RangePolicy(0,N), FunctorRange(M,R,a)); + + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel1", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space1,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel2", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space2,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + + Kokkos::Timer timer; + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel3", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel4", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + + timer.reset(); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel5", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space1,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorRange(M,R,a)); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel6", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space2,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorRange(M,R,a)); + Kokkos::fence(); + double time_overlap = timer.seconds(); + + timer.reset(); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel7", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel8", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + double time_end = timer.seconds(); + + if(SpaceInstance::overlap()) { + ASSERT_TRUE( (time_end > 1.5*time_overlap) ); + } + printf("Time RangePolicy: NonOverlap: %lf Time Overlap: %lf\n",time_end,time_overlap); + + Kokkos::View result("result"); + Kokkos::View result1("result1"); + Kokkos::View result2("result2"); + Kokkos::View h_result("h_result"); + Kokkos::View h_result1("h_result1"); + Kokkos::View h_result2("h_result2"); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::fence(); + double time_fenced = timer.seconds(); + Kokkos::deep_copy(h_result,result); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + double time_not_fenced = timer.seconds(); + Kokkos::fence(); + if(SpaceInstance::overlap()) { + ASSERT_TRUE(time_fenced>2.0*time_not_fenced); + } + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::fence(); + double time_no_overlapped_reduce = timer.seconds(); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space1,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result1); + Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space2,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result2); + Kokkos::fence(); + double time_overlapped_reduce = timer.seconds(); + + Kokkos::deep_copy(h_result2,result2); + Kokkos::deep_copy(h_result1,result1); + + ASSERT_EQ(h_result1(),h_result()); + ASSERT_EQ(h_result2(),h_result()); + + if(SpaceInstance::overlap()) { + ASSERT_TRUE(time_overlapped_reduce < 1.5*time_no_overlapped_reduce); + } + printf("Time RangePolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",time_no_overlapped_reduce,time_overlapped_reduce); + SpaceInstance::destroy(space1); + SpaceInstance::destroy(space2); +} + +TEST_F( default_exec, overlap_mdrange_policy ) { + int N = 200; + int M = 10000; + int R = 10; + + TEST_EXECSPACE space; + TEST_EXECSPACE space1 = SpaceInstance::create(); + TEST_EXECSPACE space2 = SpaceInstance::create(); + + Kokkos::View a("A",N,M); + FunctorMDRange f(M,R,a); + FunctorMDRangeReduce fr(M,R,a); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel0", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>({0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorMDRange(M,R,a)); + + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel1", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space1,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel2", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space2,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + + Kokkos::Timer timer; + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel3", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel4", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + + timer.reset(); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel5", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space1,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorMDRange(M,R,a)); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel6", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space2,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorMDRange(M,R,a)); + Kokkos::fence(); + double time_overlap = timer.seconds(); + + timer.reset(); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel7", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel8", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + double time_end = timer.seconds(); + + if(SpaceInstance::overlap()) { + ASSERT_TRUE( (time_end > 1.5*time_overlap) ); + } + printf("Time MDRangePolicy: NonOverlap: %lf Time Overlap: %lf\n",time_end,time_overlap); + + Kokkos::View result("result"); + Kokkos::View result1("result1"); + Kokkos::View result2("result2"); + Kokkos::View h_result("h_result"); + Kokkos::View h_result1("h_result1"); + Kokkos::View h_result2("h_result2"); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::fence(); + double time_fenced = timer.seconds(); + Kokkos::deep_copy(h_result,result); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + double time_not_fenced = timer.seconds(); + Kokkos::fence(); + if(SpaceInstance::overlap()) { + ASSERT_TRUE(time_fenced>2.0*time_not_fenced); + } + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::fence(); + double time_no_overlapped_reduce = timer.seconds(); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space1,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result1); + Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space2,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result2); + Kokkos::fence(); + double time_overlapped_reduce = timer.seconds(); + + Kokkos::deep_copy(h_result2,result2); + Kokkos::deep_copy(h_result1,result1); + + ASSERT_EQ(h_result1(),h_result()); + ASSERT_EQ(h_result2(),h_result()); + + if(SpaceInstance::overlap()) { + ASSERT_TRUE(time_overlapped_reduce < 1.5*time_no_overlapped_reduce); + } + printf("Time MDRangePolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",time_no_overlapped_reduce,time_overlapped_reduce); + SpaceInstance::destroy(space2); + SpaceInstance::destroy(space1); + +} + +TEST_F( default_exec, overlap_team_policy ) { + int N = 20; + int M = 1000000; + int R = 10; + + TEST_EXECSPACE space; + TEST_EXECSPACE space1 = SpaceInstance::create(); + TEST_EXECSPACE space2 = SpaceInstance::create(); + + Kokkos::View a("A",N,M); + FunctorTeam f(M,R,a); + FunctorTeamReduce fr(M,R,a); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel0", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorTeam(M,R,a)); + + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel1", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space1,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel2", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space2,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + + Kokkos::Timer timer; + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel3", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel4", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + + timer.reset(); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel5", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space1,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorTeam(M,R,a)); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel6", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space2,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorTeam(M,R,a)); + Kokkos::fence(); + double time_overlap = timer.seconds(); + + timer.reset(); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel7", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel8", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + double time_end = timer.seconds(); + + if(SpaceInstance::overlap()) { + ASSERT_TRUE( (time_end > 1.5*time_overlap) ); + } + printf("Time TeamPolicy: NonOverlap: %lf Time Overlap: %lf\n",time_end,time_overlap); + + Kokkos::View result("result"); + Kokkos::View result1("result1"); + Kokkos::View result2("result2"); + Kokkos::View h_result("h_result"); + Kokkos::View h_result1("h_result1"); + Kokkos::View h_result2("h_result2"); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::fence(); + double time_fenced = timer.seconds(); + Kokkos::deep_copy(h_result,result); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + double time_not_fenced = timer.seconds(); + Kokkos::fence(); + if(SpaceInstance::overlap()) { + ASSERT_TRUE(time_fenced>2.0*time_not_fenced); + } + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::fence(); + double time_no_overlapped_reduce = timer.seconds(); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space1,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result1); + Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space2,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result2); + Kokkos::fence(); + double time_overlapped_reduce = timer.seconds(); + + Kokkos::deep_copy(h_result2,result2); + Kokkos::deep_copy(h_result1,result1); + + ASSERT_EQ(h_result1(),h_result()); + ASSERT_EQ(h_result2(),h_result()); + + if(SpaceInstance::overlap()) { + ASSERT_TRUE(time_overlapped_reduce < 1.5*time_no_overlapped_reduce); + } + printf("Time TeamPolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",time_no_overlapped_reduce,time_overlapped_reduce); + SpaceInstance::destroy(space1); + SpaceInstance::destroy(space2); +} +} diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewAllocate.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewAllocate.cpp index 7d64591d9f..685194c150 100644 --- a/lib/kokkos/core/perf_test/PerfTest_ViewAllocate.cpp +++ b/lib/kokkos/core/perf_test/PerfTest_ViewAllocate.cpp @@ -121,6 +121,7 @@ void run_allocateview_tests(int N, int R) { Kokkos::parallel_for(N8, KOKKOS_LAMBDA (const int& i) { a_ptr[i] = 0.0; }); + Kokkos::fence(); Kokkos::kokkos_free(a_ptr); } time_raw = timer.seconds()/R; diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy.hpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy.hpp index 3f46187957..eff31c69bb 100644 --- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy.hpp +++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy.hpp @@ -95,6 +95,7 @@ void run_deepcopyview_tests123(int N, int R) { a_ptr[i] = b_ptr[i]; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -135,6 +136,7 @@ void run_deepcopyview_tests45(int N, int R) { a_ptr[i] = b_ptr[i]; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -169,6 +171,7 @@ void run_deepcopyview_tests6(int N, int R) { a_ptr[i] = b_ptr[i]; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -202,6 +205,7 @@ void run_deepcopyview_tests7(int N, int R) { a_ptr[i] = b_ptr[i]; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -235,6 +239,7 @@ void run_deepcopyview_tests8(int N, int R) { a_ptr[i] = b_ptr[i]; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewFill.hpp b/lib/kokkos/core/perf_test/PerfTest_ViewFill.hpp index c50d13d1ed..b17356f0c8 100644 --- a/lib/kokkos/core/perf_test/PerfTest_ViewFill.hpp +++ b/lib/kokkos/core/perf_test/PerfTest_ViewFill.hpp @@ -90,6 +90,7 @@ void run_fillview_tests123(int N, int R) { a_ptr[i] = 1.1; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -126,6 +127,7 @@ void run_fillview_tests45(int N, int R) { a_ptr[i] = 1.1; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -157,6 +159,7 @@ void run_fillview_tests6(int N, int R) { a_ptr[i] = 1.1; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -187,6 +190,7 @@ void run_fillview_tests7(int N, int R) { a_ptr[i] = 1.1; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -217,6 +221,7 @@ void run_fillview_tests8(int N, int R) { a_ptr[i] = 1.1; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewResize.hpp b/lib/kokkos/core/perf_test/PerfTest_ViewResize.hpp index 2720f4855c..b5019b467a 100644 --- a/lib/kokkos/core/perf_test/PerfTest_ViewResize.hpp +++ b/lib/kokkos/core/perf_test/PerfTest_ViewResize.hpp @@ -95,7 +95,9 @@ void run_resizeview_tests123(int N, int R) { Kokkos::parallel_for(N8, KOKKOS_LAMBDA (const int& i) { a1_ptr[i] = a_ptr[i]; }); + Kokkos::fence(); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -143,7 +145,9 @@ void run_resizeview_tests45(int N, int R) { Kokkos::parallel_for(N8, KOKKOS_LAMBDA (const int& i) { a1_ptr[i] = a_ptr[i]; }); + Kokkos::fence(); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -181,7 +185,9 @@ void run_resizeview_tests6(int N, int R) { Kokkos::parallel_for(N8, KOKKOS_LAMBDA (const int& i) { a1_ptr[i] = a_ptr[i]; }); + Kokkos::fence(); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -218,7 +224,9 @@ void run_resizeview_tests7(int N, int R) { Kokkos::parallel_for(N8, KOKKOS_LAMBDA (const int& i) { a1_ptr[i] = a_ptr[i]; }); + Kokkos::fence(); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -255,7 +263,9 @@ void run_resizeview_tests8(int N, int R) { Kokkos::parallel_for(N8, KOKKOS_LAMBDA (const int& i) { a1_ptr[i] = a_ptr[i]; }); + Kokkos::fence(); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif diff --git a/lib/kokkos/core/perf_test/test_atomic.cpp b/lib/kokkos/core/perf_test/test_atomic.cpp index 6bb22e4e30..24e4f015d3 100644 --- a/lib/kokkos/core/perf_test/test_atomic.cpp +++ b/lib/kokkos/core/perf_test/test_atomic.cpp @@ -69,7 +69,7 @@ typedef Kokkos::DefaultExecutionSpace exec_space; #define WHITE 8 void textcolor(int attr, int fg, int bg) -{ char command[13]; +{ char command[40]; /* Command is the control command to the terminal */ sprintf(command, "%c[%d;%d;%dm", 0x1B, attr, fg + 30, bg + 40); @@ -85,7 +85,7 @@ struct ZeroFunctor{ typedef typename Kokkos::View::HostMirror h_type; type data; KOKKOS_INLINE_FUNCTION - void operator()(int i) const { + void operator()(int) const { data() = 0; } }; @@ -101,7 +101,7 @@ struct AddFunctor{ type data; KOKKOS_INLINE_FUNCTION - void operator()(int i) const { + void operator()(int) const { Kokkos::atomic_fetch_add(&data(),(T)1); } }; @@ -113,12 +113,12 @@ T AddLoop(int loop) { typename ZeroFunctor::h_type h_data("HData"); f_zero.data = data; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); struct AddFunctor f_add; f_add.data = data; Kokkos::parallel_for(loop,f_add); - exec_space::fence(); + exec_space().fence(); Kokkos::deep_copy(h_data,data); T val = h_data(); @@ -132,7 +132,7 @@ struct AddNonAtomicFunctor{ type data; KOKKOS_INLINE_FUNCTION - void operator()(int i) const { + void operator()(int) const { data()+=(T)1; } }; @@ -145,12 +145,12 @@ T AddLoopNonAtomic(int loop) { f_zero.data = data; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); struct AddNonAtomicFunctor f_add; f_add.data = data; Kokkos::parallel_for(loop,f_add); - exec_space::fence(); + exec_space().fence(); Kokkos::deep_copy(h_data,data); T val = h_data(); @@ -178,7 +178,7 @@ struct CASFunctor{ type data; KOKKOS_INLINE_FUNCTION - void operator()(int i) const { + void operator()(int) const { T old = data(); T newval, assumed; do { @@ -197,12 +197,12 @@ T CASLoop(int loop) { typename ZeroFunctor::h_type h_data("HData"); f_zero.data = data; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); struct CASFunctor f_cas; f_cas.data = data; Kokkos::parallel_for(loop,f_cas); - exec_space::fence(); + exec_space().fence(); Kokkos::deep_copy(h_data,data); T val = h_data(); @@ -217,7 +217,7 @@ struct CASNonAtomicFunctor{ type data; KOKKOS_INLINE_FUNCTION - void operator()(int i) const { + void operator()(int) const { volatile T assumed; volatile T newval; bool fail=1; @@ -240,12 +240,12 @@ T CASLoopNonAtomic(int loop) { typename ZeroFunctor::h_type h_data("HData"); f_zero.data = data; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); struct CASNonAtomicFunctor f_cas; f_cas.data = data; Kokkos::parallel_for(loop,f_cas); - exec_space::fence(); + exec_space().fence(); Kokkos::deep_copy(h_data,data); T val = h_data(); @@ -296,19 +296,19 @@ T ExchLoop(int loop) { typename ZeroFunctor::h_type h_data("HData"); f_zero.data = data; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); typename ZeroFunctor::type data2("Data"); typename ZeroFunctor::h_type h_data2("HData"); f_zero.data = data2; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); struct ExchFunctor f_exch; f_exch.data = data; f_exch.data2 = data2; Kokkos::parallel_for(loop,f_exch); - exec_space::fence(); + exec_space().fence(); Kokkos::deep_copy(h_data,data); Kokkos::deep_copy(h_data2,data2); @@ -339,19 +339,19 @@ T ExchLoopNonAtomic(int loop) { typename ZeroFunctor::h_type h_data("HData"); f_zero.data = data; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); typename ZeroFunctor::type data2("Data"); typename ZeroFunctor::h_type h_data2("HData"); f_zero.data = data2; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); struct ExchNonAtomicFunctor f_exch; f_exch.data = data; f_exch.data2 = data2; Kokkos::parallel_for(loop,f_exch); - exec_space::fence(); + exec_space().fence(); Kokkos::deep_copy(h_data,data); Kokkos::deep_copy(h_data2,data2); diff --git a/lib/kokkos/core/perf_test/test_mempool.cpp b/lib/kokkos/core/perf_test/test_mempool.cpp index 9fd58eda91..c47730ec69 100644 --- a/lib/kokkos/core/perf_test/test_mempool.cpp +++ b/lib/kokkos/core/perf_test/test_mempool.cpp @@ -153,6 +153,7 @@ struct TestFunctor { typedef Kokkos::RangePolicy< ExecSpace , TagDel > policy ; Kokkos::parallel_for( policy(0,range_iter), *this ); + Kokkos::fence(); } //---------------------------------------- diff --git a/lib/kokkos/core/perf_test/test_taskdag.cpp b/lib/kokkos/core/perf_test/test_taskdag.cpp index 8d5e1c475f..41198edfe1 100644 --- a/lib/kokkos/core/perf_test/test_taskdag.cpp +++ b/lib/kokkos/core/perf_test/test_taskdag.cpp @@ -92,27 +92,26 @@ long fib_alloc_count( long n ) return count[ n & mask ]; } -template< class Space > +template< class Scheduler > struct TestFib { - using Scheduler = Kokkos::TaskScheduler< Space > ; using MemorySpace = typename Scheduler::memory_space ; using MemberType = typename Scheduler::member_type ; - using FutureType = Kokkos::Future< long , Space > ; + using FutureType = Kokkos::BasicFuture< long , Scheduler > ; typedef long value_type ; - Scheduler sched ; FutureType dep[2] ; const value_type n ; KOKKOS_INLINE_FUNCTION - TestFib( const Scheduler & arg_sched , const value_type arg_n ) - : sched( arg_sched ), dep{} , n( arg_n ) {} + TestFib( const value_type arg_n ) + : dep{} , n( arg_n ) {} KOKKOS_INLINE_FUNCTION - void operator()( const MemberType & , value_type & result ) noexcept + void operator()( MemberType & member, value_type & result ) noexcept { + auto& sched = member.scheduler(); if ( n < 2 ) { result = n ; } @@ -126,13 +125,13 @@ struct TestFib { dep[1] = Kokkos::task_spawn ( Kokkos::TaskSingle( sched, Kokkos::TaskPriority::High ) - , TestFib( sched, n - 2 ) ); + , TestFib( n - 2 ) ); dep[0] = Kokkos::task_spawn ( Kokkos::TaskSingle( sched ) - , TestFib( sched, n - 1 ) ); + , TestFib( n - 1 ) ); - Kokkos::Future< ExecSpace > fib_all = Kokkos::when_all( dep, 2 ); + auto fib_all = sched.when_all( dep, 2 ); if ( ! dep[0].is_null() && ! dep[1].is_null() && ! fib_all.is_null() ) { // High priority to retire this branch. @@ -202,13 +201,15 @@ int main( int argc , char* argv[] ) return -1; } - typedef TestFib< ExecSpace > Functor ; + using Scheduler = Kokkos::TaskSchedulerMultiple; + + typedef TestFib< Scheduler > Functor ; Kokkos::initialize(argc,argv); { - Functor::Scheduler sched( Functor::MemorySpace() + Scheduler sched( Functor::MemorySpace() , total_alloc_size , min_block_size , max_block_size @@ -217,21 +218,21 @@ int main( int argc , char* argv[] ) Functor::FutureType f = Kokkos::host_spawn( Kokkos::TaskSingle( sched ) - , Functor( sched , fib_input ) + , Functor( fib_input ) ); Kokkos::wait( sched ); test_result = f.get(); - task_count_max = sched.allocated_task_count_max(); - task_count_accum = sched.allocated_task_count_accum(); + //task_count_max = sched.allocated_task_count_max(); + //task_count_accum = sched.allocated_task_count_accum(); - if ( number_alloc != task_count_accum ) { - std::cout << " number_alloc( " << number_alloc << " )" - << " != task_count_accum( " << task_count_accum << " )" - << std::endl ; - } + //if ( number_alloc != task_count_accum ) { + // std::cout << " number_alloc( " << number_alloc << " )" + // << " != task_count_accum( " << task_count_accum << " )" + // << std::endl ; + //} if ( fib_output != test_result ) { std::cout << " answer( " << fib_output << " )" @@ -239,7 +240,7 @@ int main( int argc , char* argv[] ) << std::endl ; } - if ( fib_output != test_result || number_alloc != task_count_accum ) { + if ( fib_output != test_result) { // || number_alloc != task_count_accum ) { printf(" TEST FAILED\n"); return -1; } @@ -252,7 +253,7 @@ int main( int argc , char* argv[] ) Functor::FutureType ftmp = Kokkos::host_spawn( Kokkos::TaskSingle( sched ) - , Functor( sched , fib_input ) + , Functor( fib_input ) ); Kokkos::wait( sched ); diff --git a/lib/kokkos/core/src/CMakeLists.txt b/lib/kokkos/core/src/CMakeLists.txt index ab7f3f55c7..a941c5da0c 100644 --- a/lib/kokkos/core/src/CMakeLists.txt +++ b/lib/kokkos/core/src/CMakeLists.txt @@ -61,6 +61,16 @@ IF(KOKKOS_LEGACY_TRIBITS) #----------------------------------------------------------------------------- + FILE(GLOB HEADERS_HPX HPX/*.hpp) + FILE(GLOB SOURCES_HPX HPX/*.cpp) + + LIST(APPEND HEADERS_PRIVATE ${HEADERS_HPX} ) + LIST(APPEND SOURCES ${SOURCES_HPX} ) + + INSTALL(FILES ${HEADERS_HPX} DESTINATION ${TRILINOS_INCDIR}/HPX/) + + #----------------------------------------------------------------------------- + FILE(GLOB HEADERS_CUDA Cuda/*.hpp) FILE(GLOB SOURCES_CUDA Cuda/*.cpp) diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp deleted file mode 100644 index c31b7f5b5d..0000000000 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp +++ /dev/null @@ -1,419 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_CUDAEXEC_HPP -#define KOKKOS_CUDAEXEC_HPP - -#include -#ifdef KOKKOS_ENABLE_CUDA - -#include -#include -#include -#include -#include -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -struct CudaTraits { - enum { WarpSize = 32 /* 0x0020 */ }; - enum { WarpIndexMask = 0x001f /* Mask for warpindex */ }; - enum { WarpIndexShift = 5 /* WarpSize == 1 << WarpShift */ }; - - enum { SharedMemoryBanks = 32 /* Compute device 2.0 */ }; - enum { SharedMemoryCapacity = 0x0C000 /* 48k shared / 16k L1 Cache */ }; - enum { SharedMemoryUsage = 0x04000 /* 16k shared / 48k L1 Cache */ }; - - enum { UpperBoundGridCount = 65535 /* Hard upper bound */ }; - enum { ConstantMemoryCapacity = 0x010000 /* 64k bytes */ }; - enum { ConstantMemoryUsage = 0x008000 /* 32k bytes */ }; - enum { ConstantMemoryCache = 0x002000 /* 8k bytes */ }; - - typedef unsigned long - ConstantGlobalBufferType[ ConstantMemoryUsage / sizeof(unsigned long) ]; - - enum { ConstantMemoryUseThreshold = 0x000200 /* 512 bytes */ }; - - KOKKOS_INLINE_FUNCTION static - CudaSpace::size_type warp_count( CudaSpace::size_type i ) - { return ( i + WarpIndexMask ) >> WarpIndexShift ; } - - KOKKOS_INLINE_FUNCTION static - CudaSpace::size_type warp_align( CudaSpace::size_type i ) - { - enum { Mask = ~CudaSpace::size_type( WarpIndexMask ) }; - return ( i + WarpIndexMask ) & Mask ; - } -}; - -//---------------------------------------------------------------------------- - -CudaSpace::size_type cuda_internal_multiprocessor_count(); -CudaSpace::size_type cuda_internal_maximum_warp_count(); -CudaSpace::size_type cuda_internal_maximum_grid_count(); -CudaSpace::size_type cuda_internal_maximum_shared_words(); - -CudaSpace::size_type cuda_internal_maximum_concurrent_block_count(); - -CudaSpace::size_type * cuda_internal_scratch_flags( const CudaSpace::size_type size ); -CudaSpace::size_type * cuda_internal_scratch_space( const CudaSpace::size_type size ); -CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type size ); - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( __CUDACC__ ) - -/** \brief Access to constant memory on the device */ -#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE - -__device__ __constant__ -extern unsigned long kokkos_impl_cuda_constant_memory_buffer[] ; - -#else - -__device__ __constant__ -unsigned long kokkos_impl_cuda_constant_memory_buffer[ Kokkos::Impl::CudaTraits::ConstantMemoryUsage / sizeof(unsigned long) ] ; - -#endif - -namespace Kokkos { -namespace Impl { - void* cuda_resize_scratch_space(std::int64_t bytes, bool force_shrink = false); -} -} - -template< typename T > -inline -__device__ -T * kokkos_impl_cuda_shared_memory() -{ extern __shared__ Kokkos::CudaSpace::size_type sh[]; return (T*) sh ; } - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- -// See section B.17 of Cuda C Programming Guide Version 3.2 -// for discussion of -// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor) -// function qualifier which could be used to improve performance. -//---------------------------------------------------------------------------- -// Maximize L1 cache and minimize shared memory: -// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferL1 ); -// For 2.0 capability: 48 KB L1 and 16 KB shared -//---------------------------------------------------------------------------- - -template< class DriverType> -__global__ -static void cuda_parallel_launch_constant_memory() -{ - const DriverType & driver = - *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer ); - - driver(); -} - -template< class DriverType, unsigned int maxTperB, unsigned int minBperSM > -__global__ -__launch_bounds__(maxTperB, minBperSM) -static void cuda_parallel_launch_constant_memory() -{ - const DriverType & driver = - *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer ); - - driver(); -} - -template< class DriverType> -__global__ -static void cuda_parallel_launch_local_memory( const DriverType driver ) -{ - driver(); -} - -template< class DriverType, unsigned int maxTperB, unsigned int minBperSM > -__global__ -__launch_bounds__(maxTperB, minBperSM) -static void cuda_parallel_launch_local_memory( const DriverType driver ) -{ - driver(); -} - -template < class DriverType - , class LaunchBounds = Kokkos::LaunchBounds<> - , bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) ) > -struct CudaParallelLaunch ; - -template < class DriverType - , unsigned int MaxThreadsPerBlock - , unsigned int MinBlocksPerSM > -struct CudaParallelLaunch< DriverType - , Kokkos::LaunchBounds< MaxThreadsPerBlock - , MinBlocksPerSM > - , true > -{ - inline - CudaParallelLaunch( const DriverType & driver - , const dim3 & grid - , const dim3 & block - , const int shmem - , const cudaStream_t stream = 0 ) - { - if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { - - if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) < - sizeof( DriverType ) ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: Functor is too large") ); - } - - // Fence before changing settings and copying closure - Kokkos::Cuda::fence(); - - if ( CudaTraits::SharedMemoryCapacity < shmem ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); - } - #ifndef KOKKOS_ARCH_KEPLER - // On Kepler the L1 has no benefit since it doesn't cache reads - else { - CUDA_SAFE_CALL( - cudaFuncSetCacheConfig - ( cuda_parallel_launch_constant_memory - < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > - , ( shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) - ) ); - } - #endif - - // Copy functor to constant memory on the device - cudaMemcpyToSymbol( - kokkos_impl_cuda_constant_memory_buffer, &driver, sizeof(DriverType) ); - - KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); - - // Invoke the driver function on the device - cuda_parallel_launch_constant_memory - < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > - <<< grid , block , shmem , stream >>>(); - -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) - CUDA_SAFE_CALL( cudaGetLastError() ); - Kokkos::Cuda::fence(); -#endif - } - } -}; - -template < class DriverType > -struct CudaParallelLaunch< DriverType - , Kokkos::LaunchBounds<> - , true > -{ - inline - CudaParallelLaunch( const DriverType & driver - , const dim3 & grid - , const dim3 & block - , const int shmem - , const cudaStream_t stream = 0 ) - { - if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { - - if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) < - sizeof( DriverType ) ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: Functor is too large") ); - } - - // Fence before changing settings and copying closure - Kokkos::Cuda::fence(); - - if ( CudaTraits::SharedMemoryCapacity < shmem ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); - } - #ifndef KOKKOS_ARCH_KEPLER - // On Kepler the L1 has no benefit since it doesn't cache reads - else { - CUDA_SAFE_CALL( - cudaFuncSetCacheConfig - ( cuda_parallel_launch_constant_memory< DriverType > - , ( shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) - ) ); - } - #endif - - // Copy functor to constant memory on the device - cudaMemcpyToSymbol( - kokkos_impl_cuda_constant_memory_buffer, &driver, sizeof(DriverType) ); - - KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); - - // Invoke the driver function on the device - cuda_parallel_launch_constant_memory< DriverType > - <<< grid , block , shmem , stream >>>(); - -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) - CUDA_SAFE_CALL( cudaGetLastError() ); - Kokkos::Cuda::fence(); -#endif - } - } -}; - -template < class DriverType - , unsigned int MaxThreadsPerBlock - , unsigned int MinBlocksPerSM > -struct CudaParallelLaunch< DriverType - , Kokkos::LaunchBounds< MaxThreadsPerBlock - , MinBlocksPerSM > - , false > -{ - inline - CudaParallelLaunch( const DriverType & driver - , const dim3 & grid - , const dim3 & block - , const int shmem - , const cudaStream_t stream = 0 ) - { - if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { - - if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) < - sizeof( DriverType ) ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: Functor is too large") ); - } - - if ( CudaTraits::SharedMemoryCapacity < shmem ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); - } - #ifndef KOKKOS_ARCH_KEPLER - // On Kepler the L1 has no benefit since it doesn't cache reads - else { - CUDA_SAFE_CALL( - cudaFuncSetCacheConfig - ( cuda_parallel_launch_local_memory - < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > - , ( shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) - ) ); - } - #endif - - KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); - - // Invoke the driver function on the device - cuda_parallel_launch_local_memory - < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > - <<< grid , block , shmem , stream >>>( driver ); - -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) - CUDA_SAFE_CALL( cudaGetLastError() ); - Kokkos::Cuda::fence(); -#endif - } - } -}; - -template < class DriverType > -struct CudaParallelLaunch< DriverType - , Kokkos::LaunchBounds<> - , false > -{ - inline - CudaParallelLaunch( const DriverType & driver - , const dim3 & grid - , const dim3 & block - , const int shmem - , const cudaStream_t stream = 0 ) - { - if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { - - if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) < - sizeof( DriverType ) ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: Functor is too large") ); - } - - if ( CudaTraits::SharedMemoryCapacity < shmem ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); - } - #ifndef KOKKOS_ARCH_KEPLER - // On Kepler the L1 has no benefit since it doesn't cache reads - else { - CUDA_SAFE_CALL( - cudaFuncSetCacheConfig - ( cuda_parallel_launch_local_memory< DriverType > - , ( shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) - ) ); - } - #endif - - KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); - - // Invoke the driver function on the device - cuda_parallel_launch_local_memory< DriverType > - <<< grid , block , shmem , stream >>>( driver ); - -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) - CUDA_SAFE_CALL( cudaGetLastError() ); - Kokkos::Cuda::fence(); -#endif - } - } -}; - -//---------------------------------------------------------------------------- - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* defined( __CUDACC__ ) */ -#endif /* defined( KOKKOS_ENABLE_CUDA ) */ -#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */ - diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp index e13744e327..4c9ed47085 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -55,7 +55,7 @@ #include #include -#include +//#include #include #if defined(KOKKOS_ENABLE_PROFILING) @@ -183,7 +183,7 @@ void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const enum { max_uvm_allocations = 65536 }; - Cuda::fence(); + Cuda::impl_static_fence(); if ( arg_alloc_size > 0 ) { Kokkos::Impl::num_uvm_allocations++; @@ -194,7 +194,7 @@ void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) ); } - Cuda::fence(); + Cuda::impl_static_fence(); return ptr ; } @@ -217,14 +217,14 @@ void CudaSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_all void CudaUVMSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const { - Cuda::fence(); + Cuda::impl_static_fence(); try { if ( arg_alloc_ptr != nullptr ) { Kokkos::Impl::num_uvm_allocations--; CUDA_SAFE_CALL( cudaFree( arg_alloc_ptr ) ); } } catch(...) {} - Cuda::fence(); + Cuda::impl_static_fence(); } void CudaHostPinnedSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const @@ -390,7 +390,7 @@ SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: { #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { - Cuda::fence(); //Make sure I can access the label ... + Cuda::impl_static_fence(); //Make sure I can access the label ... Kokkos::Profiling::deallocateData( Kokkos::Profiling::SpaceHandle(Kokkos::CudaUVMSpace::name()),RecordBase::m_alloc_ptr->m_label, data(),size()); diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp new file mode 100644 index 0000000000..9d4bcbc8cf --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp @@ -0,0 +1,657 @@ +/* +@HEADER +================================================================================ + +ORIGINAL LICENSE +---------------- + +Copyright (c) 2018, NVIDIA Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +================================================================================ + +LICENSE ASSOCIATED WITH SUBSEQUENT MODIFICATIONS +------------------------------------------------ + +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +@HEADER +*/ + +#include +#if defined(__CUDA_ARCH__) && defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + +#include + +#ifndef _SIMT_DETAILS_CONFIG +#define _SIMT_DETAILS_CONFIG + +namespace Kokkos { +namespace Impl { + + +#ifndef __simt_scope +// Modification: Kokkos GPU atomics should default to `gpu` scope +#define __simt_scope "gpu" +#endif + +#define __simt_fence_signal_() asm volatile("":::"memory") +#define __simt_fence_sc_() asm volatile("fence.sc." __simt_scope ";":::"memory") +#define __simt_fence_() asm volatile("fence." __simt_scope ";":::"memory") + +#define __simt_load_acquire_8_as_32(ptr,ret) asm volatile("ld.acquire." __simt_scope ".b8 %0, [%1];" : "=r"(ret) : "l"(ptr) : "memory") +#define __simt_load_relaxed_8_as_32(ptr,ret) asm volatile("ld.relaxed." __simt_scope ".b8 %0, [%1];" : "=r"(ret) : "l"(ptr) : "memory") +#define __simt_store_release_8_as_32(ptr,desired) asm volatile("st.release." __simt_scope ".b8 [%0], %1;" :: "l"(ptr), "r"(desired) : "memory") +#define __simt_store_relaxed_8_as_32(ptr,desired) asm volatile("st.relaxed." __simt_scope ".b8 [%0], %1;" :: "l"(ptr), "r"(desired) : "memory") + +#define __simt_load_acquire_16(ptr,ret) asm volatile("ld.acquire." __simt_scope ".b16 %0, [%1];" : "=h"(ret) : "l"(ptr) : "memory") +#define __simt_load_relaxed_16(ptr,ret) asm volatile("ld.relaxed." __simt_scope ".b16 %0, [%1];" : "=h"(ret) : "l"(ptr) : "memory") +#define __simt_store_release_16(ptr,desired) asm volatile("st.release." __simt_scope ".b16 [%0], %1;" :: "l"(ptr), "h"(desired) : "memory") +#define __simt_store_relaxed_16(ptr,desired) asm volatile("st.relaxed." __simt_scope ".b16 [%0], %1;" :: "l"(ptr), "h"(desired) : "memory") + +#define __simt_load_acquire_32(ptr,ret) asm volatile("ld.acquire." __simt_scope ".b32 %0, [%1];" : "=r"(ret) : "l"(ptr) : "memory") +#define __simt_load_relaxed_32(ptr,ret) asm volatile("ld.relaxed." __simt_scope ".b32 %0, [%1];" : "=r"(ret) : "l"(ptr) : "memory") +#define __simt_store_release_32(ptr,desired) asm volatile("st.release." __simt_scope ".b32 [%0], %1;" :: "l"(ptr), "r"(desired) : "memory") +#define __simt_store_relaxed_32(ptr,desired) asm volatile("st.relaxed." __simt_scope ".b32 [%0], %1;" :: "l"(ptr), "r"(desired) : "memory") +#define __simt_exch_release_32(ptr,old,desired) asm volatile("atom.exch.release." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(desired) : "memory") +#define __simt_exch_acquire_32(ptr,old,desired) asm volatile("atom.exch.acquire." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(desired) : "memory") +#define __simt_exch_acq_rel_32(ptr,old,desired) asm volatile("atom.exch.acq_rel." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(desired) : "memory") +#define __simt_exch_relaxed_32(ptr,old,desired) asm volatile("atom.exch.relaxed." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(desired) : "memory") +#define __simt_cas_release_32(ptr,old,expected,desired) asm volatile("atom.cas.release." __simt_scope ".b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +#define __simt_cas_acquire_32(ptr,old,expected,desired) asm volatile("atom.cas.acquire." __simt_scope ".b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +#define __simt_cas_acq_rel_32(ptr,old,expected,desired) asm volatile("atom.cas.acq_rel." __simt_scope ".b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +#define __simt_cas_relaxed_32(ptr,old,expected,desired) asm volatile("atom.cas.relaxed." __simt_scope ".b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +#define __simt_add_release_32(ptr,old,addend) asm volatile("atom.add.release." __simt_scope ".u32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(addend) : "memory") +#define __simt_add_acquire_32(ptr,old,addend) asm volatile("atom.add.acquire." __simt_scope ".u32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(addend) : "memory") +#define __simt_add_acq_rel_32(ptr,old,addend) asm volatile("atom.add.acq_rel." __simt_scope ".u32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(addend) : "memory") +#define __simt_add_relaxed_32(ptr,old,addend) asm volatile("atom.add.relaxed." __simt_scope ".u32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(addend) : "memory") +#define __simt_and_release_32(ptr,old,andend) asm volatile("atom.and.release." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(andend) : "memory") +#define __simt_and_acquire_32(ptr,old,andend) asm volatile("atom.and.acquire." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(andend) : "memory") +#define __simt_and_acq_rel_32(ptr,old,andend) asm volatile("atom.and.acq_rel." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(andend) : "memory") +#define __simt_and_relaxed_32(ptr,old,andend) asm volatile("atom.and.relaxed." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(andend) : "memory") +#define __simt_or_release_32(ptr,old,orend) asm volatile("atom.or.release." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(orend) : "memory") +#define __simt_or_acquire_32(ptr,old,orend) asm volatile("atom.or.acquire." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(orend) : "memory") +#define __simt_or_acq_rel_32(ptr,old,orend) asm volatile("atom.or.acq_rel." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(orend) : "memory") +#define __simt_or_relaxed_32(ptr,old,orend) asm volatile("atom.or.relaxed." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(orend) : "memory") +#define __simt_xor_release_32(ptr,old,xorend) asm volatile("atom.xor.release." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(xorend) : "memory") +#define __simt_xor_acquire_32(ptr,old,xorend) asm volatile("atom.xor.acquire." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(xorend) : "memory") +#define __simt_xor_acq_rel_32(ptr,old,xorend) asm volatile("atom.xor.acq_rel." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(xorend) : "memory") +#define __simt_xor_relaxed_32(ptr,old,xorend) asm volatile("atom.xor.relaxed." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(xorend) : "memory") + +#define __simt_load_acquire_64(ptr,ret) asm volatile("ld.acquire." __simt_scope ".b64 %0, [%1];" : "=l"(ret) : "l"(ptr) : "memory") +#define __simt_load_relaxed_64(ptr,ret) asm volatile("ld.relaxed." __simt_scope ".b64 %0, [%1];" : "=l"(ret) : "l"(ptr) : "memory") +#define __simt_store_release_64(ptr,desired) asm volatile("st.release." __simt_scope ".b64 [%0], %1;" :: "l"(ptr), "l"(desired) : "memory") +#define __simt_store_relaxed_64(ptr,desired) asm volatile("st.relaxed." __simt_scope ".b64 [%0], %1;" :: "l"(ptr), "l"(desired) : "memory") +#define __simt_exch_release_64(ptr,old,desired) asm volatile("atom.exch.release." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(desired) : "memory") +#define __simt_exch_acquire_64(ptr,old,desired) asm volatile("atom.exch.acquire." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(desired) : "memory") +#define __simt_exch_acq_rel_64(ptr,old,desired) asm volatile("atom.exch.acq_rel." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(desired) : "memory") +#define __simt_exch_relaxed_64(ptr,old,desired) asm volatile("atom.exch.relaxed." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(desired) : "memory") +#define __simt_cas_release_64(ptr,old,expected,desired) asm volatile("atom.cas.release." __simt_scope ".b64 %0, [%1], %2, %3;" : "=l"(old) : "l"(ptr), "l"(expected), "l"(desired) : "memory") +#define __simt_cas_acquire_64(ptr,old,expected,desired) asm volatile("atom.cas.acquire." __simt_scope ".b64 %0, [%1], %2, %3;" : "=l"(old) : "l"(ptr), "l"(expected), "l"(desired) : "memory") +#define __simt_cas_acq_rel_64(ptr,old,expected,desired) asm volatile("atom.cas.acq_rel." __simt_scope ".b64 %0, [%1], %2, %3;" : "=l"(old) : "l"(ptr), "l"(expected), "l"(desired) : "memory") +#define __simt_cas_relaxed_64(ptr,old,expected,desired) asm volatile("atom.cas.relaxed." __simt_scope ".b64 %0, [%1], %2, %3;" : "=l"(old) : "l"(ptr), "l"(expected), "l"(desired) : "memory") +#define __simt_add_release_64(ptr,old,addend) asm volatile("atom.add.release." __simt_scope ".u64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(addend) : "memory") +#define __simt_add_acquire_64(ptr,old,addend) asm volatile("atom.add.acquire." __simt_scope ".u64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(addend) : "memory") +#define __simt_add_acq_rel_64(ptr,old,addend) asm volatile("atom.add.acq_rel." __simt_scope ".u64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(addend) : "memory") +#define __simt_add_relaxed_64(ptr,old,addend) asm volatile("atom.add.relaxed." __simt_scope ".u64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(addend) : "memory") +#define __simt_and_release_64(ptr,old,andend) asm volatile("atom.and.release." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(andend) : "memory") +#define __simt_and_acquire_64(ptr,old,andend) asm volatile("atom.and.acquire." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(andend) : "memory") +#define __simt_and_acq_rel_64(ptr,old,andend) asm volatile("atom.and.acq_rel." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(andend) : "memory") +#define __simt_and_relaxed_64(ptr,old,andend) asm volatile("atom.and.relaxed." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(andend) : "memory") +#define __simt_or_release_64(ptr,old,orend) asm volatile("atom.or.release." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(orend) : "memory") +#define __simt_or_acquire_64(ptr,old,orend) asm volatile("atom.or.acquire." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(orend) : "memory") +#define __simt_or_acq_rel_64(ptr,old,orend) asm volatile("atom.or.acq_rel." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(orend) : "memory") +#define __simt_or_relaxed_64(ptr,old,orend) asm volatile("atom.or.relaxed." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(orend) : "memory") +#define __simt_xor_release_64(ptr,old,xorend) asm volatile("atom.xor.release." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(xorend) : "memory") +#define __simt_xor_acquire_64(ptr,old,xorend) asm volatile("atom.xor.acquire." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(xorend) : "memory") +#define __simt_xor_acq_rel_64(ptr,old,xorend) asm volatile("atom.xor.acq_rel." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(xorend) : "memory") +#define __simt_xor_relaxed_64(ptr,old,xorend) asm volatile("atom.xor.relaxed." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(xorend) : "memory") + +#define __simt_nanosleep(timeout) asm volatile("nanosleep.u32 %0;" :: "r"(unsigned(timeout)) : ) + +/* + definitions +*/ + +#ifndef __GCC_ATOMIC_BOOL_LOCK_FREE +#define __GCC_ATOMIC_BOOL_LOCK_FREE 2 +#define __GCC_ATOMIC_CHAR_LOCK_FREE 2 +#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2 +#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2 +#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2 +#define __GCC_ATOMIC_SHORT_LOCK_FREE 2 +#define __GCC_ATOMIC_INT_LOCK_FREE 2 +#define __GCC_ATOMIC_LONG_LOCK_FREE 2 +#define __GCC_ATOMIC_LLONG_LOCK_FREE 2 +#define __GCC_ATOMIC_POINTER_LOCK_FREE 2 +#endif + +#ifndef __ATOMIC_RELAXED +#define __ATOMIC_RELAXED 0 +#define __ATOMIC_CONSUME 1 +#define __ATOMIC_ACQUIRE 2 +#define __ATOMIC_RELEASE 3 +#define __ATOMIC_ACQ_REL 4 +#define __ATOMIC_SEQ_CST 5 +#endif + +inline __device__ int __stronger_order_simt_(int a, int b) { + if (b == __ATOMIC_SEQ_CST) return __ATOMIC_SEQ_CST; + if (b == __ATOMIC_RELAXED) return a; + switch (a) { + case __ATOMIC_SEQ_CST: + case __ATOMIC_ACQ_REL: return a; + case __ATOMIC_CONSUME: + case __ATOMIC_ACQUIRE: if (b != __ATOMIC_ACQUIRE) return __ATOMIC_ACQ_REL; else return __ATOMIC_ACQUIRE; + case __ATOMIC_RELEASE: if (b != __ATOMIC_RELEASE) return __ATOMIC_ACQ_REL; else return __ATOMIC_RELEASE; + case __ATOMIC_RELAXED: return b; + default: assert(0); + } + return __ATOMIC_SEQ_CST; +} + +/* + base +*/ + +#define DO__atomic_load_simt_(bytes, bits) \ +template::type = 0> \ +void __device__ __atomic_load_simt_ (const type *ptr, type *ret, int memorder) { \ + int##bits##_t tmp = 0; \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_load_acquire_##bits(ptr, tmp); break; \ + case __ATOMIC_RELAXED: __simt_load_relaxed_##bits(ptr, tmp); break; \ + default: assert(0); \ + } \ + memcpy(ret, &tmp, bytes); \ +} +DO__atomic_load_simt_(1,32) +DO__atomic_load_simt_(2,16) +DO__atomic_load_simt_(4,32) +DO__atomic_load_simt_(8,64) + +template +type __device__ __atomic_load_n_simt_(const type *ptr, int memorder) { + type ret; + __atomic_load_simt_(ptr, &ret, memorder); + return ret; +} + +#define DO__atomic_store_simt_(bytes, bits) \ +template::type = 0> \ +void __device__ __atomic_store_simt_ (type *ptr, type *val, int memorder) { \ + int##bits##_t tmp = 0; \ + memcpy(&tmp, val, bytes); \ + switch (memorder) { \ + case __ATOMIC_RELEASE: __simt_store_release_##bits(ptr, tmp); break; \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_RELAXED: __simt_store_relaxed_##bits(ptr, tmp); break; \ + default: assert(0); \ + } \ +} +DO__atomic_store_simt_(1,32) +DO__atomic_store_simt_(2,16) +DO__atomic_store_simt_(4,32) +DO__atomic_store_simt_(8,64) + +template +void __device__ __atomic_store_n_simt_(type *ptr, type val, int memorder) { + __atomic_store_simt_(ptr, &val, memorder); +} + +#define DO__atomic_compare_exchange_simt_(bytes, bits) \ +template::type = 0> \ +bool __device__ __atomic_compare_exchange_simt_ (type *ptr, type *expected, const type *desired, bool, int success_memorder, int failure_memorder) { \ + int##bits##_t tmp = 0, old = 0, old_tmp; \ + memcpy(&tmp, desired, bytes); \ + memcpy(&old, expected, bytes); \ + old_tmp = old; \ + switch (__stronger_order_simt_(success_memorder, failure_memorder)) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_cas_acquire_##bits(ptr, old, old_tmp, tmp); break; \ + case __ATOMIC_ACQ_REL: __simt_cas_acq_rel_##bits(ptr, old, old_tmp, tmp); break; \ + case __ATOMIC_RELEASE: __simt_cas_release_##bits(ptr, old, old_tmp, tmp); break; \ + case __ATOMIC_RELAXED: __simt_cas_relaxed_##bits(ptr, old, old_tmp, tmp); break; \ + default: assert(0); \ + } \ + bool const ret = old == old_tmp; \ + memcpy(expected, &old, bytes); \ + return ret; \ +} +DO__atomic_compare_exchange_simt_(4, 32) +DO__atomic_compare_exchange_simt_(8, 64) + +template::type = 0> \ +bool __device__ __atomic_compare_exchange_simt_(type *ptr, type *expected, const type *desired, bool, int success_memorder, int failure_memorder) { + + using R = typename std::conditional::value, volatile uint32_t, uint32_t>::type; + auto const aligned = (R*)((intptr_t)ptr & ~(sizeof(uint32_t) - 1)); + auto const offset = uint32_t((intptr_t)ptr & (sizeof(uint32_t) - 1)) * 8; + auto const mask = ((1 << sizeof(type)*8) - 1) << offset; + + uint32_t old = *expected << offset, old_value; + while (1) { + old_value = (old & mask) >> offset; + if (old_value != *expected) + break; + uint32_t const attempt = (old & ~mask) | (*desired << offset); + if (__atomic_compare_exchange_simt_ (aligned, &old, &attempt, true, success_memorder, failure_memorder)) + return true; + } + *expected = old_value; + return false; +} + +template +bool __device__ __atomic_compare_exchange_n_simt_(type *ptr, type *expected, type desired, bool weak, int success_memorder, int failure_memorder) { + return __atomic_compare_exchange_simt_(ptr, expected, &desired, weak, success_memorder, failure_memorder); +} + +#define DO__atomic_exchange_simt_(bytes, bits) \ +template::type = 0> \ +void __device__ __atomic_exchange_simt_ (type *ptr, type *val, type *ret, int memorder) { \ + int##bits##_t tmp = 0; \ + memcpy(&tmp, val, bytes); \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_exch_acquire_##bits(ptr, tmp, tmp); break; \ + case __ATOMIC_ACQ_REL: __simt_exch_acq_rel_##bits(ptr, tmp, tmp); break; \ + case __ATOMIC_RELEASE: __simt_exch_release_##bits(ptr, tmp, tmp); break; \ + case __ATOMIC_RELAXED: __simt_exch_relaxed_##bits(ptr, tmp, tmp); break; \ + default: assert(0); \ + } \ + memcpy(ret, &tmp, bytes); \ +} +DO__atomic_exchange_simt_(4,32) +DO__atomic_exchange_simt_(8,64) + +template::type = 0> +void __device__ __atomic_exchange_simt_ (type *ptr, type *val, type *ret, int memorder) { + + type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); + while(!__atomic_compare_exchange_simt_(ptr, &expected, val, true, memorder, memorder)) + ; + *ret = expected; +} + +template +type __device__ __atomic_exchange_n_simt_(type *ptr, type val, int memorder) { + type ret; + __atomic_exchange_simt_(ptr, &val, &ret, memorder); + return ret; +} + +#define DO__atomic_fetch_add_simt_(bytes, bits) \ +template::type = 0> \ +type __device__ __atomic_fetch_add_simt_ (type *ptr, delta val, int memorder) { \ + type ret; \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_add_acquire_##bits(ptr, ret, val); break; \ + case __ATOMIC_ACQ_REL: __simt_add_acq_rel_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELEASE: __simt_add_release_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELAXED: __simt_add_relaxed_##bits(ptr, ret, val); break; \ + default: assert(0); \ + } \ + return ret; \ +} +DO__atomic_fetch_add_simt_(4, 32) +DO__atomic_fetch_add_simt_(8, 64) + +template::type = 0> +type __device__ __atomic_fetch_add_simt_ (type *ptr, delta val, int memorder) { + + type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); + type const desired = expected + val; + while(!__atomic_compare_exchange_simt_(ptr, &expected, &desired, true, memorder, memorder)) + ; + return expected; +} + +#define DO__atomic_fetch_sub_simt_(bytes, bits) \ +template::type = 0> \ +type __device__ __atomic_fetch_sub_simt_ (type *ptr, delta val, int memorder) { \ + type ret; \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_add_acquire_##bits(ptr, ret, -val); break; \ + case __ATOMIC_ACQ_REL: __simt_add_acq_rel_##bits(ptr, ret, -val); break; \ + case __ATOMIC_RELEASE: __simt_add_release_##bits(ptr, ret, -val); break; \ + case __ATOMIC_RELAXED: __simt_add_relaxed_##bits(ptr, ret, -val); break; \ + default: assert(0); \ + } \ + return ret; \ +} +DO__atomic_fetch_sub_simt_(4,32) +DO__atomic_fetch_sub_simt_(8,64) + +template::type = 0> +type __device__ __atomic_fetch_sub_simt_ (type *ptr, delta val, int memorder) { + + type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); + type const desired = expected - val; + while(!__atomic_compare_exchange_simt_(ptr, &expected, &desired, true, memorder, memorder)) + ; + return expected; +} + +#define DO__atomic_fetch_and_simt_(bytes, bits) \ +template::type = 0> \ +type __device__ __atomic_fetch_and_simt_ (type *ptr, type val, int memorder) { \ + type ret; \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_and_acquire_##bits(ptr, ret, val); break; \ + case __ATOMIC_ACQ_REL: __simt_and_acq_rel_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELEASE: __simt_and_release_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELAXED: __simt_and_relaxed_##bits(ptr, ret, val); break; \ + default: assert(0); \ + } \ + return ret; \ +} +DO__atomic_fetch_and_simt_(4,32) +DO__atomic_fetch_and_simt_(8,64) + +template::type = 0> +type __device__ __atomic_fetch_and_simt_ (type *ptr, delta val, int memorder) { + + type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); + type const desired = expected & val; + while(!__atomic_compare_exchange_simt_(ptr, &expected, &desired, true, memorder, memorder)) + ; + return expected; +} + +#define DO__atomic_fetch_xor_simt_(bytes, bits) \ +template::type = 0> \ +type __device__ __atomic_fetch_xor_simt_ (type *ptr, type val, int memorder) { \ + type ret; \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_xor_acquire_##bits(ptr, ret, val); break; \ + case __ATOMIC_ACQ_REL: __simt_xor_acq_rel_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELEASE: __simt_xor_release_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELAXED: __simt_xor_relaxed_##bits(ptr, ret, val); break; \ + default: assert(0); \ + } \ + return ret; \ +} +DO__atomic_fetch_xor_simt_(4,32) +DO__atomic_fetch_xor_simt_(8,64) + +template::type = 0> +type __device__ __atomic_fetch_xor_simt_ (type *ptr, delta val, int memorder) { + + type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); + type const desired = expected ^ val; + while(!__atomic_compare_exchange_simt_(ptr, &expected, &desired, true, memorder, memorder)) + ; + return expected; +} + +#define DO__atomic_fetch_or_simt_(bytes, bits) \ +template::type = 0> \ +type __device__ __atomic_fetch_or_simt_ (type *ptr, type val, int memorder) { \ + type ret; \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_or_acquire_##bits(ptr, ret, val); break; \ + case __ATOMIC_ACQ_REL: __simt_or_acq_rel_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELEASE: __simt_or_release_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELAXED: __simt_or_relaxed_##bits(ptr, ret, val); break; \ + default: assert(0); \ + } \ + return ret; \ +} +DO__atomic_fetch_or_simt_(4,32) +DO__atomic_fetch_or_simt_(8,64) + +template::type = 0> +type __device__ __atomic_fetch_or_simt_ (type *ptr, delta val, int memorder) { + + type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); + type const desired = expected | val; + while(!__atomic_compare_exchange_simt_(ptr, &expected, &desired, true, memorder, memorder)) + ; + return expected; +} + +template +inline bool __device__ __atomic_test_and_set_simt_(type *ptr, int memorder) { + return __atomic_exchange_n_simt_((char*)ptr, (char)1, memorder) == 1; +} +template +inline void __device__ __atomic_clear_simt_(type *ptr, int memorder) { + return __atomic_store_n_simt_((char*)ptr, (char)0, memorder); +} + +inline constexpr __device__ bool __atomic_always_lock_free_simt_ (size_t size, void *) { + return size <= 8; +} +inline __device__ bool __atomic_is_lock_free_simt_(size_t size, void * ptr) { + return __atomic_always_lock_free_simt_(size, ptr); +} + +/* + fences +*/ + +inline void __device__ __atomic_thread_fence_simt(int memorder) { + switch (memorder) { + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); break; + case __ATOMIC_CONSUME: + case __ATOMIC_ACQUIRE: + case __ATOMIC_ACQ_REL: + case __ATOMIC_RELEASE: __simt_fence_(); break; + case __ATOMIC_RELAXED: break; + default: assert(0); + } +} +inline void __device__ __atomic_signal_fence_simt(int memorder) { + __atomic_thread_fence_simt(memorder); +} + +/* + non-volatile +*/ + +template type __device__ __atomic_load_n_simt(const type *ptr, int memorder) { + return __atomic_load_n_simt_(const_cast(ptr), memorder); +} +template void __device__ __atomic_load_simt(const type *ptr, type *ret, int memorder) { + __atomic_load_simt_(const_cast(ptr), ret, memorder); +} +template void __device__ __atomic_store_n_simt(type *ptr, type val, int memorder) { + __atomic_store_n_simt_(const_cast(ptr), val, memorder); +} +template void __device__ __atomic_store_simt(type *ptr, type *val, int memorder) { + __atomic_store_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_exchange_n_simt(type *ptr, type val, int memorder) { + return __atomic_exchange_n_simt_(const_cast(ptr), val, memorder); +} +template void __device__ __atomic_exchange_simt(type *ptr, type *val, type *ret, int memorder) { + __atomic_exchange_simt_(const_cast(ptr), val, ret, memorder); +} +template bool __device__ __atomic_compare_exchange_n_simt(type *ptr, type *expected, type desired, bool weak, int success_memorder, int failure_memorder) { + return __atomic_compare_exchange_n_simt_(const_cast(ptr), expected, desired, weak, success_memorder, failure_memorder); +} +template bool __device__ __atomic_compare_exchange_simt(type *ptr, type *expected, type *desired, bool weak, int success_memorder, int failure_memorder) { + return __atomic_compare_exchange_simt_(const_cast(ptr), expected, desired, weak, success_memorder, failure_memorder); +} +template type __device__ __atomic_fetch_add_simt(type *ptr, delta val, int memorder) { + return __atomic_fetch_add_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_sub_simt(type *ptr, delta val, int memorder) { + return __atomic_fetch_sub_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_and_simt(type *ptr, type val, int memorder) { + return __atomic_fetch_and_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_xor_simt(type *ptr, type val, int memorder) { + return __atomic_fetch_xor_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_or_simt(type *ptr, type val, int memorder) { + return __atomic_fetch_or_simt_(const_cast(ptr), val, memorder); +} +template bool __device__ __atomic_test_and_set_simt(void *ptr, int memorder) { + return __atomic_test_and_set_simt_(const_cast(ptr), memorder); +} +template void __device__ __atomic_clear_simt(void *ptr, int memorder) { + return __atomic_clear_simt_(const_cast(ptr), memorder); +} +inline bool __device__ __atomic_always_lock_free_simt(size_t size, void *ptr) { + return __atomic_always_lock_free_simt_(size, const_cast(ptr)); +} +inline bool __device__ __atomic_is_lock_free_simt(size_t size, void *ptr) { + return __atomic_is_lock_free_simt_(size, const_cast(ptr)); +} + +/* + volatile +*/ + +template type __device__ __atomic_load_n_simt(const volatile type *ptr, int memorder) { + return __atomic_load_n_simt_(const_cast(ptr), memorder); +} +template void __device__ __atomic_load_simt(const volatile type *ptr, type *ret, int memorder) { + __atomic_load_simt_(const_cast(ptr), ret, memorder); +} +template void __device__ __atomic_store_n_simt(volatile type *ptr, type val, int memorder) { + __atomic_store_n_simt_(const_cast(ptr), val, memorder); +} +template void __device__ __atomic_store_simt(volatile type *ptr, type *val, int memorder) { + __atomic_store_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_exchange_n_simt(volatile type *ptr, type val, int memorder) { + return __atomic_exchange_n_simt_(const_cast(ptr), val, memorder); +} +template void __device__ __atomic_exchange_simt(volatile type *ptr, type *val, type *ret, int memorder) { + __atomic_exchange_simt_(const_cast(ptr), val, ret, memorder); +} +template bool __device__ __atomic_compare_exchange_n_simt(volatile type *ptr, type *expected, type desired, bool weak, int success_memorder, int failure_memorder) { + return __atomic_compare_exchange_n_simt_(const_cast(ptr), expected, desired, weak, success_memorder, failure_memorder); +} +template bool __device__ __atomic_compare_exchange_simt(volatile type *ptr, type *expected, type *desired, bool weak, int success_memorder, int failure_memorder) { + return __atomic_compare_exchange_simt_(const_cast(ptr), expected, desired, weak, success_memorder, failure_memorder); +} +template type __device__ __atomic_fetch_add_simt(volatile type *ptr, delta val, int memorder) { + return __atomic_fetch_add_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_sub_simt(volatile type *ptr, delta val, int memorder) { + return __atomic_fetch_sub_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_and_simt(volatile type *ptr, type val, int memorder) { + return __atomic_fetch_and_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_xor_simt(volatile type *ptr, type val, int memorder) { + return __atomic_fetch_xor_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_or_simt(volatile type *ptr, type val, int memorder) { + return __atomic_fetch_or_simt_(const_cast(ptr), val, memorder); +} +template bool __device__ __atomic_test_and_set_simt(volatile void *ptr, int memorder) { + return __atomic_test_and_set_simt_(const_cast(ptr), memorder); +} +template void __device__ __atomic_clear_simt(volatile void *ptr, int memorder) { + return __atomic_clear_simt_(const_cast(ptr), memorder); +} + + + +} // end namespace Impl +} // end namespace Kokkos + +#endif //_SIMT_DETAILS_CONFIG + +#ifndef KOKKOS_SIMT_ATOMIC_BUILTIN_REPLACEMENTS_DEFINED +/* + builtins +*/ + +#define __atomic_load_n __atomic_load_n_simt +#define __atomic_load __atomic_load_simt +#define __atomic_store_n __atomic_store_n_simt +#define __atomic_store __atomic_store_simt +#define __atomic_exchange_n __atomic_exchange_n_simt +#define __atomic_exchange __atomic_exchange_simt +#define __atomic_compare_exchange_n __atomic_compare_exchange_n_simt +#define __atomic_compare_exchange __atomic_compare_exchange_simt +#define __atomic_fetch_add __atomic_fetch_add_simt +#define __atomic_fetch_sub __atomic_fetch_sub_simt +#define __atomic_fetch_and __atomic_fetch_and_simt +#define __atomic_fetch_xor __atomic_fetch_xor_simt +#define __atomic_fetch_or __atomic_fetch_or_simt +#define __atomic_test_and_set __atomic_test_and_set_simt +#define __atomic_clear __atomic_clear_simt +#define __atomic_always_lock_free __atomic_always_lock_free_simt +#define __atomic_is_lock_free __atomic_is_lock_free_simt +#define __atomic_thread_fence __atomic_thread_fence_simt +#define __atomic_signal_fence __atomic_signal_fence_simt + +#define KOKKOS_SIMT_ATOMIC_BUILTIN_REPLACEMENTS_DEFINED + +#endif //__CUDA_ARCH__ && KOKKOS_ENABLE_CUDA_ASM_ATOMICS +#endif // KOKKOS_SIMT_ATOMIC_BUILTIN_REPLACEMENTS_DEFINED diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics_Restore_Builtins.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics_Restore_Builtins.hpp new file mode 100644 index 0000000000..bedb147227 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics_Restore_Builtins.hpp @@ -0,0 +1,68 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifdef KOKKOS_SIMT_ATOMIC_BUILTIN_REPLACEMENTS_DEFINED + +#undef __atomic_load_n +#undef __atomic_load +#undef __atomic_store_n +#undef __atomic_store +#undef __atomic_exchange_n +#undef __atomic_exchange +#undef __atomic_compare_exchange_n +#undef __atomic_compare_exchange +#undef __atomic_fetch_add +#undef __atomic_fetch_sub +#undef __atomic_fetch_and +#undef __atomic_fetch_xor +#undef __atomic_fetch_or +#undef __atomic_test_and_set +#undef __atomic_clear +#undef __atomic_always_lock_free +#undef __atomic_is_lock_free +#undef __atomic_thread_fence +#undef __atomic_signal_fence + +#undef KOKKOS_SIMT_ATOMIC_BUILTIN_REPLACEMENTS_DEFINED + +#endif // KOKKOS_SIMT_ATOMIC_BUILTIN_REPLACEMENTS_DEFINED diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp similarity index 69% rename from lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp rename to lib/kokkos/core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp index 145d93ed76..932bde2b37 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp @@ -58,7 +58,68 @@ struct CudaGetMaxBlockSize; template int cuda_get_max_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { - return CudaGetMaxBlockSize::get_block_size(f,vector_length, shmem_extra_block,shmem_extra_thread); + return CudaGetMaxBlockSize::get_block_size(f,vector_length, shmem_extra_block,shmem_extra_thread); +} + +template +int cuda_get_max_block_size(const CudaInternal* cuda_instance, const cudaFuncAttributes& attr, const FunctorType& f, const size_t vector_length, + const size_t shmem_block, const size_t shmem_thread) { + + const int min_blocks_per_sm = LaunchBounds::minBperSM == 0 ? + 1 : LaunchBounds::minBperSM ; + const int max_threads_per_block = LaunchBounds::maxTperB == 0 ? + cuda_instance->m_maxThreadsPerBlock : LaunchBounds::maxTperB ; + + const int regs_per_thread = attr.numRegs; + const int regs_per_sm = cuda_instance->m_regsPerSM; + const int shmem_per_sm = cuda_instance->m_shmemPerSM; + const int max_shmem_per_block = cuda_instance->m_maxShmemPerBlock; + const int max_blocks_per_sm = cuda_instance->m_maxBlocksPerSM; + const int max_threads_per_sm = cuda_instance->m_maxThreadsPerSM; + + int block_size = std::min(attr.maxThreadsPerBlock,max_threads_per_block); + + int functor_shmem = FunctorTeamShmemSize< FunctorType >::value( f , block_size/vector_length ); + int total_shmem = shmem_block + shmem_thread*(block_size/vector_length) + functor_shmem + attr.sharedSizeBytes; + int max_blocks_regs = regs_per_sm/(regs_per_thread*block_size); + int max_blocks_shmem = (total_shmem0?shmem_per_sm/total_shmem:max_blocks_regs):0; + int blocks_per_sm = std::min(max_blocks_regs,max_blocks_shmem); + int threads_per_sm = blocks_per_sm * block_size; + if(threads_per_sm > max_threads_per_sm) { + blocks_per_sm = max_threads_per_sm/block_size; + threads_per_sm = blocks_per_sm * block_size; + } + int opt_block_size = (blocks_per_sm>=min_blocks_per_sm) ? block_size : 0; + int opt_threads_per_sm = threads_per_sm; + //printf("BlockSizeMax: %i Shmem: %i %i %i %i Regs: %i %i Blocks: %i %i Achieved: %i %i Opt: %i %i\n",block_size, + // shmem_per_sm,max_shmem_per_block,functor_shmem,total_shmem, + // regs_per_sm,regs_per_thread,max_blocks_shmem,max_blocks_regs,blocks_per_sm,threads_per_sm,opt_block_size,opt_threads_per_sm); + block_size-=32; + while ((blocks_per_sm==0) && (block_size>=32)) { + functor_shmem = FunctorTeamShmemSize< FunctorType >::value( f , block_size/vector_length ); + total_shmem = shmem_block + shmem_thread*(block_size/vector_length) + functor_shmem + attr.sharedSizeBytes; + max_blocks_regs = regs_per_sm/(regs_per_thread*block_size); + max_blocks_shmem = (total_shmem0?shmem_per_sm/total_shmem:max_blocks_regs):0; + blocks_per_sm = std::min(max_blocks_regs,max_blocks_shmem); + threads_per_sm = blocks_per_sm * block_size; + if(threads_per_sm > max_threads_per_sm) { + blocks_per_sm = max_threads_per_sm/block_size; + threads_per_sm = blocks_per_sm * block_size; + } + if((blocks_per_sm >= min_blocks_per_sm) && (blocks_per_sm <= max_blocks_per_sm)) { + if(threads_per_sm>=opt_threads_per_sm) { + opt_block_size = block_size; + opt_threads_per_sm = threads_per_sm; + } + } + //printf("BlockSizeMax: %i Shmem: %i %i %i %i Regs: %i %i Blocks: %i %i Achieved: %i %i Opt: %i %i\n",block_size, + // shmem_per_sm,max_shmem_per_block,functor_shmem,total_shmem, + // regs_per_sm,regs_per_thread,max_blocks_shmem,max_blocks_regs,blocks_per_sm,threads_per_sm,opt_block_size,opt_threads_per_sm); + block_size-=32; + } + return opt_block_size; } @@ -241,11 +302,71 @@ struct CudaGetOptBlockSize; template int cuda_get_opt_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { - return CudaGetOptBlockSize::get_block_size(f,vector_length,shmem_extra_block,shmem_extra_thread); + return CudaGetOptBlockSize::get_block_size(f,vector_length,shmem_extra_block,shmem_extra_thread); +} + +template +int cuda_get_opt_block_size(const CudaInternal* cuda_instance, const cudaFuncAttributes& attr, const FunctorType& f, const size_t vector_length, + const size_t shmem_block, const size_t shmem_thread) { + + const int min_blocks_per_sm = LaunchBounds::minBperSM == 0 ? + 1 : LaunchBounds::minBperSM ; + const int max_threads_per_block = LaunchBounds::maxTperB == 0 ? + cuda_instance->m_maxThreadsPerBlock : LaunchBounds::maxTperB ; + + const int regs_per_thread = attr.numRegs; + const int regs_per_sm = cuda_instance->m_regsPerSM; + const int shmem_per_sm = cuda_instance->m_shmemPerSM; + const int max_shmem_per_block = cuda_instance->m_maxShmemPerBlock; + const int max_blocks_per_sm = cuda_instance->m_maxBlocksPerSM; + const int max_threads_per_sm = cuda_instance->m_maxThreadsPerSM; + + int block_size = std::min(attr.maxThreadsPerBlock,max_threads_per_block); + + int functor_shmem = FunctorTeamShmemSize< FunctorType >::value( f , block_size/vector_length ); + int total_shmem = shmem_block + shmem_thread*(block_size/vector_length) + functor_shmem + attr.sharedSizeBytes; + int max_blocks_regs = regs_per_sm/(regs_per_thread*block_size); + int max_blocks_shmem = (total_shmem0?shmem_per_sm/total_shmem:max_blocks_regs):0; + int blocks_per_sm = std::min(max_blocks_regs,max_blocks_shmem); + int threads_per_sm = blocks_per_sm * block_size; + if(threads_per_sm > max_threads_per_sm) { + blocks_per_sm = max_threads_per_sm/block_size; + threads_per_sm = blocks_per_sm * block_size; + } + int opt_block_size = (blocks_per_sm>=min_blocks_per_sm) ? block_size : 0; + int opt_threads_per_sm = threads_per_sm; + + block_size-=32; + while ((block_size>=32)) { + functor_shmem = FunctorTeamShmemSize< FunctorType >::value( f , block_size/vector_length ); + total_shmem = shmem_block + shmem_thread*(block_size/vector_length) + functor_shmem + attr.sharedSizeBytes; + max_blocks_regs = regs_per_sm/(regs_per_thread*block_size); + max_blocks_shmem = (total_shmem0?shmem_per_sm/total_shmem:max_blocks_regs):0; + blocks_per_sm = std::min(max_blocks_regs,max_blocks_shmem); + threads_per_sm = blocks_per_sm * block_size; + if(threads_per_sm > max_threads_per_sm) { + blocks_per_sm = max_threads_per_sm/block_size; + threads_per_sm = blocks_per_sm * block_size; + } + if((blocks_per_sm >= min_blocks_per_sm) && (blocks_per_sm <= max_blocks_per_sm)) { + if(threads_per_sm>=opt_threads_per_sm) { + opt_block_size = block_size; + opt_threads_per_sm = threads_per_sm; + } + } + block_size-=32; + } + return opt_block_size; } template -struct CudaGetOptBlockSize,true> { +struct CudaGetOptBlockSize,true> { static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { int blockSize=16; @@ -275,7 +396,7 @@ struct CudaGetOptBlockSize,true> { }; template -struct CudaGetOptBlockSize,false> { +struct CudaGetOptBlockSize,false> { static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { int blockSize=16; @@ -305,7 +426,7 @@ struct CudaGetOptBlockSize,false> { }; template -struct CudaGetOptBlockSize,true> { +struct CudaGetOptBlockSize,true> { static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { int blockSize=16; diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp similarity index 86% rename from lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp rename to lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp index 4fd7a9c69e..0ca9e3c160 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp @@ -50,7 +50,8 @@ #include #include -#include +#include +#include #include #include #include @@ -217,78 +218,6 @@ const CudaInternalDevices & CudaInternalDevices::singleton() } -//---------------------------------------------------------------------------- - -class CudaInternal { -private: - - CudaInternal( const CudaInternal & ); - CudaInternal & operator = ( const CudaInternal & ); - - -public: - - typedef Cuda::size_type size_type ; - - int m_cudaDev ; - int m_cudaArch ; - unsigned m_multiProcCount ; - unsigned m_maxWarpCount ; - unsigned m_maxBlock ; - unsigned m_maxSharedWords ; - uint32_t m_maxConcurrency ; - size_type m_scratchSpaceCount ; - size_type m_scratchFlagsCount ; - size_type m_scratchUnifiedCount ; - size_type m_scratchUnifiedSupported ; - size_type m_streamCount ; - size_type * m_scratchSpace ; - size_type * m_scratchFlags ; - size_type * m_scratchUnified ; - uint32_t * m_scratchConcurrentBitset ; - cudaStream_t * m_stream ; - - static int was_initialized; - static int was_finalized; - - static CudaInternal & singleton(); - - int verify_is_initialized( const char * const label ) const ; - - int is_initialized() const - { return 0 != m_scratchSpace && 0 != m_scratchFlags ; } - - void initialize( int cuda_device_id , int stream_count ); - void finalize(); - - void print_configuration( std::ostream & ) const ; - - ~CudaInternal(); - - CudaInternal() - : m_cudaDev( -1 ) - , m_cudaArch( -1 ) - , m_multiProcCount( 0 ) - , m_maxWarpCount( 0 ) - , m_maxBlock( 0 ) - , m_maxSharedWords( 0 ) - , m_maxConcurrency( 0 ) - , m_scratchSpaceCount( 0 ) - , m_scratchFlagsCount( 0 ) - , m_scratchUnifiedCount( 0 ) - , m_scratchUnifiedSupported( 0 ) - , m_streamCount( 0 ) - , m_scratchSpace( 0 ) - , m_scratchFlags( 0 ) - , m_scratchUnified( 0 ) - , m_scratchConcurrentBitset( 0 ) - , m_stream( 0 ) - {} - - size_type * scratch_space( const size_type size ); - size_type * scratch_flags( const size_type size ); - size_type * scratch_unified( const size_type size ); -}; int CudaInternal::was_initialized = 0; int CudaInternal::was_finalized = 0; @@ -366,8 +295,11 @@ CudaInternal & CudaInternal::singleton() static CudaInternal self ; return self ; } +void CudaInternal::fence() const { + cudaStreamSynchronize(m_stream); +} -void CudaInternal::initialize( int cuda_device_id , int stream_count ) +void CudaInternal::initialize( int cuda_device_id , cudaStream_t stream ) { if ( was_finalized ) Kokkos::abort("Calling Cuda::initialize after Cuda::finalize is illegal\n"); was_initialized = 1; @@ -454,6 +386,15 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) m_maxBlock = cudaProp.maxGridSize[0] ; + m_shmemPerSM = cudaProp.sharedMemPerMultiprocessor ; + m_maxShmemPerBlock = cudaProp.sharedMemPerBlock ; + m_regsPerSM = cudaProp.regsPerMultiprocessor ; + m_maxBlocksPerSM = m_cudaArch < 500 ? 16 : ( + m_cudaArch < 750 ? 32 : ( + m_cudaArch == 750 ? 16 : 32)); + m_maxThreadsPerSM = cudaProp.maxThreadsPerMultiProcessor ; + m_maxThreadsPerBlock = cudaProp.maxThreadsPerBlock ; + //---------------------------------- m_scratchUnifiedSupported = cudaProp.unifiedAddressing ; @@ -482,10 +423,9 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) // Concurrent bitset for obtaining unique tokens from within // an executing kernel. { - const unsigned max_threads_per_sm = 2048 ; // up to capability 7.0 m_maxConcurrency = - max_threads_per_sm * cudaProp.multiProcessorCount ; + m_maxThreadsPerSM * cudaProp.multiProcessorCount ; const int32_t buffer_bound = Kokkos::Impl::concurrent_bitset::buffer_bound( m_maxConcurrency ); @@ -507,11 +447,6 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) } //---------------------------------- - if ( stream_count ) { - m_stream = (cudaStream_t*) ::malloc( stream_count * sizeof(cudaStream_t) ); - m_streamCount = stream_count ; - for ( size_type i = 0 ; i < m_streamCount ; ++i ) m_stream[i] = 0 ; - } } else { @@ -539,7 +474,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) if( Kokkos::show_warnings() && !cuda_launch_blocking() ) { std::cerr << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl; std::cerr << " without setting CUDA_LAUNCH_BLOCKING=1." << std::endl; - std::cerr << " The code must call Cuda::fence() after each kernel" << std::endl; + std::cerr << " The code must call Cuda().fence() after each kernel" << std::endl; std::cerr << " or will likely crash when accessing data on the host." << std::endl; } @@ -568,7 +503,10 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) #endif // Init the array for used for arbitrarily sized atomics - Impl::initialize_host_cuda_lock_arrays(); + if(stream == 0) + Impl::initialize_host_cuda_lock_arrays(); + + m_stream = stream; } //---------------------------------------------------------------------------- @@ -578,7 +516,7 @@ enum { sizeScratchGrain = sizeof(ScratchGrain) }; Cuda::size_type * -CudaInternal::scratch_flags( const Cuda::size_type size ) +CudaInternal::scratch_flags( const Cuda::size_type size ) const { if ( verify_is_initialized("scratch_flags") && m_scratchFlagsCount * sizeScratchGrain < size ) { @@ -587,6 +525,9 @@ CudaInternal::scratch_flags( const Cuda::size_type size ) typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > Record ; + if( m_scratchFlags ) + Record::decrement( Record::get_record( m_scratchFlags ) ); + Record * const r = Record::allocate( Kokkos::CudaSpace() , "InternalScratchFlags" , ( sizeof( ScratchGrain ) * m_scratchFlagsCount ) ); @@ -602,7 +543,7 @@ CudaInternal::scratch_flags( const Cuda::size_type size ) } Cuda::size_type * -CudaInternal::scratch_space( const Cuda::size_type size ) +CudaInternal::scratch_space( const Cuda::size_type size ) const { if ( verify_is_initialized("scratch_space") && m_scratchSpaceCount * sizeScratchGrain < size ) { @@ -610,6 +551,9 @@ CudaInternal::scratch_space( const Cuda::size_type size ) typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > Record ; + if( m_scratchSpace ) + Record::decrement( Record::get_record( m_scratchSpace ) ); + Record * const r = Record::allocate( Kokkos::CudaSpace() , "InternalScratchSpace" , ( sizeof( ScratchGrain ) * m_scratchSpaceCount ) ); @@ -623,7 +567,7 @@ CudaInternal::scratch_space( const Cuda::size_type size ) } Cuda::size_type * -CudaInternal::scratch_unified( const Cuda::size_type size ) +CudaInternal::scratch_unified( const Cuda::size_type size ) const { if ( verify_is_initialized("scratch_unified") && m_scratchUnifiedSupported && m_scratchUnifiedCount * sizeScratchGrain < size ) { @@ -632,6 +576,9 @@ CudaInternal::scratch_unified( const Cuda::size_type size ) typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > Record ; + if( m_scratchUnified ) + Record::decrement( Record::get_record( m_scratchUnified ) ); + Record * const r = Record::allocate( Kokkos::CudaHostPinnedSpace() , "InternalScratchUnified" , ( sizeof( ScratchGrain ) * m_scratchUnifiedCount ) ); @@ -644,6 +591,31 @@ CudaInternal::scratch_unified( const Cuda::size_type size ) return m_scratchUnified ; } +Cuda::size_type * +CudaInternal::scratch_functor( const Cuda::size_type size ) const +{ + if ( verify_is_initialized("scratch_functor") && + m_scratchFunctorSize < size ) { + + m_scratchFunctorSize = size ; + + typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > Record ; + + if( m_scratchFunctor ) + Record::decrement( Record::get_record( m_scratchFunctor ) ); + + Record * const r = Record::allocate( Kokkos::CudaSpace() + , "InternalScratchFunctor" + , m_scratchFunctorSize ); + + Record::increment( r ); + + m_scratchFunctor = reinterpret_cast( r->data() ); + } + + return m_scratchFunctor ; +} + //---------------------------------------------------------------------------- void CudaInternal::finalize() @@ -653,13 +625,7 @@ void CudaInternal::finalize() Impl::finalize_host_cuda_lock_arrays(); - if ( m_stream ) { - for ( size_type i = 1 ; i < m_streamCount ; ++i ) { - cudaStreamDestroy( m_stream[i] ); - m_stream[i] = 0 ; - } - ::free( m_stream ); - } + if(m_stream!=0) cudaStreamDestroy(m_stream); typedef Kokkos::Impl::SharedAllocationRecord< CudaSpace > RecordCuda ; typedef Kokkos::Impl::SharedAllocationRecord< CudaHostPinnedSpace > RecordHost ; @@ -668,6 +634,8 @@ void CudaInternal::finalize() RecordCuda::decrement( RecordCuda::get_record( m_scratchSpace ) ); RecordHost::decrement( RecordHost::get_record( m_scratchUnified ) ); RecordCuda::decrement( RecordCuda::get_record( m_scratchConcurrentBitset ) ); + if(m_scratchFunctorSize>0) + RecordCuda::decrement( RecordCuda::get_record( m_scratchFunctor ) ); m_cudaDev = -1 ; m_multiProcCount = 0 ; @@ -713,14 +681,14 @@ Cuda::size_type cuda_internal_maximum_grid_count() Cuda::size_type cuda_internal_maximum_shared_words() { return CudaInternal::singleton().m_maxSharedWords ; } -Cuda::size_type * cuda_internal_scratch_space( const Cuda::size_type size ) -{ return CudaInternal::singleton().scratch_space( size ); } +Cuda::size_type * cuda_internal_scratch_space( const Cuda& instance, const Cuda::size_type size ) +{ return instance.impl_internal_space_instance()->scratch_space( size ); } -Cuda::size_type * cuda_internal_scratch_flags( const Cuda::size_type size ) -{ return CudaInternal::singleton().scratch_flags( size ); } +Cuda::size_type * cuda_internal_scratch_flags( const Cuda& instance, const Cuda::size_type size ) +{ return instance.impl_internal_space_instance()->scratch_flags( size ); } -Cuda::size_type * cuda_internal_scratch_unified( const Cuda::size_type size ) -{ return CudaInternal::singleton().scratch_unified( size ); } +Cuda::size_type * cuda_internal_scratch_unified( const Cuda& instance, const Cuda::size_type size ) +{ return instance.impl_internal_space_instance()->scratch_unified( size ); } } // namespace Impl @@ -749,7 +717,7 @@ void Cuda::initialize( const Cuda::SelectDevice config , size_t num_instances ) void Cuda::impl_initialize( const Cuda::SelectDevice config , size_t num_instances ) #endif { - Impl::CudaInternal::singleton().initialize( config.cuda_device_id , num_instances ); + Impl::CudaInternal::singleton().initialize( config.cuda_device_id , 0 ); #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::initialize(); @@ -800,19 +768,17 @@ void Cuda::impl_finalize() } Cuda::Cuda() - : m_device( Impl::CudaInternal::singleton().m_cudaDev ) - , m_stream( 0 ) + : m_space_instance( &Impl::CudaInternal::singleton() ) { Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor" ); } -Cuda::Cuda( const int instance_id ) - : m_device( Impl::CudaInternal::singleton().m_cudaDev ) - , m_stream( - Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor" ) - ? Impl::CudaInternal::singleton().m_stream[ instance_id % Impl::CudaInternal::singleton().m_streamCount ] - : 0 ) -{} +Cuda::Cuda(cudaStream_t stream) + : m_space_instance(new Impl::CudaInternal) +{ + Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor" ); + m_space_instance->initialize(Impl::CudaInternal::singleton().m_cudaDev,stream); +} void Cuda::print_configuration( std::ostream & s , const bool ) { Impl::CudaInternal::singleton().print_configuration( s ); } @@ -823,13 +789,27 @@ bool Cuda::sleep() { return false ; } bool Cuda::wake() { return true ; } #endif -void Cuda::fence() +void Cuda::impl_static_fence() { Kokkos::Impl::cuda_device_synchronize(); } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE +void Cuda::fence() { + impl_static_fence(); +} +#else +void Cuda::fence() const { + m_space_instance->fence(); +} +#endif + const char* Cuda::name() { return "Cuda"; } +cudaStream_t Cuda::cuda_stream() const { return m_space_instance->m_stream ; } +int Cuda::cuda_device() const { return m_space_instance->m_cudaDev ; } + + } // namespace Kokkos namespace Kokkos { diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp new file mode 100644 index 0000000000..f9e333fcf0 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp @@ -0,0 +1,156 @@ +#ifndef KOKKOS_CUDA_INSTANCE_HPP_ +#define KOKKOS_CUDA_INSTANCE_HPP_ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +struct CudaTraits { + enum { WarpSize = 32 /* 0x0020 */ }; + enum { WarpIndexMask = 0x001f /* Mask for warpindex */ }; + enum { WarpIndexShift = 5 /* WarpSize == 1 << WarpShift */ }; + + enum { ConstantMemoryUsage = 0x008000 /* 32k bytes */ }; + enum { ConstantMemoryCache = 0x002000 /* 8k bytes */ }; + enum { KernelArgumentLimit = 0x001000 /* 4k bytes */ }; + + typedef unsigned long + ConstantGlobalBufferType[ ConstantMemoryUsage / sizeof(unsigned long) ]; + +#if defined(KOKKOS_ARCH_VOLTA) || \ + defined(KOKKOS_ARCH_PASCAL) + enum { ConstantMemoryUseThreshold = 0x000200 /* 0 bytes -> always use constant (or global)*/ }; +#else + enum { ConstantMemoryUseThreshold = 0x000200 /* 512 bytes */ }; +#endif + + KOKKOS_INLINE_FUNCTION static + CudaSpace::size_type warp_count( CudaSpace::size_type i ) + { return ( i + WarpIndexMask ) >> WarpIndexShift ; } + + KOKKOS_INLINE_FUNCTION static + CudaSpace::size_type warp_align( CudaSpace::size_type i ) + { + enum { Mask = ~CudaSpace::size_type( WarpIndexMask ) }; + return ( i + WarpIndexMask ) & Mask ; + } +}; + +//---------------------------------------------------------------------------- + +CudaSpace::size_type cuda_internal_multiprocessor_count(); +CudaSpace::size_type cuda_internal_maximum_warp_count(); +CudaSpace::size_type cuda_internal_maximum_grid_count(); +CudaSpace::size_type cuda_internal_maximum_shared_words(); + +CudaSpace::size_type cuda_internal_maximum_concurrent_block_count(); + +CudaSpace::size_type * cuda_internal_scratch_flags( const Cuda&, const CudaSpace::size_type size ); +CudaSpace::size_type * cuda_internal_scratch_space( const Cuda&, const CudaSpace::size_type size ); +CudaSpace::size_type * cuda_internal_scratch_unified( const Cuda&, const CudaSpace::size_type size ); + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +namespace Kokkos { +namespace Impl { + +class CudaInternal { +private: + + CudaInternal( const CudaInternal & ); + CudaInternal & operator = ( const CudaInternal & ); + + +public: + + typedef Cuda::size_type size_type ; + + int m_cudaDev ; + + // Device Properties + int m_cudaArch ; + unsigned m_multiProcCount ; + unsigned m_maxWarpCount ; + unsigned m_maxBlock ; + unsigned m_maxSharedWords ; + uint32_t m_maxConcurrency ; + int m_shmemPerSM ; + int m_maxShmemPerBlock ; + int m_regsPerSM ; + int m_maxBlocksPerSM ; + int m_maxThreadsPerSM ; + int m_maxThreadsPerBlock ; + + mutable size_type m_scratchSpaceCount ; + mutable size_type m_scratchFlagsCount ; + mutable size_type m_scratchUnifiedCount ; + mutable size_type m_scratchFunctorSize ; + size_type m_scratchUnifiedSupported ; + size_type m_streamCount ; + mutable size_type * m_scratchSpace ; + mutable size_type * m_scratchFlags ; + mutable size_type * m_scratchUnified ; + mutable size_type * m_scratchFunctor ; + uint32_t * m_scratchConcurrentBitset ; + cudaStream_t m_stream ; + + static int was_initialized; + static int was_finalized; + + static CudaInternal & singleton(); + + int verify_is_initialized( const char * const label ) const ; + + int is_initialized() const + { return 0 != m_scratchSpace && 0 != m_scratchFlags ; } + + void initialize( int cuda_device_id , cudaStream_t stream = 0 ); + void finalize(); + + void print_configuration( std::ostream & ) const ; + + void fence() const ; + + ~CudaInternal(); + + CudaInternal() + : m_cudaDev( -1 ) + , m_cudaArch( -1 ) + , m_multiProcCount( 0 ) + , m_maxWarpCount( 0 ) + , m_maxBlock( 0 ) + , m_maxSharedWords( 0 ) + , m_maxConcurrency( 0 ) + , m_shmemPerSM( 0 ) + , m_maxShmemPerBlock( 0 ) + , m_regsPerSM( 0 ) + , m_maxBlocksPerSM( 0 ) + , m_maxThreadsPerSM( 0 ) + , m_maxThreadsPerBlock( 0 ) + , m_scratchSpaceCount( 0 ) + , m_scratchFlagsCount( 0 ) + , m_scratchUnifiedCount( 0 ) + , m_scratchFunctorSize( 0 ) + , m_scratchUnifiedSupported( 0 ) + , m_streamCount( 0 ) + , m_scratchSpace( 0 ) + , m_scratchFlags( 0 ) + , m_scratchUnified( 0 ) + , m_scratchFunctor( 0 ) + , m_scratchConcurrentBitset( 0 ) + , m_stream( 0 ) + {} + + size_type * scratch_space( const size_type size ) const ; + size_type * scratch_flags( const size_type size ) const ; + size_type * scratch_unified( const size_type size ) const ; + size_type * scratch_functor( const size_type size ) const ; +}; + +} // Namespace Impl +} // Namespace Kokkos +#endif diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp new file mode 100644 index 0000000000..2ec868c1f1 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp @@ -0,0 +1,579 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDAEXEC_HPP +#define KOKKOS_CUDAEXEC_HPP + +#include +#ifdef KOKKOS_ENABLE_CUDA + +#include +#include +#include +#include +#include +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#if defined( __CUDACC__ ) + +/** \brief Access to constant memory on the device */ +#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE + +__device__ __constant__ +extern unsigned long kokkos_impl_cuda_constant_memory_buffer[] ; + +#else + +__device__ __constant__ +unsigned long kokkos_impl_cuda_constant_memory_buffer[ Kokkos::Impl::CudaTraits::ConstantMemoryUsage / sizeof(unsigned long) ] ; + +#endif + +namespace Kokkos { +namespace Impl { + void* cuda_resize_scratch_space(std::int64_t bytes, bool force_shrink = false); +} +} + +template< typename T > +inline +__device__ +T * kokkos_impl_cuda_shared_memory() +{ extern __shared__ Kokkos::CudaSpace::size_type sh[]; return (T*) sh ; } + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +// See section B.17 of Cuda C Programming Guide Version 3.2 +// for discussion of +// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor) +// function qualifier which could be used to improve performance. +//---------------------------------------------------------------------------- +// Maximize L1 cache and minimize shared memory: +// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferL1 ); +// For 2.0 capability: 48 KB L1 and 16 KB shared +//---------------------------------------------------------------------------- + +template< class DriverType> +__global__ +static void cuda_parallel_launch_constant_memory() +{ + const DriverType & driver = + *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer ); + + driver(); +} + +template< class DriverType, unsigned int maxTperB, unsigned int minBperSM > +__global__ +__launch_bounds__(maxTperB, minBperSM) +static void cuda_parallel_launch_constant_memory() +{ + const DriverType & driver = + *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer ); + + driver(); +} + +template< class DriverType> +__global__ +static void cuda_parallel_launch_local_memory( const DriverType driver ) +{ + driver(); +} + +template< class DriverType, unsigned int maxTperB, unsigned int minBperSM > +__global__ +__launch_bounds__(maxTperB, minBperSM) +static void cuda_parallel_launch_local_memory( const DriverType driver ) +{ + driver(); +} + +template< class DriverType> +__global__ +static void cuda_parallel_launch_global_memory( const DriverType* driver ) +{ + driver->operator()(); +} + +template< class DriverType, unsigned int maxTperB, unsigned int minBperSM > +__global__ +__launch_bounds__(maxTperB, minBperSM) +static void cuda_parallel_launch_global_memory( const DriverType* driver ) +{ + driver->operator()(); +} + +template< class DriverType> +__global__ +static void cuda_parallel_launch_constant_or_global_memory( const DriverType* driver_ptr ) +{ + const DriverType & driver = driver_ptr!=NULL ? *driver_ptr : + *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer ); + + driver(); +} + +template< class DriverType, unsigned int maxTperB, unsigned int minBperSM > +__global__ +__launch_bounds__(maxTperB, minBperSM) +static void cuda_parallel_launch_constant_or_global_memory( const DriverType* driver_ptr ) +{ + const DriverType & driver = driver_ptr!=NULL ? *driver_ptr : + *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer ); + + driver(); +} + +template< class DriverType > +struct DeduceCudaLaunchMechanism { + constexpr static const Kokkos::Experimental::WorkItemProperty::HintLightWeight_t light_weight = Kokkos::Experimental::WorkItemProperty::HintLightWeight; + constexpr static const Kokkos::Experimental::WorkItemProperty::HintHeavyWeight_t heavy_weight = Kokkos::Experimental::WorkItemProperty::HintHeavyWeight ; + constexpr static const typename DriverType::Policy::work_item_property property = typename DriverType::Policy::work_item_property(); + + static constexpr const Experimental::CudaLaunchMechanism valid_launch_mechanism = + // BuildValidMask + (sizeof(DriverType) + , Experimental::CudaLaunchMechanism LaunchMechanism = + DeduceCudaLaunchMechanism::launch_mechanism > +struct CudaParallelLaunch ; + +template < class DriverType + , unsigned int MaxThreadsPerBlock + , unsigned int MinBlocksPerSM> +struct CudaParallelLaunch< DriverType + , Kokkos::LaunchBounds< MaxThreadsPerBlock + , MinBlocksPerSM > + , Experimental::CudaLaunchMechanism::ConstantMemory> +{ + static_assert(sizeof(DriverType)m_maxShmemPerBlock < shmem ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); + } + #ifndef KOKKOS_ARCH_KEPLER + // On Kepler the L1 has no benefit since it doesn't cache reads + else { + CUDA_SAFE_CALL( + cudaFuncSetCacheConfig + ( cuda_parallel_launch_constant_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > + , ( prefer_shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) + ) ); + } + #endif + + // Copy functor to constant memory on the device + cudaMemcpyToSymbolAsync( + kokkos_impl_cuda_constant_memory_buffer, &driver, sizeof(DriverType), 0, cudaMemcpyHostToDevice, cudaStream_t(cuda_instance->m_stream)); + + KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); + + // Invoke the driver function on the device + cuda_parallel_launch_constant_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > + <<< grid , block , shmem , cuda_instance->m_stream >>>(); + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + CUDA_SAFE_CALL( cudaGetLastError() ); + Kokkos::Cuda().fence(); +#endif + } + } + + static cudaFuncAttributes get_cuda_func_attributes() { + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr,cuda_parallel_launch_constant_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM >); + return attr; + } +}; + +template < class DriverType> +struct CudaParallelLaunch< DriverType + , Kokkos::LaunchBounds<0,0> + , Experimental::CudaLaunchMechanism::ConstantMemory > +{ + static_assert(sizeof(DriverType)m_maxShmemPerBlock < shmem ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); + } + #ifndef KOKKOS_ARCH_KEPLER + // On Kepler the L1 has no benefit since it doesn't cache reads + else { + CUDA_SAFE_CALL( + cudaFuncSetCacheConfig + ( cuda_parallel_launch_constant_memory< DriverType > + , ( prefer_shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) + ) ); + } + #endif + + // Copy functor to constant memory on the device + cudaMemcpyToSymbolAsync( + kokkos_impl_cuda_constant_memory_buffer, &driver, sizeof(DriverType), 0, cudaMemcpyHostToDevice, cudaStream_t(cuda_instance->m_stream)); + + KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); + + // Invoke the driver function on the device + cuda_parallel_launch_constant_memory< DriverType > + <<< grid , block , shmem , cuda_instance->m_stream >>>(); + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + CUDA_SAFE_CALL( cudaGetLastError() ); + Kokkos::Cuda().fence(); +#endif + } + } + + static cudaFuncAttributes get_cuda_func_attributes() { + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr,cuda_parallel_launch_constant_memory + < DriverType >); + return attr; + } +}; + +template < class DriverType + , unsigned int MaxThreadsPerBlock + , unsigned int MinBlocksPerSM > +struct CudaParallelLaunch< DriverType + , Kokkos::LaunchBounds< MaxThreadsPerBlock + , MinBlocksPerSM > + , Experimental::CudaLaunchMechanism::LocalMemory > +{ + static_assert(sizeof(DriverType)m_maxShmemPerBlock < shmem ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); + } + #ifndef KOKKOS_ARCH_KEPLER + // On Kepler the L1 has no benefit since it doesn't cache reads + else { + CUDA_SAFE_CALL( + cudaFuncSetCacheConfig + ( cuda_parallel_launch_local_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > + , ( prefer_shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) + ) ); + } + #endif + + KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); + + // Invoke the driver function on the device + cuda_parallel_launch_local_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > + <<< grid , block , shmem , cuda_instance->m_stream >>>( driver ); + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + CUDA_SAFE_CALL( cudaGetLastError() ); + Kokkos::Cuda().fence(); +#endif + } + } + + static cudaFuncAttributes get_cuda_func_attributes() { + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr,cuda_parallel_launch_local_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM >); + return attr; + } +}; + +template < class DriverType> +struct CudaParallelLaunch< DriverType + , Kokkos::LaunchBounds<0,0> + , Experimental::CudaLaunchMechanism::LocalMemory > +{ + static_assert(sizeof(DriverType)m_maxShmemPerBlock < shmem ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); + } + #ifndef KOKKOS_ARCH_KEPLER + // On Kepler the L1 has no benefit since it doesn't cache reads + else { + CUDA_SAFE_CALL( + cudaFuncSetCacheConfig + ( cuda_parallel_launch_local_memory< DriverType > + , ( prefer_shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) + ) ); + } + #endif + + KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); + + // Invoke the driver function on the device + cuda_parallel_launch_local_memory< DriverType > + <<< grid , block , shmem , cuda_instance->m_stream >>>( driver ); + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + CUDA_SAFE_CALL( cudaGetLastError() ); + Kokkos::Cuda().fence(); +#endif + } + } + + static cudaFuncAttributes get_cuda_func_attributes() { + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr,cuda_parallel_launch_local_memory + < DriverType >); + return attr; + } +}; + +template < class DriverType + , unsigned int MaxThreadsPerBlock + , unsigned int MinBlocksPerSM> +struct CudaParallelLaunch< DriverType + , Kokkos::LaunchBounds< MaxThreadsPerBlock + , MinBlocksPerSM> + , Experimental::CudaLaunchMechanism::GlobalMemory > +{ + inline + CudaParallelLaunch( const DriverType & driver + , const dim3 & grid + , const dim3 & block + , const int shmem + , CudaInternal* cuda_instance + , const bool prefer_shmem ) + { + if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { + + if ( cuda_instance->m_maxShmemPerBlock < shmem ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); + } + #ifndef KOKKOS_ARCH_KEPLER + // On Kepler the L1 has no benefit since it doesn't cache reads + else { + CUDA_SAFE_CALL( + cudaFuncSetCacheConfig + ( cuda_parallel_launch_global_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > + , ( prefer_shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) + ) ); + } + #endif + + KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); + + DriverType* driver_ptr = NULL; + driver_ptr = reinterpret_cast(cuda_instance->scratch_functor(sizeof(DriverType))); + cudaMemcpyAsync(driver_ptr,&driver, sizeof(DriverType), cudaMemcpyDefault, cuda_instance->m_stream); + + // Invoke the driver function on the device + cuda_parallel_launch_global_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > + <<< grid , block , shmem , cuda_instance->m_stream >>>( driver_ptr ); + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + CUDA_SAFE_CALL( cudaGetLastError() ); + Kokkos::Cuda().fence(); +#endif + } + } + static cudaFuncAttributes get_cuda_func_attributes() { + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr,cuda_parallel_launch_global_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM >); + return attr; + } + +}; + +template < class DriverType> +struct CudaParallelLaunch< DriverType + , Kokkos::LaunchBounds<0,0> + , Experimental::CudaLaunchMechanism::GlobalMemory > +{ + inline + CudaParallelLaunch( const DriverType & driver + , const dim3 & grid + , const dim3 & block + , const int shmem + , CudaInternal* cuda_instance + , const bool prefer_shmem) + { + if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { + + if ( cuda_instance->m_maxShmemPerBlock < shmem ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); + } + #ifndef KOKKOS_ARCH_KEPLER + // On Kepler the L1 has no benefit since it doesn't cache reads + else { + CUDA_SAFE_CALL( + cudaFuncSetCacheConfig + ( cuda_parallel_launch_global_memory< DriverType > + , ( prefer_shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) + ) ); + } + #endif + + KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); + + DriverType* driver_ptr = NULL; + driver_ptr = reinterpret_cast(cuda_instance->scratch_functor(sizeof(DriverType))); + cudaMemcpyAsync(driver_ptr,&driver, sizeof(DriverType), cudaMemcpyDefault, cuda_instance->m_stream); + + cuda_parallel_launch_global_memory< DriverType > + <<< grid , block , shmem , cuda_instance->m_stream >>>( driver_ptr ); + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + CUDA_SAFE_CALL( cudaGetLastError() ); + Kokkos::Cuda().fence(); +#endif + } + } + + static cudaFuncAttributes get_cuda_func_attributes() { + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr,cuda_parallel_launch_global_memory + < DriverType >); + return attr; + } +}; +//---------------------------------------------------------------------------- + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* defined( __CUDACC__ ) */ +#endif /* defined( KOKKOS_ENABLE_CUDA ) */ +#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */ + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp index 665d0732a7..c05fbcc6c1 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp @@ -55,9 +55,9 @@ #include #include -#include +#include #include -#include +#include #include #include #include @@ -73,6 +73,9 @@ //---------------------------------------------------------------------------- namespace Kokkos { + +extern bool show_warnings() noexcept; + namespace Impl { template< class ... Properties > @@ -85,10 +88,14 @@ public: typedef PolicyTraits traits; + template< class ExecSpace, class ... OtherProperties > + friend class TeamPolicyInternal; + private: enum { MAX_WARP = 8 }; + typename traits::execution_space m_space; int m_league_size ; int m_team_size ; int m_vector_length ; @@ -101,6 +108,19 @@ public: //! Execution space of this execution policy typedef Kokkos::Cuda execution_space ; + template + TeamPolicyInternal( const TeamPolicyInternal& p ) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_vector_length = p.m_vector_length; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + m_space = p.m_space; + } + TeamPolicyInternal& operator = (const TeamPolicyInternal& p) { m_league_size = p.m_league_size; m_team_size = p.m_team_size; @@ -110,6 +130,7 @@ public: m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; m_chunk_size = p.m_chunk_size; + m_space = p.m_space; return *this; } @@ -117,7 +138,7 @@ public: #ifdef KOKKOS_ENABLE_DEPRECATED_CODE template< class FunctorType > - inline static + static inline int team_size_max( const FunctorType & functor ) { int n = MAX_WARP * Impl::CudaTraits::WarpSize ; @@ -128,7 +149,7 @@ public: /* for team reduce */ + ( n + 2 ) * sizeof(double) /* for team shared */ + Impl::FunctorTeamShmemSize< FunctorType >::value( functor , n ); - if ( shmem_size < Impl::CudaTraits::SharedMemoryCapacity ) break ; + if ( shmem_size < typename traits::execution_space().impl_internal_space_instance()->m_maxShmemPerBlock ) break ; } return n ; @@ -138,7 +159,10 @@ public: template int team_size_max( const FunctorType& f, const ParallelForTag& ) const { typedef Impl::ParallelFor< FunctorType , TeamPolicy > closure_type; - int block_size = Kokkos::Impl::cuda_get_max_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(), + cudaFuncAttributes attr = CudaParallelLaunch< closure_type, typename traits::launch_bounds >:: + get_cuda_func_attributes(); + int block_size = Kokkos::Impl::cuda_get_max_block_size< FunctorType, typename traits::launch_bounds >( + space().impl_internal_space_instance(),attr,f ,(size_t) vector_length(), (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double) ); return block_size/vector_length(); } @@ -150,7 +174,10 @@ public: typedef Impl::ParallelReduce< FunctorType , TeamPolicy, reducer_type > closure_type; typedef Impl::FunctorValueTraits< FunctorType , typename traits::work_tag > functor_value_traits; - int block_size = Kokkos::Impl::cuda_get_max_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(), + cudaFuncAttributes attr = CudaParallelLaunch< closure_type, typename traits::launch_bounds >:: + get_cuda_func_attributes(); + int block_size = Kokkos::Impl::cuda_get_max_block_size< FunctorType, typename traits::launch_bounds >( + space().impl_internal_space_instance(),attr,f ,(size_t) vector_length(), (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double) + ((functor_value_traits::StaticValueSize!=0)?0:functor_value_traits::value_size( f ))); @@ -178,7 +205,11 @@ public: template int team_size_recommended( const FunctorType& f, const ParallelForTag& ) const { typedef Impl::ParallelFor< FunctorType , TeamPolicy > closure_type; - int block_size = Kokkos::Impl::cuda_get_opt_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(), + cudaFuncAttributes attr = CudaParallelLaunch< closure_type, typename traits::launch_bounds >:: + get_cuda_func_attributes(); + const int block_size = Kokkos::Impl::cuda_get_opt_block_size< FunctorType, typename traits::launch_bounds>( + space().impl_internal_space_instance(), + attr, f , (size_t) vector_length(), (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double)); return block_size/vector_length(); } @@ -190,10 +221,18 @@ public: typedef Impl::ParallelReduce< FunctorType , TeamPolicy, reducer_type > closure_type; typedef Impl::FunctorValueTraits< FunctorType , typename traits::work_tag > functor_value_traits; - int block_size = Kokkos::Impl::cuda_get_opt_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(), + cudaFuncAttributes attr = CudaParallelLaunch< closure_type, typename traits::launch_bounds >:: + get_cuda_func_attributes(); + const int block_size = Kokkos::Impl::cuda_get_opt_block_size< FunctorType, typename traits::launch_bounds>( + space().impl_internal_space_instance(), + attr, f , (size_t) vector_length(), (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double) + ((functor_value_traits::StaticValueSize!=0)?0:functor_value_traits::value_size( f ))); - return block_size/vector_length(); + // Currently we require Power-of-2 team size for reductions. + int p2 = 1; + while(p2<=block_size) p2*=2; + p2/=2; + return p2/vector_length(); } @@ -201,6 +240,25 @@ public: int vector_length_max() { return Impl::CudaTraits::WarpSize; } + inline static + int verify_requested_vector_length( int requested_vector_length ) { + int test_vector_length = std::min( requested_vector_length, vector_length_max() ); + + // Allow only power-of-two vector_length + if ( !(is_integral_power_of_two( test_vector_length ) ) ) { + int test_pow2 = 1; + for (int i = 0; i < 5; i++) { + test_pow2 = test_pow2 << 1; + if (test_pow2 > test_vector_length) { + break; + } + } + test_vector_length = test_pow2 >> 1; + } + + return test_vector_length; + } + inline static int scratch_size_max(int level) { return (level==0? @@ -224,9 +282,14 @@ public: return m_thread_scratch_size[level]; } + inline typename traits::execution_space space() const { + return m_space; + } + TeamPolicyInternal() - : m_league_size( 0 ) - , m_team_size( 0 ) + : m_space(typename traits::execution_space()) + , m_league_size( 0 ) + , m_team_size( -1 ) , m_vector_length( 0 ) , m_team_scratch_size {0,0} , m_thread_scratch_size {0,0} @@ -234,22 +297,18 @@ public: {} /** \brief Specify league size, request team size */ - TeamPolicyInternal( execution_space & + TeamPolicyInternal( const execution_space space_ , int league_size_ , int team_size_request , int vector_length_request = 1 ) - : m_league_size( league_size_ ) + : m_space( space_ ) + , m_league_size( league_size_ ) , m_team_size( team_size_request ) - , m_vector_length( vector_length_request ) + , m_vector_length( verify_requested_vector_length(vector_length_request) ) , m_team_scratch_size {0,0} , m_thread_scratch_size {0,0} , m_chunk_size ( 32 ) { - // Allow only power-of-two vector_length - if ( ! Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) { - Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy."); - } - // Make sure league size is permissable if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space."); @@ -261,22 +320,18 @@ public: } /** \brief Specify league size, request team size */ - TeamPolicyInternal( execution_space & + TeamPolicyInternal( const execution_space space_ , int league_size_ , const Kokkos::AUTO_t & /* team_size_request */ , int vector_length_request = 1 ) - : m_league_size( league_size_ ) + : m_space( space_ ) + , m_league_size( league_size_ ) , m_team_size( -1 ) - , m_vector_length( vector_length_request ) + , m_vector_length( verify_requested_vector_length(vector_length_request) ) , m_team_scratch_size {0,0} , m_thread_scratch_size {0,0} , m_chunk_size ( 32 ) { - // Allow only power-of-two vector_length - if ( ! Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) { - Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy."); - } - // Make sure league size is permissable if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space."); @@ -285,18 +340,14 @@ public: TeamPolicyInternal( int league_size_ , int team_size_request , int vector_length_request = 1 ) - : m_league_size( league_size_ ) + : m_space( typename traits::execution_space() ) + , m_league_size( league_size_ ) , m_team_size( team_size_request ) - , m_vector_length ( vector_length_request ) + , m_vector_length ( verify_requested_vector_length(vector_length_request) ) , m_team_scratch_size {0,0} , m_thread_scratch_size {0,0} , m_chunk_size ( 32 ) { - // Allow only power-of-two vector_length - if ( ! Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) { - Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy."); - } - // Make sure league size is permissable if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space."); @@ -310,18 +361,14 @@ public: TeamPolicyInternal( int league_size_ , const Kokkos::AUTO_t & /* team_size_request */ , int vector_length_request = 1 ) - : m_league_size( league_size_ ) + : m_space( typename traits::execution_space() ) + , m_league_size( league_size_ ) , m_team_size( -1 ) - , m_vector_length ( vector_length_request ) + , m_vector_length ( verify_requested_vector_length(vector_length_request) ) , m_team_scratch_size {0,0} , m_thread_scratch_size {0,0} , m_chunk_size ( 32 ) { - // Allow only power-of-two vector_length - if ( ! Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) { - Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy."); - } - // Make sure league size is permissable if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space."); @@ -431,9 +478,10 @@ class ParallelFor< FunctorType , Kokkos::Cuda > { +public: + typedef Kokkos::RangePolicy< Traits ... > Policy; private: - typedef Kokkos::RangePolicy< Traits ... > Policy; typedef typename Policy::member_type Member ; typedef typename Policy::work_tag WorkTag ; typedef typename Policy::launch_bounds LaunchBounds ; @@ -479,11 +527,17 @@ public: void execute() const { const typename Policy::index_type nwork = m_policy.end() - m_policy.begin(); - const int block_size = Kokkos::Impl::cuda_get_opt_block_size< ParallelFor, LaunchBounds>( m_functor , 1, 0 , 0 ); - const dim3 block( 1 , block_size , 1); - const dim3 grid( std::min( typename Policy::index_type(( nwork + block.y - 1 ) / block.y) , typename Policy::index_type(cuda_internal_maximum_grid_count()) ) , 1 , 1); - CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 ); + cudaFuncAttributes attr = CudaParallelLaunch< ParallelFor, LaunchBounds >:: + get_cuda_func_attributes(); + const int block_size = Kokkos::Impl::cuda_get_opt_block_size< FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), + attr, m_functor , 1, 0 , 0 ); + const dim3 block( 1 , block_size , 1); + const dim3 grid( std::min( typename Policy::index_type(( nwork + block.y - 1 ) / block.y) , + typename Policy::index_type(cuda_internal_maximum_grid_count()) ) , 1 , 1); + + CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 , m_policy.space().impl_internal_space_instance() , false ); } ParallelFor( const FunctorType & arg_functor , @@ -491,6 +545,7 @@ public: : m_functor( arg_functor ) , m_policy( arg_policy ) { } + }; @@ -501,8 +556,9 @@ class ParallelFor< FunctorType , Kokkos::Cuda > { -private: +public: typedef Kokkos::MDRangePolicy< Traits ... > Policy ; +private: using RP = Policy; typedef typename Policy::array_index_type array_index_type; typedef typename Policy::index_type index_type; @@ -526,7 +582,7 @@ public: void execute() const { if(m_rp.m_num_tiles==0) return; - const array_index_type maxblocks = static_cast(Kokkos::Impl::CudaTraits::UpperBoundGridCount); + const array_index_type maxblocks = static_cast(m_rp.space().impl_internal_space_instance()->m_maxBlock); if ( RP::rank == 2 ) { const dim3 block( m_rp.m_tile[0] , m_rp.m_tile[1] , 1); @@ -535,7 +591,7 @@ public: , std::min( ( m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1 ) / block.y , maxblocks ) , 1 ); - CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 ); + CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 , m_rp.space().impl_internal_space_instance() , false ); } else if ( RP::rank == 3 ) { @@ -545,7 +601,7 @@ public: , std::min( ( m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1 ) / block.y , maxblocks ) , std::min( ( m_rp.m_upper[2] - m_rp.m_lower[2] + block.z - 1 ) / block.z , maxblocks ) ); - CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 ); + CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 , m_rp.space().impl_internal_space_instance() , false ); } else if ( RP::rank == 4 ) { @@ -557,7 +613,7 @@ public: , std::min( ( m_rp.m_upper[2] - m_rp.m_lower[2] + block.y - 1 ) / block.y , maxblocks ) , std::min( ( m_rp.m_upper[3] - m_rp.m_lower[3] + block.z - 1 ) / block.z , maxblocks ) ); - CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 ); + CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 , m_rp.space().impl_internal_space_instance() , false ); } else if ( RP::rank == 5 ) { @@ -570,7 +626,7 @@ public: , static_cast(maxblocks) ) , std::min( ( m_rp.m_upper[4] - m_rp.m_lower[4] + block.z - 1 ) / block.z , maxblocks ) ); - CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 ); + CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 , m_rp.space().impl_internal_space_instance() , false ); } else if ( RP::rank == 6 ) { @@ -584,7 +640,7 @@ public: , std::min( static_cast( m_rp.m_tile_end[4] * m_rp.m_tile_end[5] ) , static_cast(maxblocks) ) ); - CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 ); + CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 , m_rp.space().impl_internal_space_instance() , false ); } else { @@ -609,9 +665,10 @@ class ParallelFor< FunctorType , Kokkos::Cuda > { +public: + typedef TeamPolicyInternal< Kokkos::Cuda , Properties ... > Policy ; private: - typedef TeamPolicyInternal< Kokkos::Cuda , Properties ... > Policy ; typedef typename Policy::member_type Member ; typedef typename Policy::work_tag WorkTag ; typedef typename Policy::launch_bounds LaunchBounds ; @@ -631,13 +688,14 @@ private: // const FunctorType m_functor ; + const Policy m_policy ; const size_type m_league_size ; - const size_type m_team_size ; + int m_team_size ; const size_type m_vector_size ; - const int m_shmem_begin ; - const int m_shmem_size ; + int m_shmem_begin ; + int m_shmem_size ; void* m_scratch_ptr[2] ; - const int m_scratch_size[2] ; + int m_scratch_size[2] ; template< class TagType > __device__ inline @@ -705,7 +763,7 @@ public: const dim3 grid( int(m_league_size) , 1 , 1 ); const dim3 block( int(m_vector_size) , int(m_team_size) , 1 ); - CudaParallelLaunch< ParallelFor, LaunchBounds >( *this, grid, block, shmem_size_total ); // copy to device and execute + CudaParallelLaunch< ParallelFor, LaunchBounds >( *this, grid, block, shmem_size_total, m_policy.space().impl_internal_space_instance() , true ); // copy to device and execute } @@ -713,26 +771,37 @@ public: , const Policy & arg_policy ) : m_functor( arg_functor ) + , m_policy( arg_policy ) , m_league_size( arg_policy.league_size() ) - , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelFor, LaunchBounds >( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length() ) + , m_team_size( arg_policy.team_size() ) , m_vector_size( arg_policy.vector_length() ) - , m_shmem_begin( sizeof(double) * ( m_team_size + 2 ) ) - , m_shmem_size( arg_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( m_functor , m_team_size ) ) - , m_scratch_ptr{NULL,NULL} - , m_scratch_size{arg_policy.scratch_size(0,m_team_size),arg_policy.scratch_size(1,m_team_size)} { + cudaFuncAttributes attr = CudaParallelLaunch< ParallelFor, LaunchBounds >:: + get_cuda_func_attributes(); + m_team_size = m_team_size>=0?m_team_size:Kokkos::Impl::cuda_get_opt_block_size< FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), + attr, m_functor , m_vector_size, + m_policy.team_scratch_size(0), m_policy.thread_scratch_size(0) )/m_vector_size; + + m_shmem_begin = ( sizeof(double) * ( m_team_size + 2 ) ); + m_shmem_size = ( m_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( m_functor , m_team_size ) ); + m_scratch_size[0] = m_policy.scratch_size(0,m_team_size); + m_scratch_size[1] = m_policy.scratch_size(1,m_team_size); + // Functor's reduce memory, team scan memory, and team shared memory depend upon team size. - m_scratch_ptr[1] = cuda_resize_scratch_space(m_scratch_size[1]*(Cuda::concurrency()/(m_team_size*m_vector_size))); + m_scratch_ptr[0] = NULL; + m_scratch_ptr[1] = m_team_size<=0?NULL:cuda_resize_scratch_space(static_cast(m_scratch_size[1])*static_cast(Cuda::concurrency()/(m_team_size*m_vector_size))); const int shmem_size_total = m_shmem_begin + m_shmem_size ; - if ( CudaTraits::SharedMemoryCapacity < shmem_size_total ) { + if ( m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < shmem_size_total ) { + printf("%i %i\n",m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock,shmem_size_total); Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelFor< Cuda > insufficient shared memory")); } if ( int(m_team_size) > - int(Kokkos::Impl::cuda_get_max_block_size< ParallelFor, LaunchBounds > - ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length())) { + int(Kokkos::Impl::cuda_get_max_block_size< FunctorType, LaunchBounds > + ( m_policy.space().impl_internal_space_instance(), + attr, arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length())) { Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelFor< Cuda > requested too large team size.")); } } @@ -754,9 +823,10 @@ class ParallelReduce< FunctorType , Kokkos::Cuda > { +public: + typedef Kokkos::RangePolicy< Traits ... > Policy ; private: - typedef Kokkos::RangePolicy< Traits ... > Policy ; typedef typename Policy::WorkRange WorkRange ; typedef typename Policy::work_tag WorkTag ; @@ -897,11 +967,16 @@ public: }*/ // Determine block size constrained by shared memory: - static inline + inline unsigned local_block_size( const FunctorType & f ) { unsigned n = CudaTraits::WarpSize * 8 ; - while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem( f , n ) ) { n >>= 1 ; } + int shmem_size = cuda_single_inter_block_reduce_scan_shmem( f , n ); + while ( (n && (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < shmem_size)) || + (n > static_cast(Kokkos::Impl::cuda_get_max_block_size< ParallelReduce, LaunchBounds>( f , 1, shmem_size , 0 )))) { + n >>= 1 ; + shmem_size = cuda_single_inter_block_reduce_scan_shmem( f , n ); + } return n ; } @@ -912,9 +987,9 @@ public: if ( nwork ) { const int block_size = local_block_size( m_functor ); - m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_size /* block_size == max block_count */ ); - m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) ); - m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); + m_scratch_space = cuda_internal_scratch_space( m_policy.space(), ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_size /* block_size == max block_count */ ); + m_scratch_flags = cuda_internal_scratch_flags( m_policy.space(), sizeof(size_type) ); + m_unified_space = cuda_internal_scratch_unified( m_policy.space(), ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); // REQUIRED ( 1 , N , 1 ) const dim3 block( 1 , block_size , 1 ); @@ -923,10 +998,10 @@ public: const int shmem = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem( m_functor , block.y ); - CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute + CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem , m_policy.space().impl_internal_space_instance() , false ); // copy to device and execute if(!m_result_ptr_device_accessible) { - Cuda::fence(); + Cuda().fence(); if ( m_result_ptr ) { if ( m_unified_space ) { @@ -987,9 +1062,10 @@ class ParallelReduce< FunctorType , Kokkos::Cuda > { +public: + typedef Kokkos::MDRangePolicy< Traits ... > Policy ; private: - typedef Kokkos::MDRangePolicy< Traits ... > Policy ; typedef typename Policy::array_index_type array_index_type; typedef typename Policy::index_type index_type; @@ -1121,11 +1197,16 @@ public: } */ // Determine block size constrained by shared memory: - static inline + inline unsigned local_block_size( const FunctorType & f ) { unsigned n = CudaTraits::WarpSize * 8 ; - while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem( f , n ) ) { n >>= 1 ; } + int shmem_size = cuda_single_inter_block_reduce_scan_shmem( f , n ); + while ( (n && (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < shmem_size)) || + (n > static_cast(Kokkos::Impl::cuda_get_max_block_size< ParallelReduce, LaunchBounds>( f , 1, shmem_size , 0 )))) { + n >>= 1 ; + shmem_size = cuda_single_inter_block_reduce_scan_shmem( f , n ); + } return n ; } @@ -1144,9 +1225,9 @@ public: block_size = (block_size > suggested_blocksize) ? block_size : suggested_blocksize ; //Note: block_size must be less than or equal to 512 - m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_size /* block_size == max block_count */ ); - m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) ); - m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); + m_scratch_space = cuda_internal_scratch_space( m_policy.space(), ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_size /* block_size == max block_count */ ); + m_scratch_flags = cuda_internal_scratch_flags( m_policy.space(), sizeof(size_type) ); + m_unified_space = cuda_internal_scratch_unified( m_policy.space(), ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); // REQUIRED ( 1 , N , 1 ) const dim3 block( 1 , block_size , 1 ); @@ -1155,10 +1236,10 @@ public: const int shmem = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem( m_functor , block.y ); - CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute + CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem , m_policy.space().impl_internal_space_instance() , false ); // copy to device and execute if(!m_result_ptr_device_accessible) { - Cuda::fence(); + Cuda().fence(); if ( m_result_ptr ) { if ( m_unified_space ) { @@ -1213,8 +1294,6 @@ public: //---------------------------------------------------------------------------- -#if 1 - template< class FunctorType , class ReducerType, class ... Properties > class ParallelReduce< FunctorType , Kokkos::TeamPolicy< Properties ... > @@ -1222,9 +1301,10 @@ class ParallelReduce< FunctorType , Kokkos::Cuda > { +public: + typedef TeamPolicyInternal< Kokkos::Cuda, Properties ... > Policy ; private: - typedef TeamPolicyInternal< Kokkos::Cuda, Properties ... > Policy ; typedef typename Policy::member_type Member ; typedef typename Policy::work_tag WorkTag ; typedef typename Policy::launch_bounds LaunchBounds ; @@ -1261,6 +1341,7 @@ private: // const FunctorType m_functor ; + const Policy m_policy ; const ReducerType m_reducer ; const pointer_type m_result_ptr ; const bool m_result_ptr_device_accessible ; @@ -1273,7 +1354,7 @@ private: void* m_scratch_ptr[2] ; int m_scratch_size[2] ; const size_type m_league_size ; - const size_type m_team_size ; + int m_team_size ; const size_type m_vector_size ; template< class TagType > @@ -1412,20 +1493,20 @@ public: const int nwork = m_league_size * m_team_size ; if ( nwork ) { const int block_count = UseShflReduction? std::min( m_league_size , size_type(1024*32) ) - :std::min( m_league_size , m_team_size ); + :std::min( int(m_league_size) , m_team_size ); - m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_count ); - m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) ); - m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); + m_scratch_space = cuda_internal_scratch_space(m_policy.space(), ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_count ); + m_scratch_flags = cuda_internal_scratch_flags(m_policy.space(), sizeof(size_type) ); + m_unified_space = cuda_internal_scratch_unified( m_policy.space(),ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); const dim3 block( m_vector_size , m_team_size , 1 ); const dim3 grid( block_count , 1 , 1 ); const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ; - CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem_size_total ); // copy to device and execute + CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem_size_total , m_policy.space().impl_internal_space_instance() , true ); // copy to device and execute if(!m_result_ptr_device_accessible) { - Cuda::fence(); + Cuda().fence(); if ( m_result_ptr ) { if ( m_unified_space ) { @@ -1454,6 +1535,7 @@ public: Kokkos::is_view< ViewType >::value ,void*>::type = NULL) : m_functor( arg_functor ) + , m_policy ( arg_policy ) , m_reducer( InvalidType() ) , m_result_ptr( arg_result.data() ) , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ViewType::memory_space>::accessible ) @@ -1464,35 +1546,30 @@ public: , m_shmem_begin( 0 ) , m_shmem_size( 0 ) , m_scratch_ptr{NULL,NULL} - , m_scratch_size{ - arg_policy.scratch_size(0,( 0 <= arg_policy.team_size() ? arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(), - arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / - arg_policy.vector_length() ) - ), arg_policy.scratch_size(1,( 0 <= arg_policy.team_size() ? arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(), - arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / - arg_policy.vector_length() ) - )} , m_league_size( arg_policy.league_size() ) - , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(), - arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / - arg_policy.vector_length() ) + , m_team_size( arg_policy.team_size() ) , m_vector_size( arg_policy.vector_length() ) { + cudaFuncAttributes attr = CudaParallelLaunch< ParallelReduce, LaunchBounds >:: + get_cuda_func_attributes(); + m_team_size = m_team_size>=0?m_team_size: + Kokkos::Impl::cuda_get_opt_block_size< FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), + attr, m_functor , m_vector_size, + m_policy.team_scratch_size(0), m_policy.thread_scratch_size(0) )/m_vector_size; + // Return Init value if the number of worksets is zero - if( arg_policy.league_size() == 0) { + if( m_league_size*m_team_size == 0) { ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , arg_result.data() ); return ; } m_team_begin = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem( arg_functor , m_team_size ); m_shmem_begin = sizeof(double) * ( m_team_size + 2 ); - m_shmem_size = arg_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , m_team_size ); - m_scratch_ptr[1] = cuda_resize_scratch_space(static_cast(m_scratch_size[1])*(static_cast(Cuda::concurrency()/(m_team_size*m_vector_size)))); + m_shmem_size = m_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , m_team_size ); m_scratch_size[0] = m_shmem_size; - m_scratch_size[1] = arg_policy.scratch_size(1,m_team_size); + m_scratch_size[1] = m_policy.scratch_size(1,m_team_size); + m_scratch_ptr[1] = m_team_size<=0?NULL:cuda_resize_scratch_space(static_cast(m_scratch_size[1])*(static_cast(Cuda::concurrency()/(m_team_size*m_vector_size)))); // The global parallel_reduce does not support vector_length other than 1 at the moment if( (arg_policy.vector_length() > 1) && !UseShflReduction ) @@ -1509,7 +1586,7 @@ public: Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > bad team size")); } - if ( CudaTraits::SharedMemoryCapacity < shmem_size_total ) { + if ( m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < shmem_size_total ) { Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too much L0 scratch memory")); } @@ -1523,6 +1600,7 @@ public: , const Policy & arg_policy , const ReducerType & reducer) : m_functor( arg_functor ) + , m_policy( arg_policy ) , m_reducer( reducer ) , m_result_ptr( reducer.view().data() ) , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ReducerType::result_view_type::memory_space>::accessible ) @@ -1534,12 +1612,17 @@ public: , m_shmem_size( 0 ) , m_scratch_ptr{NULL,NULL} , m_league_size( arg_policy.league_size() ) - , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(), - arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / - arg_policy.vector_length() ) + , m_team_size( arg_policy.team_size() ) , m_vector_size( arg_policy.vector_length() ) { + cudaFuncAttributes attr = CudaParallelLaunch< ParallelReduce, LaunchBounds >:: + get_cuda_func_attributes(); + m_team_size = m_team_size>=0?m_team_size: + Kokkos::Impl::cuda_get_opt_block_size< FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), + attr, m_functor , m_vector_size, + m_policy.team_scratch_size(0), m_policy.thread_scratch_size(0) )/m_vector_size; + // Return Init value if the number of worksets is zero if( arg_policy.league_size() == 0) { ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , m_result_ptr ); @@ -1548,10 +1631,10 @@ public: m_team_begin = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem( arg_functor , m_team_size ); m_shmem_begin = sizeof(double) * ( m_team_size + 2 ); - m_shmem_size = arg_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , m_team_size ); - m_scratch_ptr[1] = cuda_resize_scratch_space(m_scratch_size[1]*(Cuda::concurrency()/(m_team_size*m_vector_size))); + m_shmem_size = m_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , m_team_size ); m_scratch_size[0] = m_shmem_size; - m_scratch_size[1] = arg_policy.scratch_size(1,m_team_size); + m_scratch_size[1] = m_policy.scratch_size(1,m_team_size); + m_scratch_ptr[1] = m_team_size<=0?NULL:cuda_resize_scratch_space(static_cast(m_scratch_size[1])*static_cast(Cuda::concurrency()/(m_team_size*m_vector_size))); // The global parallel_reduce does not support vector_length other than 1 at the moment if( (arg_policy.vector_length() > 1) && !UseShflReduction ) @@ -1565,7 +1648,7 @@ public: const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ; if ( (! Kokkos::Impl::is_integral_power_of_two( m_team_size ) && !UseShflReduction ) || - CudaTraits::SharedMemoryCapacity < shmem_size_total ) { + m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < shmem_size_total ) { Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > bad team size")); } if ( int(m_team_size) > arg_policy.team_size_max(m_functor,ParallelReduceTag()) ) { @@ -1575,365 +1658,6 @@ public: } }; -//---------------------------------------------------------------------------- -#else -//---------------------------------------------------------------------------- - -template< class FunctorType , class ReducerType, class ... Properties > -class ParallelReduce< FunctorType - , Kokkos::TeamPolicy< Properties ... > - , ReducerType - , Kokkos::Cuda - > -{ -private: - - enum : int { align_scratch_value = 0x0100 /* 256 */ }; - enum : int { align_scratch_mask = align_scratch_value - 1 }; - - KOKKOS_INLINE_FUNCTION static constexpr - int align_scratch( const int n ) - { - return ( n & align_scratch_mask ) - ? n + align_scratch_value - ( n & align_scratch_mask ) : n ; - } - - //---------------------------------------- - // Reducer does not wrap a functor - template< class R = ReducerType , class F = void > - struct reducer_type : public R { - - template< class S > - using rebind = reducer_type< typename R::rebind , void > ; - - KOKKOS_INLINE_FUNCTION - reducer_type( FunctorType const * - , ReducerType const * arg_reducer - , typename R::value_type * arg_value ) - : R( *arg_reducer , arg_value ) {} - }; - - // Reducer does wrap a functor - template< class R > - struct reducer_type< R , FunctorType > : public R { - - template< class S > - using rebind = reducer_type< typename R::rebind , FunctorType > ; - - KOKKOS_INLINE_FUNCTION - reducer_type( FunctorType const * arg_functor - , ReducerType const * - , typename R::value_type * arg_value ) - : R( arg_functor , arg_value ) {} - }; - - //---------------------------------------- - - typedef TeamPolicyInternal< Kokkos::Cuda, Properties ... > Policy ; - typedef CudaTeamMember Member ; - typedef typename Policy::work_tag WorkTag ; - typedef typename reducer_type<>::pointer_type pointer_type ; - typedef typename reducer_type<>::reference_type reference_type ; - typedef typename reducer_type<>::value_type value_type ; - typedef typename Policy::launch_bounds LaunchBounds ; - - typedef Kokkos::Impl::FunctorAnalysis - < Kokkos::Impl::FunctorPatternInterface::REDUCE - , Policy - , FunctorType - > Analysis ; - -public: - - typedef FunctorType functor_type ; - typedef Cuda::size_type size_type ; - -private: - - const FunctorType m_functor ; - const reducer_type<> m_reducer ; - size_type * m_scratch_space ; - size_type * m_unified_space ; - size_type m_team_begin ; - size_type m_shmem_begin ; - size_type m_shmem_size ; - void* m_scratch_ptr[2] ; - int m_scratch_size[2] ; - const size_type m_league_size ; - const size_type m_team_size ; - const size_type m_vector_size ; - - template< class TagType > - __device__ inline - typename std::enable_if< std::is_same< TagType , void >::value >::type - exec_team( const Member & member , reference_type update ) const - { m_functor( member , update ); } - - template< class TagType > - __device__ inline - typename std::enable_if< ! std::is_same< TagType , void >::value >::type - exec_team( const Member & member , reference_type update ) const - { m_functor( TagType() , member , update ); } - - -public: - - __device__ inline - void operator() () const - { - void * const shmem = kokkos_impl_cuda_shared_memory(); - - const bool reduce_to_host = - std::is_same< typename reducer_type<>::memory_space - , Kokkos::HostSpace >::value && - m_reducer.data(); - - value_type value ; - - typename reducer_type<>::rebind< CudaSpace > - reduce( & m_functor , & m_reducer , & value ); - - reduce.init( reduce.data() ); - - // Iterate this block through the league - - for ( int league_rank = blockIdx.x - ; league_rank < m_league_size - ; league_rank += gridDim.x ) { - - // Initialization of team member data: - - const Member member - ( shmem - , m_shmem_team_begin - , m_shmem_team_size - , reinterpret_cast(m_scratch_space) + m_global_team_begin - , m_global_team_size - , league_rank - , m_league_size ); - - ParallelReduce::template - exec_team< WorkTag >( member , reduce.reference() ); - } - - if ( Member::global_reduce( reduce - , m_scratch_space - , reinterpret_cast(m_scratch_space) - + aligned_flag_size - , shmem - , m_shmem_size ) ) { - - // Single thread with data in value - - reduce.final( reduce.data() ); - - if ( reduce_to_host ) { - reducer.copy( m_unified_space , reduce.data() ); - } - } - } - - - inline - void execute() - { - const bool reduce_to_host = - std::is_same< typename reducer_type<>::memory_space - , Kokkos::HostSpace >::value && - m_reducer.data(); - - const bool reduce_to_gpu = - std::is_same< typename reducer_type<>::memory_space - , Kokkos::CudaSpace >::value && - m_reducer.data(); - - if ( m_league_size && m_team_size ) { - - const int value_size = Analysis::value_size( m_functor ); - - m_scratch_space = cuda_internal_scratch_space( m_scratch_size ); - m_unified_space = cuda_internal_scratch_unified( value_size ); - - const dim3 block( m_vector_size , m_team_size , m_team_per_block ); - const dim3 grid( m_league_size , 1 , 1 ); - const int shmem = m_shmem_team_begin + m_shmem_team_size ; - - // copy to device and execute - CudaParallelLaunch( *this, grid, block, shmem ); - - Cuda::fence(); - - if ( reduce_to_host ) { - m_reducer.copy( m_reducer.data() , pointer_type(m_unified_space) ); - } - } - else if ( reduce_to_host ) { - m_reducer.init( m_reducer.data() ); - } - else if ( reduce_to_gpu ) { - value_type tmp ; - m_reduce.init( & tmp ); - cudaMemcpy( m_reduce.data() , & tmp , cudaMemcpyHostToDevice ); - } - } - - - /**\brief Set up parameters and allocations for kernel launch. - * - * block = { vector_size , team_size , team_per_block } - * grid = { number_of_teams , 1 , 1 } - * - * shmem = shared memory for: - * [ team_reduce_buffer - * , team_scratch_buffer_level_0 ] - * reused by: - * [ global_reduce_buffer ] - * - * global_scratch for: - * [ global_reduce_flag_buffer - * , global_reduce_value_buffer - * , team_scratch_buffer_level_1 * max_concurrent_team ] - */ - - ParallelReduce( FunctorType && arg_functor - , Policy && arg_policy - , ReducerType const & arg_reducer - ) - : m_functor( arg_functor ) - // the input reducer may wrap the input functor so must - // generate a reducer bound to the copied functor. - , m_reducer( & m_functor , & arg_reducer , arg_reducer.data() ) - , m_scratch_space( 0 ) - , m_unified_space( 0 ) - , m_team_begin( 0 ) - , m_shmem_begin( 0 ) - , m_shmem_size( 0 ) - , m_scratch_ptr{NULL,NULL} - , m_league_size( arg_policy.league_size() ) - , m_team_per_block( 0 ) - , m_team_size( arg_policy.team_size() ) - , m_vector_size( arg_policy.vector_length() ) - { - if ( 0 == m_league_size ) return ; - - const int value_size = Analysis::value_size( m_functor ); - - //---------------------------------------- - // Vector length must be <= WarpSize and power of two - - const bool ok_vector = m_vector_size < CudaTraits::WarpSize && - Kokkos::Impl::is_integral_power_of_two( m_vector_size ); - - //---------------------------------------- - - if ( 0 == m_team_size ) { - // Team size is AUTO, use a whole block per team. - // Calculate block size using the occupance calculator. - // Occupancy calculator assumes whole block. - - m_team_size = - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds > - ( arg_functor - , arg_policy.vector_length() - , arg_policy.team_scratch_size(0) - , arg_policy.thread_scratch_size(0) / arg_policy.vector_length() ); - - m_team_per_block = 1 ; - } - - //---------------------------------------- - // How many CUDA threads per team. - // If more than a warp or multiple teams cannot exactly fill a warp - // then only one team per block. - - const int team_threads = m_team_size * m_vector_size ; - - if ( ( CudaTraits::WarpSize < team_threads ) || - ( CudaTraits::WarpSize % team_threads ) ) { - m_team_per_block = 1 ; - } - - //---------------------------------------- - // How much team scratch shared memory determined from - // either the functor or the policy: - - if ( CudaTraits::WarpSize < team_threads ) { - // Need inter-warp team reduction (collectives) shared memory - // Speculate an upper bound for the value size - - m_shmem_team_begin = - align_scratch( CudaTraits::warp_count(team_threads) * sizeof(double) ); - } - - m_shmem_team_size = arg_policy.scratch_size(0,m_team_size); - - if ( 0 == m_shmem_team_size ) { - m_shmem_team_size = Analysis::team_shmem_size( m_functor , m_team_size ); - } - - m_shmem_team_size = align_scratch( m_shmem_team_size ); - - // Can fit a team in a block: - - const bool ok_shmem_team = - ( m_shmem_team_begin + m_shmem_team_size ) - < CudaTraits::SharedMemoryCapacity ; - - //---------------------------------------- - - if ( 0 == m_team_per_block ) { - // Potentially more than one team per block. - // Determine number of teams per block based upon - // how much team scratch can fit and exactly filling each warp. - - const int team_per_warp = team_threads / CudaTraits::WarpSize ; - - const int max_team_per_block = - Kokkos::Impl::CudaTraits::SharedMemoryCapacity - / shmem_team_scratch_size ; - - for ( m_team_per_block = team_per_warp ; - m_team_per_block + team_per_warp < max_team_per_block ; - m_team_per_block += team_per_warp ); - } - - //---------------------------------------- - // How much global reduce scratch shared memory. - - int shmem_global_reduce_size = 8 * value_size ; - - //---------------------------------------- - // Global scratch memory requirements. - - const int aligned_flag_size = align_scratch( sizeof(int) ); - - const int max_concurrent_block = - cuda_internal_maximum_concurrent_block_count(); - - // Reduce space has claim flag followed by vaue buffer - const int global_reduce_value_size = - max_concurrent_block * - ( aligned_flag_size + align_scratch( value_size ) ); - - // Scratch space has claim flag followed by scratch buffer - const int global_team_scratch_size = - max_concurrent_block * m_team_per_block * - ( aligned_flag_size + - align_scratch( arg_policy.scratch_size(1,m_team_size) / m_vector_size ) - ); - - const int global_size = aligned_flag_size - + global_reduce_value_size - + global_team_scratch_size ; - - m_global_reduce_begin = aligned_flag_size ; - m_global_team_begin = m_global_reduce_begin + global_reduce_value_size ; - m_global_size = m_global_team_begin + global_team_scratch_size ; - } -}; - -#endif - } // namespace Impl } // namespace Kokkos @@ -1949,9 +1673,9 @@ class ParallelScan< FunctorType , Kokkos::Cuda > { -private: - +public: typedef Kokkos::RangePolicy< Traits ... > Policy ; +private: typedef typename Policy::member_type Member ; typedef typename Policy::work_tag WorkTag ; typedef typename Policy::WorkRange WorkRange ; @@ -2105,7 +1829,7 @@ public: } // Determine block size constrained by shared memory: - static inline + inline unsigned local_block_size( const FunctorType & f ) { // blockDim.y must be power of two = 128 (4 warps) or 256 (8 warps) or 512 (16 warps) @@ -2114,7 +1838,7 @@ public: // 4 warps was 10% faster than 8 warps and 20% faster than 16 warps in unit testing unsigned n = CudaTraits::WarpSize * 4 ; - while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem( f , n ) ) { n >>= 1 ; } + while ( n && unsigned(m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock) < cuda_single_inter_block_reduce_scan_shmem( f , n ) ) { n >>= 1 ; } return n ; } @@ -2140,18 +1864,18 @@ public: // How many block are really needed for this much work: const int grid_x = ( nwork + work_per_block - 1 ) / work_per_block ; - m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( m_functor ) * grid_x ); - m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) * 1 ); + m_scratch_space = cuda_internal_scratch_space( m_policy.space(), ValueTraits::value_size( m_functor ) * grid_x ); + m_scratch_flags = cuda_internal_scratch_flags( m_policy.space(), sizeof(size_type) * 1 ); const dim3 grid( grid_x , 1 , 1 ); const dim3 block( 1 , block_size , 1 ); // REQUIRED DIMENSIONS ( 1 , N , 1 ) const int shmem = ValueTraits::value_size( m_functor ) * ( block_size + 2 ); m_final = false ; - CudaParallelLaunch< ParallelScan, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute + CudaParallelLaunch< ParallelScan, LaunchBounds >( *this, grid, block, shmem , m_policy.space().impl_internal_space_instance() , false ); // copy to device and execute m_final = true ; - CudaParallelLaunch< ParallelScan, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute + CudaParallelLaunch< ParallelScan, LaunchBounds >( *this, grid, block, shmem , m_policy.space().impl_internal_space_instance() , false ); // copy to device and execute } } @@ -2173,9 +1897,10 @@ class ParallelScanWithTotal< FunctorType , Kokkos::Cuda > { -private: - +public: typedef Kokkos::RangePolicy< Traits ... > Policy ; + +private: typedef typename Policy::member_type Member ; typedef typename Policy::work_tag WorkTag ; typedef typename Policy::WorkRange WorkRange ; @@ -2332,7 +2057,7 @@ public: } // Determine block size constrained by shared memory: - static inline + inline unsigned local_block_size( const FunctorType & f ) { // blockDim.y must be power of two = 128 (4 warps) or 256 (8 warps) or 512 (16 warps) @@ -2341,7 +2066,7 @@ public: // 4 warps was 10% faster than 8 warps and 20% faster than 16 warps in unit testing unsigned n = CudaTraits::WarpSize * 4 ; - while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem( f , n ) ) { n >>= 1 ; } + while ( n && unsigned(m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock) < cuda_single_inter_block_reduce_scan_shmem( f , n ) ) { n >>= 1 ; } return n ; } @@ -2367,18 +2092,18 @@ public: // How many block are really needed for this much work: const int grid_x = ( nwork + work_per_block - 1 ) / work_per_block ; - m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( m_functor ) * grid_x ); - m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) * 1 ); + m_scratch_space = cuda_internal_scratch_space( m_policy.space(), ValueTraits::value_size( m_functor ) * grid_x ); + m_scratch_flags = cuda_internal_scratch_flags( m_policy.space(), sizeof(size_type) * 1 ); const dim3 grid( grid_x , 1 , 1 ); const dim3 block( 1 , block_size , 1 ); // REQUIRED DIMENSIONS ( 1 , N , 1 ) const int shmem = ValueTraits::value_size( m_functor ) * ( block_size + 2 ); m_final = false ; - CudaParallelLaunch< ParallelScanWithTotal, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute + CudaParallelLaunch< ParallelScanWithTotal, LaunchBounds >( *this, grid, block, shmem , m_policy.space().impl_internal_space_instance() , false ); // copy to device and execute m_final = true ; - CudaParallelLaunch< ParallelScanWithTotal, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute + CudaParallelLaunch< ParallelScanWithTotal, LaunchBounds >( *this, grid, block, shmem , m_policy.space().impl_internal_space_instance() , false ); // copy to device and execute const int size = ValueTraits::value_size( m_functor ); DeepCopy( &m_returnvalue, m_scratch_space + (grid_x - 1)*size/sizeof(int), size ); diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp index d09854c3a5..c39dddb198 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp @@ -376,13 +376,13 @@ template< class ReducerType > __device__ inline typename std::enable_if< Kokkos::is_reducer::value >::type cuda_intra_warp_reduction( const ReducerType& reducer, + typename ReducerType::value_type& result, const uint32_t max_active_thread = blockDim.y) { typedef typename ReducerType::value_type ValueType; unsigned int shift = 1; - ValueType result = reducer.reference(); //Reduce over values from threads with different threadIdx.y while(blockDim.x * shift < 32 ) { const ValueType tmp = shfl_down(result, blockDim.x*shift,32u); @@ -400,6 +400,7 @@ template< class ReducerType > __device__ inline typename std::enable_if< Kokkos::is_reducer::value >::type cuda_inter_warp_reduction( const ReducerType& reducer, + typename ReducerType::value_type value, const int max_active_thread = blockDim.y) { typedef typename ReducerType::value_type ValueType; @@ -410,7 +411,6 @@ cuda_inter_warp_reduction( const ReducerType& reducer, // could lead to race conditions __shared__ double sh_result[(sizeof(ValueType)+7)/8*STEP_WIDTH]; ValueType* result = (ValueType*) & sh_result; - ValueType value = reducer.reference(); const int step = 32 / blockDim.x; int shift = STEP_WIDTH; const int id = threadIdx.y%step==0?threadIdx.y/step:65000; @@ -438,9 +438,18 @@ template< class ReducerType > __device__ inline typename std::enable_if< Kokkos::is_reducer::value >::type cuda_intra_block_reduction( const ReducerType& reducer, + typename ReducerType::value_type value, const int max_active_thread = blockDim.y) { - cuda_intra_warp_reduction(reducer,max_active_thread); - cuda_inter_warp_reduction(reducer,max_active_thread); + cuda_intra_warp_reduction(reducer,value,max_active_thread); + cuda_inter_warp_reduction(reducer,value,max_active_thread); +} + +template< class ReducerType > +__device__ inline +typename std::enable_if< Kokkos::is_reducer::value >::type +cuda_intra_block_reduction( const ReducerType& reducer, + const int max_active_thread = blockDim.y) { + cuda_intra_block_reduction(reducer,reducer.reference(),max_active_thread); } template< class ReducerType> diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp index ee949583f1..ac36cfd67e 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp @@ -54,194 +54,8 @@ namespace Kokkos { namespace Impl { -template class TaskQueue< Kokkos::Cuda > ; - -//---------------------------------------------------------------------------- - -__device__ -void TaskQueueSpecialization< Kokkos::Cuda >::driver - ( TaskQueueSpecialization< Kokkos::Cuda >::queue_type * const queue - , int32_t shmem_per_warp ) -{ - using Member = TaskExec< Kokkos::Cuda > ; - using Queue = TaskQueue< Kokkos::Cuda > ; - using task_root_type = TaskBase< void , void , void > ; - - extern __shared__ int32_t shmem_all[]; - - task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - - int32_t * const warp_shmem = - shmem_all + ( threadIdx.z * shmem_per_warp ) / sizeof(int32_t); - - task_root_type * const task_shmem = (task_root_type *) warp_shmem ; - - const int warp_lane = threadIdx.x + threadIdx.y * blockDim.x ; - - Member single_exec( warp_shmem , 1 ); - Member team_exec( warp_shmem , blockDim.y ); - - task_root_type * task_ptr ; - - // Loop until all queues are empty and no tasks in flight - - do { - - // Each team lead attempts to acquire either a thread team task - // or collection of single thread tasks for the team. - - if ( 0 == warp_lane ) { - - task_ptr = 0 < *((volatile int *) & queue->m_ready_count) ? end : 0 ; - - // Loop by priority and then type - for ( int i = 0 ; i < Queue::NumQueue && end == task_ptr ; ++i ) { - for ( int j = 0 ; j < 2 && end == task_ptr ; ++j ) { - task_ptr = Queue::pop_ready_task( & queue->m_ready[i][j] ); - } - } - -#if 0 -printf("TaskQueue::driver(%d,%d) task(%lx)\n",threadIdx.z,blockIdx.x - , uintptr_t(task_ptr)); -#endif - - } - - // Synchronize warp with memory fence before broadcasting task pointer: - - // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "A" ); - KOKKOS_IMPL_CUDA_SYNCWARP ; - - // Broadcast task pointer: - - ((int*) & task_ptr )[0] = KOKKOS_IMPL_CUDA_SHFL( ((int*) & task_ptr )[0] , 0 , 32 ); - ((int*) & task_ptr )[1] = KOKKOS_IMPL_CUDA_SHFL( ((int*) & task_ptr )[1] , 0 , 32 ); - -#if defined( KOKKOS_DEBUG ) - KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "TaskQueue CUDA task_ptr" ); -#endif - - if ( 0 == task_ptr ) break ; // 0 == queue->m_ready_count - - if ( end != task_ptr ) { - - // Whole warp copy task's closure to/from shared memory. - // Use all threads of warp for coalesced read/write. - - int32_t const b = sizeof(task_root_type) / sizeof(int32_t); - int32_t const e = *((int32_t volatile *)( & task_ptr->m_alloc_size )) / sizeof(int32_t); - - int32_t volatile * const task_mem = (int32_t volatile *) task_ptr ; - - // copy task closure from global to shared memory: - - for ( int32_t i = warp_lane ; i < e ; i += CudaTraits::WarpSize ) { - warp_shmem[i] = task_mem[i] ; - } - - // Synchronize threads of the warp and insure memory - // writes are visible to all threads in the warp. - - // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "B" ); - KOKKOS_IMPL_CUDA_SYNCWARP ; - - if ( task_root_type::TaskTeam == task_shmem->m_task_type ) { - // Thread Team Task - (*task_shmem->m_apply)( task_shmem , & team_exec ); - } - else if ( 0 == threadIdx.y ) { - // Single Thread Task - (*task_shmem->m_apply)( task_shmem , & single_exec ); - } - - // Synchronize threads of the warp and insure memory - // writes are visible to all threads in the warp. - - // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "C" ); - KOKKOS_IMPL_CUDA_SYNCWARP ; - - // copy task closure from shared to global memory: - - for ( int32_t i = b + warp_lane ; i < e ; i += CudaTraits::WarpSize ) { - task_mem[i] = warp_shmem[i] ; - } - - // Synchronize threads of the warp and insure memory - // writes are visible to root thread of the warp for - // respawn or completion. - - // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "D" ); - KOKKOS_IMPL_CUDA_SYNCWARP ; - - // If respawn requested copy respawn data back to main memory - - if ( 0 == warp_lane ) { - - if ( ((task_root_type *) task_root_type::LockTag) != task_shmem->m_next ) { - ( (volatile task_root_type *) task_ptr )->m_next = task_shmem->m_next ; - ( (volatile task_root_type *) task_ptr )->m_priority = task_shmem->m_priority ; - } - - queue->complete( task_ptr ); - } - } - } while(1); -} - -namespace { - -__global__ -void cuda_task_queue_execute( TaskQueue< Kokkos::Cuda > * queue - , int32_t shmem_size ) -{ TaskQueueSpecialization< Kokkos::Cuda >::driver( queue , shmem_size ); } - -} - -void TaskQueueSpecialization< Kokkos::Cuda >::execute - ( TaskQueue< Kokkos::Cuda > * const queue ) -{ - const int shared_per_warp = 2048 ; - const int warps_per_block = 4 ; - const dim3 grid( Kokkos::Impl::cuda_internal_multiprocessor_count() , 1 , 1 ); - const dim3 block( 1 , Kokkos::Impl::CudaTraits::WarpSize , warps_per_block ); - const int shared_total = shared_per_warp * warps_per_block ; - const cudaStream_t stream = 0 ; - - CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - -#if 0 -printf("cuda_task_queue_execute before\n"); -#endif - - // Query the stack size, in bytes: - - size_t previous_stack_size = 0 ; - CUDA_SAFE_CALL( cudaDeviceGetLimit( & previous_stack_size , cudaLimitStackSize ) ); - - // If not large enough then set the stack size, in bytes: - - const size_t larger_stack_size = 2048 ; - - if ( previous_stack_size < larger_stack_size ) { - CUDA_SAFE_CALL( cudaDeviceSetLimit( cudaLimitStackSize , larger_stack_size ) ); - } - - cuda_task_queue_execute<<< grid , block , shared_total , stream >>>( queue , shared_per_warp ); - - CUDA_SAFE_CALL( cudaGetLastError() ); - - CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - - if ( previous_stack_size < larger_stack_size ) { - CUDA_SAFE_CALL( cudaDeviceSetLimit( cudaLimitStackSize , previous_stack_size ) ); - } - -#if 0 -printf("cuda_task_queue_execute after\n"); -#endif - -} +template class TaskQueue< Kokkos::Cuda, Impl::default_tasking_memory_space_for_execution_space_t > ; +template class TaskQueueMultiple< Kokkos::Cuda, Impl::default_tasking_memory_space_for_execution_space_t > ; }} /* namespace Kokkos::Impl */ diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp index 8fa1192567..c35987e49e 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp @@ -50,6 +50,14 @@ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- +#include + +#include +#include // CUDA_SAFE_CALL +#include + +//---------------------------------------------------------------------------- + namespace Kokkos { namespace Impl { namespace { @@ -57,54 +65,498 @@ namespace { template< typename TaskType > __global__ void set_cuda_task_base_apply_function_pointer - ( TaskBase::function_type * ptr ) -{ *ptr = TaskType::apply ; } + ( typename TaskType::function_type * ptr, typename TaskType::destroy_type* dtor ) +{ + *ptr = TaskType::apply; + *dtor = TaskType::destroy; +} + +template< typename Scheduler > +__global__ +void cuda_task_queue_execute( Scheduler scheduler, int32_t shmem_size ) { + TaskQueueSpecialization< Scheduler >::driver( std::move(scheduler) , shmem_size ); +} } -template< class > class TaskExec ; +template class TaskExec ; -template<> -class TaskQueueSpecialization< Kokkos::Cuda > +template +class TaskQueueSpecialization< + SimpleTaskScheduler +> { public: - using execution_space = Kokkos::Cuda ; - using memory_space = Kokkos::CudaUVMSpace ; - using queue_type = TaskQueue< execution_space > ; - using member_type = TaskExec< Kokkos::Cuda > ; + using scheduler_type = SimpleTaskScheduler; + using execution_space = Kokkos::Cuda; + using memory_space = Kokkos::CudaUVMSpace; + using member_type = TaskExec ; + enum : long { max_league_size = 16 }; + enum : int { warps_per_block = 4 }; + + KOKKOS_INLINE_FUNCTION static - void iff_single_thread_recursive_execute( queue_type * const ) {} + void iff_single_thread_recursive_execute( scheduler_type const& ) {} + + static int get_max_team_count( + execution_space const& + ) { + return Kokkos::Impl::cuda_internal_multiprocessor_count() * warps_per_block; + } __device__ - static void driver( queue_type * const , int32_t ); + static void driver(scheduler_type scheduler, int32_t shmem_per_warp) + { + using queue_type = typename scheduler_type::task_queue_type; + using task_base_type = typename scheduler_type::task_base_type; + using runnable_task_base_type = typename scheduler_type::runnable_task_base_type; + using scheduling_info_storage_type = + SchedulingInfoStorage< + runnable_task_base_type, + typename scheduler_type::task_scheduling_info_type + >; + + extern __shared__ int32_t shmem_all[]; + + int32_t* const warp_shmem = shmem_all + (threadIdx.z * shmem_per_warp) / sizeof(int32_t); + + task_base_type* const shared_memory_task_copy = (task_base_type*)warp_shmem; + + const int warp_lane = threadIdx.x + threadIdx.y * blockDim.x; + + member_type single_exec(scheduler, warp_shmem, 1); + member_type team_exec(scheduler, warp_shmem, blockDim.y); + + auto& queue = scheduler.queue(); + auto& team_scheduler = team_exec.scheduler(); + + auto current_task = OptionalRef(); + + // Loop until all queues are empty and no tasks in flight + while(not queue.is_done()) { + + if(warp_lane == 0) { // should be (?) same as team_exec.team_rank() == 0 + // pop off a task + current_task = queue.pop_ready_task(team_scheduler.team_scheduler_info()); + } + + // Broadcast task pointer: + + // Sync before the broadcast + KOKKOS_IMPL_CUDA_SYNCWARP; + + // pretend it's an int* for shuffle purposes + ((int*) ¤t_task)[0] = KOKKOS_IMPL_CUDA_SHFL(((int*) ¤t_task)[0], 0, 32); + ((int*) ¤t_task)[1] = KOKKOS_IMPL_CUDA_SHFL(((int*) ¤t_task)[1], 0, 32); + + if(current_task) { + + KOKKOS_ASSERT(!current_task->as_runnable_task().get_respawn_flag()); + + int32_t b = sizeof(scheduling_info_storage_type) / sizeof(int32_t); + static_assert( + sizeof(scheduling_info_storage_type) % sizeof(int32_t) == 0, + "bad task size" + ); + int32_t const e = current_task->get_allocation_size() / sizeof(int32_t); + KOKKOS_ASSERT(current_task->get_allocation_size() % sizeof(int32_t) == 0); + + int32_t volatile* const task_mem = (int32_t volatile*)current_task.get(); + + // do a coordinated copy of the task closure from global to shared memory: + for(int32_t i = warp_lane; i < e; i += CudaTraits::WarpSize) { + warp_shmem[i] = task_mem[i]; + } + + // Synchronize threads of the warp and insure memory + // writes are visible to all threads in the warp. + KOKKOS_IMPL_CUDA_SYNCWARP; + + if(shared_memory_task_copy->is_team_runnable()) { + // Thread Team Task + shared_memory_task_copy->as_runnable_task().run(team_exec); + } + else if(threadIdx.y == 0) { + // TODO @tasking @optimization DSH Change this to warp_lane == 0 when we allow blockDim.x to be more than 1 + // Single Thread Task + shared_memory_task_copy->as_runnable_task().run(single_exec); + } + + // Synchronize threads of the warp and insure memory + // writes are visible to all threads in the warp. + + KOKKOS_IMPL_CUDA_SYNCWARP; + + //if(warp_lane < b % CudaTraits::WarpSize) b += CudaTraits::WarpSize; + //b -= b % CudaTraits::WarpSize; + + // copy task closure from shared to global memory: + for (int32_t i = b + warp_lane; i < e; i += CudaTraits::WarpSize) { + task_mem[i] = warp_shmem[i]; + } + + // Synchronize threads of the warp and insure memory + // writes are visible to root thread of the warp for + // respawn or completion. + + KOKKOS_IMPL_CUDA_SYNCWARP; + + + if (warp_lane == 0) { + // If respawn requested copy respawn data back to main memory + if(shared_memory_task_copy->as_runnable_task().get_respawn_flag()) { + if(shared_memory_task_copy->as_runnable_task().has_predecessor()) { + // It's not necessary to make this a volatile write because + // the next read of the predecessor is on this thread in complete, + // and the predecessor is cleared there (using a volatile write) + current_task->as_runnable_task().acquire_predecessor_from( + shared_memory_task_copy->as_runnable_task() + ); + } + + // It may not necessary to make this a volatile write, since the + // next read will be done by this thread in complete where the + // rescheduling occurs, but since the task could be stolen later + // before this is written again, we should do the volatile write + // here. (It might not be necessary though because I don't know + // where else the priority would be read after it is scheduled + // by this thread; for now, we leave it volatile, but we should + // benchmark the cost of this.) + current_task.as_volatile()->set_priority(shared_memory_task_copy->get_priority()); + + // It's not necessary to make this a volatile write, since the + // next read of it (if true) will be by this thread in `complete()`, + // which will unset the flag (using volatile) once it has handled + // the respawn + current_task->as_runnable_task().set_respawn_flag(); + + } + + queue.complete( + (*std::move(current_task)).as_runnable_task(), + team_scheduler.team_scheduler_info() + ); + } + + } + } + } static - void execute( queue_type * const ); + void execute(scheduler_type const& scheduler) + { + const int shared_per_warp = 2048 ; + const dim3 grid(Kokkos::Impl::cuda_internal_multiprocessor_count(), 1, 1); + const dim3 block(1, Kokkos::Impl::CudaTraits::WarpSize, warps_per_block); + const int shared_total = shared_per_warp * warps_per_block; + const cudaStream_t stream = nullptr; + + KOKKOS_ASSERT( + static_cast(grid.x * grid.y * grid.z * block.x * block.y * block.z) + == static_cast(get_max_team_count(scheduler.get_execution_space()) * Kokkos::Impl::CudaTraits::WarpSize) + ); + + auto& queue = scheduler.queue(); + + CUDA_SAFE_CALL(cudaDeviceSynchronize()); + + // Query the stack size, in bytes: + + size_t previous_stack_size = 0; + CUDA_SAFE_CALL(cudaDeviceGetLimit(&previous_stack_size, cudaLimitStackSize)); + + // If not large enough then set the stack size, in bytes: + + const size_t larger_stack_size = 1 << 11; + + if (previous_stack_size < larger_stack_size) { + CUDA_SAFE_CALL(cudaDeviceSetLimit(cudaLimitStackSize, larger_stack_size)); + } + + cuda_task_queue_execute<<>>(scheduler, shared_per_warp); + + CUDA_SAFE_CALL(cudaGetLastError()); + + CUDA_SAFE_CALL(cudaDeviceSynchronize()); + + if (previous_stack_size < larger_stack_size) { + CUDA_SAFE_CALL(cudaDeviceSetLimit(cudaLimitStackSize, previous_stack_size)); + } + } + + template + static + // TODO @tasking @optimiazation DSH specialize this for trivially destructible types + void + get_function_pointer( + typename TaskType::function_type& ptr, + typename TaskType::destroy_type& dtor + ) + { + using function_type = typename TaskType::function_type; + using destroy_type = typename TaskType::destroy_type; + + // TODO @tasking @minor DSH make sure there aren't any alignment concerns? + void* storage = cuda_internal_scratch_unified( + Kokkos::Cuda(), + sizeof(function_type) + sizeof(destroy_type) + ); + function_type* ptr_ptr = (function_type*)storage; + destroy_type* dtor_ptr = (destroy_type*)((char*)storage + sizeof(function_type)); + + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + + set_cuda_task_base_apply_function_pointer<<<1,1>>>(ptr_ptr, dtor_ptr); + + CUDA_SAFE_CALL( cudaGetLastError() ); + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + + ptr = *ptr_ptr; + dtor = *dtor_ptr; + } +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template +class TaskQueueSpecializationConstrained< + Scheduler, + typename std::enable_if< + std::is_same::value + >::type +> +{ +public: + + using scheduler_type = Scheduler; + using execution_space = Kokkos::Cuda; + using memory_space = Kokkos::CudaUVMSpace; + using member_type = TaskExec ; + + enum : long { max_league_size = 16 }; + + KOKKOS_INLINE_FUNCTION + static + void iff_single_thread_recursive_execute( scheduler_type const& ) {} + + __device__ + static void driver(scheduler_type scheduler, int32_t shmem_per_warp) + { + using queue_type = typename scheduler_type::queue_type; + using task_root_type = TaskBase; + + extern __shared__ int32_t shmem_all[]; + + task_root_type* const end = (task_root_type *) task_root_type::EndTag ; + task_root_type* const no_more_tasks_sentinel = nullptr; + + int32_t * const warp_shmem = + shmem_all + ( threadIdx.z * shmem_per_warp ) / sizeof(int32_t); + + task_root_type * const task_shmem = (task_root_type *) warp_shmem ; + + const int warp_lane = threadIdx.x + threadIdx.y * blockDim.x ; + + member_type single_exec(scheduler, warp_shmem, 1); + member_type team_exec(scheduler, warp_shmem, blockDim.y); + + auto& team_queue = team_exec.scheduler().queue(); + + task_root_type * task_ptr = no_more_tasks_sentinel; + + // Loop until all queues are empty and no tasks in flight + + do { + + // Each team lead attempts to acquire either a thread team task + // or collection of single thread tasks for the team. + + if ( 0 == warp_lane ) { + + if( *((volatile int *) & team_queue.m_ready_count) > 0 ) { + task_ptr = end; + // Attempt to acquire a task + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == task_ptr ; ++i ) { + for ( int j = 0 ; j < 2 && end == task_ptr ; ++j ) { + task_ptr = queue_type::pop_ready_task( & team_queue.m_ready[i][j] ); + } + } + } + else { + // returns nullptr if and only if all other queues have a ready + // count of 0 also. Otherwise, returns a task from another queue + // or `end` if one couldn't be popped + task_ptr = team_queue.attempt_to_steal_task(); + #if 0 + if(task != no_more_tasks_sentinel && task != end) { + std::printf("task stolen on rank %d\n", team_exec.league_rank()); + } + #endif + } + + } + + // Synchronize warp with memory fence before broadcasting task pointer: + + // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "A" ); + KOKKOS_IMPL_CUDA_SYNCWARP ; + + // Broadcast task pointer: + + ((int*) & task_ptr )[0] = KOKKOS_IMPL_CUDA_SHFL( ((int*) & task_ptr )[0] , 0 , 32 ); + ((int*) & task_ptr )[1] = KOKKOS_IMPL_CUDA_SHFL( ((int*) & task_ptr )[1] , 0 , 32 ); + + #if defined( KOKKOS_DEBUG ) + KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "TaskQueue CUDA task_ptr" ); + #endif + + if ( 0 == task_ptr ) break ; // 0 == queue->m_ready_count + + if ( end != task_ptr ) { + + // Whole warp copy task's closure to/from shared memory. + // Use all threads of warp for coalesced read/write. + + int32_t const b = sizeof(task_root_type) / sizeof(int32_t); + int32_t const e = *((int32_t volatile *)( & task_ptr->m_alloc_size )) / sizeof(int32_t); + + int32_t volatile * const task_mem = (int32_t volatile *) task_ptr ; + + KOKKOS_ASSERT(e * sizeof(int32_t) < shmem_per_warp); + + // copy task closure from global to shared memory: + + for ( int32_t i = warp_lane ; i < e ; i += CudaTraits::WarpSize ) { + warp_shmem[i] = task_mem[i] ; + } + + // Synchronize threads of the warp and insure memory + // writes are visible to all threads in the warp. + + // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "B" ); + KOKKOS_IMPL_CUDA_SYNCWARP ; + + if ( task_root_type::TaskTeam == task_shmem->m_task_type ) { + // Thread Team Task + (*task_shmem->m_apply)( task_shmem , & team_exec ); + } + else if ( 0 == threadIdx.y ) { + // Single Thread Task + (*task_shmem->m_apply)( task_shmem , & single_exec ); + } + + // Synchronize threads of the warp and insure memory + // writes are visible to all threads in the warp. + + // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "C" ); + KOKKOS_IMPL_CUDA_SYNCWARP ; + + // copy task closure from shared to global memory: + + for ( int32_t i = b + warp_lane ; i < e ; i += CudaTraits::WarpSize ) { + task_mem[i] = warp_shmem[i] ; + } + + // Synchronize threads of the warp and insure memory + // writes are visible to root thread of the warp for + // respawn or completion. + + // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "D" ); + KOKKOS_IMPL_CUDA_SYNCWARP ; + + // If respawn requested copy respawn data back to main memory + + if ( 0 == warp_lane ) { + + if ( ((task_root_type *) task_root_type::LockTag) != task_shmem->m_next ) { + ( (volatile task_root_type *) task_ptr )->m_next = task_shmem->m_next ; + ( (volatile task_root_type *) task_ptr )->m_priority = task_shmem->m_priority ; + } + + team_queue.complete( task_ptr ); + } + + } + } while(1); + } + + static + void execute(scheduler_type const& scheduler) + { + const int shared_per_warp = 2048 ; + const int warps_per_block = 4 ; + const dim3 grid( Kokkos::Impl::cuda_internal_multiprocessor_count() , 1 , 1 ); + //const dim3 grid( 1 , 1 , 1 ); + const dim3 block( 1 , Kokkos::Impl::CudaTraits::WarpSize , warps_per_block ); + const int shared_total = shared_per_warp * warps_per_block ; + const cudaStream_t stream = 0 ; + + auto& queue = scheduler.queue(); + queue.initialize_team_queues(warps_per_block * grid.x); + + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + + // Query the stack size, in bytes: + + size_t previous_stack_size = 0 ; + CUDA_SAFE_CALL( cudaDeviceGetLimit( & previous_stack_size , cudaLimitStackSize ) ); + + // If not large enough then set the stack size, in bytes: + + const size_t larger_stack_size = 2048 ; + + if ( previous_stack_size < larger_stack_size ) { + CUDA_SAFE_CALL( cudaDeviceSetLimit( cudaLimitStackSize , larger_stack_size ) ); + } + + cuda_task_queue_execute<<< grid , block , shared_total , stream >>>( scheduler , shared_per_warp ); + + CUDA_SAFE_CALL( cudaGetLastError() ); + + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + + if ( previous_stack_size < larger_stack_size ) { + CUDA_SAFE_CALL( cudaDeviceSetLimit( cudaLimitStackSize , previous_stack_size ) ); + } + + } template< typename TaskType > static - typename TaskType::function_type - get_function_pointer() + void + get_function_pointer( + typename TaskType::function_type& ptr, + typename TaskType::destroy_type& dtor + ) { - using function_type = typename TaskType::function_type ; + using function_type = typename TaskType::function_type; + using destroy_type = typename TaskType::destroy_type; - function_type * const ptr = - (function_type*) cuda_internal_scratch_unified( sizeof(function_type) ); + void* storage = cuda_internal_scratch_unified( + Kokkos::Cuda(), + sizeof(function_type) + sizeof(destroy_type) + ); + function_type* ptr_ptr = (function_type*)storage; + destroy_type* dtor_ptr = (destroy_type*)((char*)storage + sizeof(function_type)); CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - set_cuda_task_base_apply_function_pointer<<<1,1>>>(ptr); + set_cuda_task_base_apply_function_pointer<<<1,1>>>(ptr_ptr, dtor_ptr); CUDA_SAFE_CALL( cudaGetLastError() ); CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - return *ptr ; + ptr = *ptr_ptr; + dtor = *dtor_ptr; + } }; -extern template class TaskQueue< Kokkos::Cuda > ; +extern template class TaskQueue< Kokkos::Cuda, default_tasking_memory_space_for_execution_space_t > ; }} /* namespace Kokkos::Impl */ @@ -136,8 +588,8 @@ namespace Impl { * When executing a single thread task the syncwarp or other * warp synchronizing functions must not be called. */ -template<> -class TaskExec< Kokkos::Cuda > +template +class TaskExec { private: @@ -148,24 +600,39 @@ private: TaskExec & operator = ( TaskExec && ) = delete ; TaskExec & operator = ( TaskExec const & ) = delete ; - friend class Kokkos::Impl::TaskQueue< Kokkos::Cuda > ; - friend class Kokkos::Impl::TaskQueueSpecialization< Kokkos::Cuda > ; + friend class Kokkos::Impl::TaskQueue< Kokkos::Cuda, default_tasking_memory_space_for_execution_space_t > ; + template + friend class Kokkos::Impl::TaskQueueSpecializationConstrained; + template + friend class Kokkos::Impl::TaskQueueSpecialization; int32_t * m_team_shmem ; const int m_team_size ; + Scheduler m_scheduler; // If constructed with arg_team_size == 1 the object // can only be used by 0 == threadIdx.y. - __device__ - TaskExec( int32_t * arg_team_shmem , int arg_team_size = blockDim.y ) - : m_team_shmem( arg_team_shmem ) - , m_team_size( arg_team_size ) {} + KOKKOS_INLINE_FUNCTION + TaskExec( + Scheduler const& parent_scheduler, + int32_t* arg_team_shmem, + int arg_team_size = blockDim.y + ) + : m_team_shmem(arg_team_shmem), + m_team_size(arg_team_size), + m_scheduler(parent_scheduler.get_team_scheduler(league_rank())) + { } public: + using thread_team_member = TaskExec; + #if defined( __CUDA_ARCH__ ) - __device__ int team_rank() const { return threadIdx.y ; } - __device__ int team_size() const { return m_team_size ; } + __device__ int team_rank() const { return threadIdx.y ; } + __device__ int team_size() const { return m_team_size ; } + //__device__ int league_rank() const { return threadIdx.z; } + __device__ int league_rank() const { return blockIdx.x * blockDim.z + threadIdx.z; } + __device__ int league_size() const { return blockDim.z * gridDim.x; } __device__ void team_barrier() const { @@ -186,13 +653,18 @@ public: } #else - __host__ int team_rank() const { return 0 ; } - __host__ int team_size() const { return 0 ; } + __host__ int team_rank() const { return 0 ; } + __host__ int team_size() const { return 0 ; } + __host__ int league_rank() const { return 0; } + __host__ int league_size() const { return 0; } __host__ void team_barrier() const {} template< class ValueType > __host__ void team_broadcast( ValueType & , const int ) const {} #endif + KOKKOS_INLINE_FUNCTION Scheduler const& scheduler() const noexcept { return m_scheduler; } + KOKKOS_INLINE_FUNCTION Scheduler& scheduler() noexcept { return m_scheduler; } + }; }} /* namespace Kokkos::Impl */ @@ -203,20 +675,22 @@ public: namespace Kokkos { namespace Impl { -template -struct TeamThreadRangeBoundariesStruct > +template +struct TeamThreadRangeBoundariesStruct> { - typedef iType index_type; + using index_type = iType; + using member_type = TaskExec; + const iType start ; const iType end ; const iType increment ; - const TaskExec< Kokkos::Cuda > & thread; + member_type const& thread; #if defined( __CUDA_ARCH__ ) __device__ inline TeamThreadRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread, const iType& arg_count) + ( member_type const& arg_thread, const iType& arg_count) : start( threadIdx.y ) , end(arg_count) , increment( blockDim.y ) @@ -225,7 +699,7 @@ struct TeamThreadRangeBoundariesStruct > __device__ inline TeamThreadRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread + ( member_type const& arg_thread , const iType & arg_start , const iType & arg_end ) @@ -238,10 +712,10 @@ struct TeamThreadRangeBoundariesStruct > #else TeamThreadRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread, const iType& arg_count); + ( member_type const& arg_thread, const iType& arg_count); TeamThreadRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread + ( member_type const& arg_thread , const iType & arg_start , const iType & arg_end ); @@ -252,20 +726,22 @@ struct TeamThreadRangeBoundariesStruct > //---------------------------------------------------------------------------- -template -struct ThreadVectorRangeBoundariesStruct > +template +struct ThreadVectorRangeBoundariesStruct > { - typedef iType index_type; + using index_type = iType; + using member_type = TaskExec; + const index_type start ; const index_type end ; const index_type increment ; - const TaskExec< Kokkos::Cuda > & thread; + const member_type& thread; #if defined( __CUDA_ARCH__ ) __device__ inline ThreadVectorRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread, const index_type& arg_count ) + ( member_type const& arg_thread, const index_type& arg_count ) : start( threadIdx.x ) , end(arg_count) , increment( blockDim.x ) @@ -274,9 +750,9 @@ struct ThreadVectorRangeBoundariesStruct > __device__ inline ThreadVectorRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread, const index_type& arg_begin, const index_type& arg_end ) + ( member_type const& arg_thread, const index_type& arg_begin, const index_type& arg_end ) : start( arg_begin + threadIdx.x ) - , end(arg_count) + , end(arg_end) , increment( blockDim.x ) , thread(arg_thread) {} @@ -284,10 +760,10 @@ struct ThreadVectorRangeBoundariesStruct > #else ThreadVectorRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread, const index_type& arg_count ); + ( member_type const& arg_thread, const index_type& arg_count ); ThreadVectorRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread, const index_type& arg_begin, const index_type& arg_end); + ( member_type const& arg_thread, const index_type& arg_begin, const index_type& arg_end); #endif @@ -299,69 +775,69 @@ struct ThreadVectorRangeBoundariesStruct > namespace Kokkos { -template -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > > -TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread, const iType & count ) -{ - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > >( thread, count ); -} +//template +//KOKKOS_INLINE_FUNCTION +//Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > > +//TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread, const iType & count ) +//{ +// return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > >( thread, count ); +//} +// +//template +//KOKKOS_INLINE_FUNCTION +//Impl::TeamThreadRangeBoundariesStruct +// < typename std::common_type::type +// , Impl::TaskExec< Kokkos::Cuda > > +//TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread +// , const iType1 & begin, const iType2 & end ) +//{ +// typedef typename std::common_type< iType1, iType2 >::type iType; +// return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > >( +// thread, iType(begin), iType(end) ); +//} +// +//template +//KOKKOS_INLINE_FUNCTION +//Impl::ThreadVectorRangeBoundariesStruct > +//ThreadVectorRange( const Impl::TaskExec< Kokkos::Cuda > & thread +// , const iType & count ) +//{ +// return Impl::ThreadVectorRangeBoundariesStruct >(thread,count); +//} +// +//template +//KOKKOS_INLINE_FUNCTION +//Impl::ThreadVectorRangeBoundariesStruct > +//ThreadVectorRange( const Impl::TaskExec< Kokkos::Cuda > & thread +// , const iType & arg_begin +// , const iType & arg_end ) +//{ +// return Impl::ThreadVectorRangeBoundariesStruct >(thread,arg_begin,arg_end); +//} -template -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct - < typename std::common_type::type - , Impl::TaskExec< Kokkos::Cuda > > -TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread - , const iType1 & begin, const iType2 & end ) -{ - typedef typename std::common_type< iType1, iType2 >::type iType; - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > >( - thread, iType(begin), iType(end) ); -} +// KOKKOS_INLINE_FUNCTION +// Impl::ThreadSingleStruct > +// PerTeam(const Impl::TaskExec< Kokkos::Cuda >& thread) +// { +// return Impl::ThreadSingleStruct >(thread); +// } -template -KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct > -ThreadVectorRange( const Impl::TaskExec< Kokkos::Cuda > & thread - , const iType & count ) -{ - return Impl::ThreadVectorRangeBoundariesStruct >(thread,count); -} - -template -KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct > -ThreadVectorRange( const Impl::TaskExec< Kokkos::Cuda > & thread - , const iType & arg_begin - , const iType & arg_end ) -{ - return Impl::ThreadVectorRangeBoundariesStruct >(thread,arg_begin,arg_end); -} - -KOKKOS_INLINE_FUNCTION -Impl::ThreadSingleStruct > -PerTeam(const Impl::TaskExec< Kokkos::Cuda >& thread) -{ - return Impl::ThreadSingleStruct >(thread); -} - -KOKKOS_INLINE_FUNCTION -Impl::VectorSingleStruct > -PerThread(const Impl::TaskExec< Kokkos::Cuda >& thread) -{ - return Impl::VectorSingleStruct >(thread); -} +// KOKKOS_INLINE_FUNCTION +// Impl::VectorSingleStruct > +// PerThread(const Impl::TaskExec< Kokkos::Cuda >& thread) +// { +// return Impl::VectorSingleStruct >(thread); +// } /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. * * The range i=0..N-1 is mapped to all threads of the the calling thread team. * This functionality requires C++11 support. */ -template +template KOKKOS_INLINE_FUNCTION void parallel_for - ( const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries + ( const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries , const Lambda& lambda ) { @@ -370,10 +846,10 @@ void parallel_for } } -template< typename iType, class Lambda > +template< typename iType, class Lambda, class Scheduler > KOKKOS_INLINE_FUNCTION void parallel_for - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, + (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, const Lambda & lambda) { for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { lambda(i); @@ -459,14 +935,14 @@ void parallel_reduce // blockDim.y == team_size // threadIdx.x == position in vec // threadIdx.y == member number -template< typename iType, class Lambda, typename ValueType > +template< typename iType, class Lambda, typename ValueType, class Scheduler > KOKKOS_INLINE_FUNCTION void parallel_reduce - (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, + (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, const Lambda & lambda, ValueType& initialized_result) { - //TODO what is the point of creating this temporary? + //TODO @internal_documentation what is the point of creating this temporary? ValueType result = initialized_result; for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { lambda(i,result); @@ -487,15 +963,15 @@ void parallel_reduce } } -template< typename iType, class Lambda, typename ReducerType > +template< typename iType, class Lambda, typename ReducerType, class Scheduler > KOKKOS_INLINE_FUNCTION void parallel_reduce - (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, + (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, const Lambda & lambda, const ReducerType& reducer) { typedef typename ReducerType::value_type ValueType; - //TODO what is the point of creating this temporary? + //TODO @internal_documentation what is the point of creating this temporary? ValueType result = ValueType(); reducer.init(result); @@ -549,10 +1025,10 @@ void parallel_reduce // blockDim.y == team_size // threadIdx.x == position in vec // threadIdx.y == member number -template< typename iType, class Lambda, typename ValueType > +template< typename iType, class Lambda, typename ValueType, class Scheduler > KOKKOS_INLINE_FUNCTION void parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, + (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, const Lambda & lambda, ValueType& initialized_result) { @@ -576,10 +1052,10 @@ void parallel_reduce } } -template< typename iType, class Lambda, typename ReducerType > +template< typename iType, class Lambda, typename ReducerType, class Scheduler > KOKKOS_INLINE_FUNCTION void parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, + (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, const Lambda & lambda, const ReducerType& reducer) { @@ -611,10 +1087,10 @@ void parallel_reduce // blockDim.y == team_size // threadIdx.x == position in vec // threadIdx.y == member number -template< typename iType, class Closure > +template< typename iType, class Closure, class Scheduler > KOKKOS_INLINE_FUNCTION void parallel_scan - (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, + (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, const Closure & closure ) { // Extract value_type from closure @@ -676,10 +1152,10 @@ void parallel_scan // blockDim.y == team_size // threadIdx.x == position in vec // threadIdx.y == member number -template< typename iType, class Closure > +template< typename iType, class Closure, class Scheduler > KOKKOS_INLINE_FUNCTION void parallel_scan - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, + (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, const Closure & closure ) { // Extract value_type from closure @@ -735,25 +1211,25 @@ void parallel_scan namespace Kokkos { - template + template KOKKOS_INLINE_FUNCTION - void single(const Impl::VectorSingleStruct >& , const FunctorType& lambda) { + void single(const Impl::VectorSingleStruct >& , const FunctorType& lambda) { #ifdef __CUDA_ARCH__ if(threadIdx.x == 0) lambda(); #endif } - template + template KOKKOS_INLINE_FUNCTION - void single(const Impl::ThreadSingleStruct >& , const FunctorType& lambda) { + void single(const Impl::ThreadSingleStruct >& , const FunctorType& lambda) { #ifdef __CUDA_ARCH__ if(threadIdx.x == 0 && threadIdx.y == 0) lambda(); #endif } - template + template KOKKOS_INLINE_FUNCTION - void single(const Impl::VectorSingleStruct >& s , const FunctorType& lambda, ValueType& val) { + void single(const Impl::VectorSingleStruct >& s , const FunctorType& lambda, ValueType& val) { #ifdef __CUDA_ARCH__ if(threadIdx.x == 0) lambda(val); if ( 1 < s.team_member.team_size() ) { @@ -762,9 +1238,9 @@ namespace Kokkos { #endif } - template + template KOKKOS_INLINE_FUNCTION - void single(const Impl::ThreadSingleStruct >& single_struct, const FunctorType& lambda, ValueType& val) { + void single(const Impl::ThreadSingleStruct >& single_struct, const FunctorType& lambda, ValueType& val) { #ifdef __CUDA_ARCH__ if(threadIdx.x == 0 && threadIdx.y == 0) { lambda(val); diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp index 18271a5146..587ad6001d 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp @@ -56,9 +56,9 @@ #include #include -#include +#include #include -#include +#include #include #if defined(KOKKOS_ENABLE_PROFILING) @@ -101,11 +101,13 @@ struct CudaJoinFunctor { * total available shared memory must be partitioned among teams. */ class CudaTeamMember { -private: +public: typedef Kokkos::Cuda execution_space ; typedef execution_space::scratch_memory_space scratch_memory_space ; +private: + mutable void * m_team_reduce ; scratch_memory_space m_team_shared ; int m_team_reduce_size ; @@ -221,12 +223,21 @@ public: KOKKOS_INLINE_FUNCTION typename std::enable_if< is_reducer< ReducerType >::value >::type team_reduce( ReducerType const & reducer ) const noexcept + { + team_reduce(reducer,reducer.reference()); + } + + template< typename ReducerType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< is_reducer< ReducerType >::value >::type + team_reduce( ReducerType const & reducer, typename ReducerType::value_type& value ) const noexcept { #ifdef __CUDA_ARCH__ - cuda_intra_block_reduction(reducer,blockDim.y); + cuda_intra_block_reduction(reducer,value,blockDim.y); #endif /* #ifdef __CUDA_ARCH__ */ } + //-------------------------------------------------------------------------- /** \brief Intra-team exclusive prefix sum with team_rank() ordering * with intra-team non-deterministic ordering accumulation. @@ -281,20 +292,28 @@ public: template< typename ReducerType > KOKKOS_INLINE_FUNCTION static typename std::enable_if< is_reducer< ReducerType >::value >::type - vector_reduce( ReducerType const & reducer ) + vector_reduce( ReducerType const & reducer ) { + vector_reduce(reducer,reducer.reference()); + } + + template< typename ReducerType > + KOKKOS_INLINE_FUNCTION static + typename std::enable_if< is_reducer< ReducerType >::value >::type + vector_reduce( ReducerType const & reducer, typename ReducerType::value_type& value ) { #ifdef __CUDA_ARCH__ if(blockDim.x == 1) return; // Intra vector lane shuffle reduction: - typename ReducerType::value_type tmp ( reducer.reference() ); + typename ReducerType::value_type tmp ( value ); + typename ReducerType::value_type tmp2 = tmp; unsigned mask = blockDim.x==32?0xffffffff:((1<>= 1 ) ; ) { - cuda_shfl_down( reducer.reference() , tmp , i , blockDim.x , mask ); - if ( (int)threadIdx.x < i ) { reducer.join( tmp , reducer.reference() ); } + cuda_shfl_down( tmp2 , tmp , i , blockDim.x , mask ); + if ( (int)threadIdx.x < i ) { reducer.join( tmp , tmp2 ); } } // Broadcast from root lane to all other lanes. @@ -302,7 +321,9 @@ public: // because floating point summation is not associative // and thus different threads could have different results. - cuda_shfl( reducer.reference() , tmp , 0 , blockDim.x , mask ); + cuda_shfl( tmp2 , tmp , 0 , blockDim.x , mask ); + value = tmp2; + reducer.reference() = tmp2; #endif } @@ -543,19 +564,37 @@ struct TeamThreadRangeBoundariesStruct { const iType end; KOKKOS_INLINE_FUNCTION - TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count) + TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, iType count) : member(thread_) , start( 0 ) , end( count ) {} KOKKOS_INLINE_FUNCTION - TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& begin_, const iType& end_) + TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, iType begin_, iType end_) : member(thread_) , start( begin_ ) , end( end_ ) {} }; +template +struct TeamVectorRangeBoundariesStruct { + typedef iType index_type; + const CudaTeamMember& member; + const iType start; + const iType end; + KOKKOS_INLINE_FUNCTION + TeamVectorRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count) + : member(thread_) + , start( 0 ) + , end( count ) {} + + KOKKOS_INLINE_FUNCTION + TeamVectorRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& begin_, const iType& end_) + : member(thread_) + , start( begin_ ) + , end( end_ ) {} +}; template struct ThreadVectorRangeBoundariesStruct { @@ -564,19 +603,19 @@ struct ThreadVectorRangeBoundariesStruct { const index_type end; KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct (const CudaTeamMember, const index_type& count) + ThreadVectorRangeBoundariesStruct (const CudaTeamMember, index_type count) : start( static_cast(0) ), end( count ) {} KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct (const index_type& count) + ThreadVectorRangeBoundariesStruct (index_type count) : start( static_cast(0) ), end( count ) {} KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct (const CudaTeamMember, const index_type& arg_begin, const index_type& arg_end) + ThreadVectorRangeBoundariesStruct (const CudaTeamMember, index_type arg_begin, index_type arg_end) : start( arg_begin ), end( arg_end ) {} KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct (const index_type& arg_begin, const index_type& arg_end) + ThreadVectorRangeBoundariesStruct (index_type arg_begin, index_type arg_end) : start( arg_begin ), end( arg_end ) {} }; @@ -585,7 +624,7 @@ struct ThreadVectorRangeBoundariesStruct { template KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember > -TeamThreadRange( const Impl::CudaTeamMember & thread, const iType & count ) { +TeamThreadRange( const Impl::CudaTeamMember & thread, iType count ) { return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, count ); } @@ -593,22 +632,38 @@ template< typename iType1, typename iType2 > KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, Impl::CudaTeamMember > -TeamThreadRange( const Impl::CudaTeamMember & thread, const iType1 & begin, const iType2 & end ) { +TeamThreadRange( const Impl::CudaTeamMember & thread, iType1 begin, iType2 end ) { typedef typename std::common_type< iType1, iType2 >::type iType; return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, iType(begin), iType(end) ); } +template +KOKKOS_INLINE_FUNCTION +Impl::TeamVectorRangeBoundariesStruct< iType, Impl::CudaTeamMember > +TeamVectorRange( const Impl::CudaTeamMember & thread, const iType & count ) { + return Impl::TeamVectorRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, count ); +} + +template< typename iType1, typename iType2 > +KOKKOS_INLINE_FUNCTION +Impl::TeamVectorRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, + Impl::CudaTeamMember > +TeamVectorRange( const Impl::CudaTeamMember & thread, const iType1 & begin, const iType2 & end ) { + typedef typename std::common_type< iType1, iType2 >::type iType; + return Impl::TeamVectorRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, iType(begin), iType(end) ); +} + template KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct -ThreadVectorRange(const Impl::CudaTeamMember& thread, const iType& count) { +ThreadVectorRange(const Impl::CudaTeamMember& thread, iType count) { return Impl::ThreadVectorRangeBoundariesStruct(thread,count); } template KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct -ThreadVectorRange(const Impl::CudaTeamMember& thread, const iType& arg_begin, const iType& arg_end) { +ThreadVectorRange(const Impl::CudaTeamMember& thread, iType arg_begin, iType arg_end) { return Impl::ThreadVectorRangeBoundariesStruct(thread,arg_begin,arg_end); } @@ -667,16 +722,16 @@ parallel_reduce ) { #ifdef __CUDA_ARCH__ - - reducer.init( reducer.reference() ); + typename ReducerType::value_type value; + reducer.init( value ); for( iType i = loop_boundaries.start + threadIdx.y ; i < loop_boundaries.end ; i += blockDim.y ) { - closure(i,reducer.reference()); + closure(i,value); } - loop_boundaries.member.team_reduce( reducer ); + loop_boundaries.member.team_reduce( reducer, value ); #endif } @@ -701,19 +756,88 @@ parallel_reduce ) { #ifdef __CUDA_ARCH__ - - Kokkos::Sum reducer(result); + ValueType val; + Kokkos::Sum reducer(val); reducer.init( reducer.reference() ); for( iType i = loop_boundaries.start + threadIdx.y ; i < loop_boundaries.end ; i += blockDim.y ) { - closure(i,result); + closure(i,val); } - loop_boundaries.member.team_reduce( reducer ); + loop_boundaries.member.team_reduce( reducer , val); + result = reducer.reference(); +#endif +} +template +KOKKOS_INLINE_FUNCTION +void parallel_for + ( const Impl::TeamVectorRangeBoundariesStruct& + loop_boundaries + , const Closure & closure + ) +{ + #ifdef __CUDA_ARCH__ + for( iType i = loop_boundaries.start + threadIdx.y * blockDim.x + threadIdx.x + ; i < loop_boundaries.end + ; i += blockDim.y*blockDim.x ) + closure(i); + #endif +} + +template< typename iType, class Closure, class ReducerType > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type +parallel_reduce + ( const Impl::TeamVectorRangeBoundariesStruct & + loop_boundaries + , const Closure & closure + , const ReducerType & reducer + ) +{ +#ifdef __CUDA_ARCH__ + typename ReducerType::value_type value; + reducer.init( value ); + + for( iType i = loop_boundaries.start + threadIdx.y * blockDim.x + threadIdx.x + ; i < loop_boundaries.end + ; i += blockDim.y * blockDim.x ) { + closure(i,value); + } + + loop_boundaries.member.vector_reduce( reducer, value ); + loop_boundaries.member.team_reduce( reducer, value ); +#endif +} + +template< typename iType, class Closure, typename ValueType > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< ! Kokkos::is_reducer< ValueType >::value >::type +parallel_reduce + ( const Impl::TeamVectorRangeBoundariesStruct & + loop_boundaries + , const Closure & closure + , ValueType & result + ) +{ +#ifdef __CUDA_ARCH__ + ValueType val; + Kokkos::Sum reducer(val); + + reducer.init( reducer.reference() ); + + for( iType i = loop_boundaries.start + threadIdx.y * blockDim.x + threadIdx.x + ; i < loop_boundaries.end + ; i += blockDim.y * blockDim.x ) { + closure(i,val); + } + + loop_boundaries.member.vector_reduce( reducer ); + loop_boundaries.member.team_reduce( reducer ); + result = reducer.reference(); #endif } diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp index af2aff8b35..2fe9d8ccf7 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp @@ -241,7 +241,7 @@ class ViewDataHandle< Traits , sizeof(typename Traits::const_value_type) == 16 ) && // Random access trait - ( Traits::memory_traits::RandomAccess != 0 ) + ( Traits::memory_traits::is_random_access != 0 ) )>::type > { public: diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp index 94e293d7c7..9c0ac470c8 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp @@ -102,9 +102,8 @@ public: const dim3 grid( Kokkos::Impl::cuda_internal_multiprocessor_count() , 1 , 1 ); const dim3 block( 1 , Kokkos::Impl::CudaTraits::WarpSize , warps_per_block ); const int shared = 0 ; - const cudaStream_t stream = 0 ; - Kokkos::Impl::CudaParallelLaunch(*this, grid, block, shared, stream); + Kokkos::Impl::CudaParallelLaunch(*this, grid, block, shared, Cuda().impl_internal_space_instance() , false ); } inline diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX.cpp b/lib/kokkos/core/src/HPX/Kokkos_HPX.cpp new file mode 100644 index 0000000000..da9783467c --- /dev/null +++ b/lib/kokkos/core/src/HPX/Kokkos_HPX.cpp @@ -0,0 +1,152 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#include + +#ifdef KOKKOS_ENABLE_HPX +#include + +#include + +namespace Kokkos { +namespace Experimental { + +bool HPX::m_hpx_initialized = false; +Kokkos::Impl::thread_buffer HPX::m_buffer; +#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH) +hpx::future HPX::m_future = hpx::make_ready_future(); +#endif + +int HPX::concurrency() { + hpx::runtime *rt = hpx::get_runtime_ptr(); + if (rt == nullptr) { + return hpx::threads::hardware_concurrency(); + } else { + if (hpx::threads::get_self_ptr() == nullptr) { + return hpx::resource::get_thread_pool(0).get_os_thread_count(); + } else { + return hpx::this_thread::get_pool()->get_os_thread_count(); + } + } +} + +void HPX::impl_initialize(int thread_count) { + hpx::runtime *rt = hpx::get_runtime_ptr(); + if (rt == nullptr) { + std::vector config = { + "hpx.os_threads=" + std::to_string(thread_count), +#ifdef KOKKOS_DEBUG + "--hpx:attach-debugger=exception", +#endif + }; + int argc_hpx = 1; + char name[] = "kokkos_hpx"; + char *argv_hpx[] = {name, nullptr}; + hpx::start(nullptr, argc_hpx, argv_hpx, config); + + // NOTE: Wait for runtime to start. hpx::start returns as soon as + // possible, meaning some operations are not allowed immediately + // after hpx::start. Notably, hpx::stop needs state_running. This + // needs to be fixed in HPX itself. + + // Get runtime pointer again after it has been started. + rt = hpx::get_runtime_ptr(); + hpx::util::yield_while( + [rt]() { return rt->get_state() < hpx::state_running; }); + + m_hpx_initialized = true; + } +} + +void HPX::impl_initialize() { + hpx::runtime *rt = hpx::get_runtime_ptr(); + if (rt == nullptr) { + std::vector config = { +#ifdef KOKKOS_DEBUG + "--hpx:attach-debugger=exception", +#endif + }; + int argc_hpx = 1; + char name[] = "kokkos_hpx"; + char *argv_hpx[] = {name, nullptr}; + hpx::start(nullptr, argc_hpx, argv_hpx, config); + + // NOTE: Wait for runtime to start. hpx::start returns as soon as + // possible, meaning some operations are not allowed immediately + // after hpx::start. Notably, hpx::stop needs state_running. This + // needs to be fixed in HPX itself. + + // Get runtime pointer again after it has been started. + rt = hpx::get_runtime_ptr(); + hpx::util::yield_while( + [rt]() { return rt->get_state() < hpx::state_running; }); + + m_hpx_initialized = true; + } +} + +bool HPX::impl_is_initialized() noexcept { + hpx::runtime *rt = hpx::get_runtime_ptr(); + return rt != nullptr; +} + +void HPX::impl_finalize() { + if (m_hpx_initialized) { + hpx::runtime *rt = hpx::get_runtime_ptr(); + if (rt != nullptr) { + hpx::apply([]() { hpx::finalize(); }); + hpx::stop(); + } else { + Kokkos::abort("Kokkos::Experimental::HPX::impl_finalize: Kokkos started " + "HPX but something else already stopped HPX\n"); + } + } +} + +} // namespace Experimental +} // namespace Kokkos + +#else +void KOKKOS_CORE_SRC_IMPL_HPX_PREVENT_LINK_ERROR() {} +#endif //#ifdef KOKKOS_ENABLE_HPX diff --git a/lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp similarity index 76% rename from lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp rename to lib/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp index d001e0a88c..df7c403685 100644 --- a/lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp +++ b/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp @@ -41,38 +41,25 @@ //@HEADER */ -#ifndef KOKKOS_STATICASSERT_HPP -#define KOKKOS_STATICASSERT_HPP +#include +#if defined(KOKKOS_ENABLE_HPX) && defined(KOKKOS_ENABLE_TASKDAG) + +#include + +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- namespace Kokkos { namespace Impl { -template < bool , class T = void > -struct StaticAssert ; - -template< class T > -struct StaticAssert< true , T > { - typedef T type ; - static const bool value = true ; -}; - -template < class A , class B > -struct StaticAssertSame ; - -template < class A > -struct StaticAssertSame { typedef A type ; }; - -template < class A , class B > -struct StaticAssertAssignable ; - -template < class A > -struct StaticAssertAssignable { typedef A type ; }; - -template < class A > -struct StaticAssertAssignable< const A , A > { typedef const A type ; }; +template class TaskQueue; } // namespace Impl } // namespace Kokkos -#endif /* KOKKOS_STATICASSERT_HPP */ - +#else +void KOKKOS_CORE_SRC_IMPL_HPX_TASK_PREVENT_LINK_ERROR() {} +#endif // #if defined( KOKKOS_ENABLE_HPX ) && defined( KOKKOS_ENABLE_TASKDAG ) diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp new file mode 100644 index 0000000000..c3a14efee6 --- /dev/null +++ b/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp @@ -0,0 +1,298 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HPX_TASK_HPP +#define KOKKOS_HPX_TASK_HPP + +#include +#if defined(KOKKOS_ENABLE_HPX) && defined(KOKKOS_ENABLE_TASKDAG) + +#include + +#include + +#include +#include + +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template +class TaskQueueSpecialization< + SimpleTaskScheduler> { +public: + using execution_space = Kokkos::Experimental::HPX; + using scheduler_type = + SimpleTaskScheduler; + using member_type = + TaskTeamMemberAdapter; + using memory_space = Kokkos::HostSpace; + + static void execute(scheduler_type const &scheduler) { + // NOTE: We create an instance so that we can use dispatch_execute_task. + // This is not necessarily the most efficient, but can be improved later. + TaskQueueSpecialization task_queue; + task_queue.scheduler = &scheduler; + Kokkos::Impl::dispatch_execute_task(&task_queue); + Kokkos::Experimental::HPX().fence(); + } + + // Must provide task queue execution function + void execute_task() const { + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + using task_base_type = typename scheduler_type::task_base_type; + + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, 512); + + auto &queue = scheduler->queue(); + + counting_semaphore sem(0); + + for (int thread = 0; thread < num_worker_threads; ++thread) { + apply([this, &sem, &queue, &buffer, num_worker_threads, thread]() { + // NOTE: This implementation has been simplified based on the + // assumption that team_size = 1. The HPX backend currently only + // supports a team size of 1. + std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id(); + + buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id()); + HPXTeamMember member(TeamPolicyInternal( + Kokkos::Experimental::HPX(), num_worker_threads, 1), + 0, t, buffer.get(t), 512); + + member_type single_exec(*scheduler, member); + member_type &team_exec = single_exec; + + auto &team_scheduler = team_exec.scheduler(); + auto current_task = OptionalRef(nullptr); + + while (!queue.is_done()) { + current_task = + queue.pop_ready_task(team_scheduler.team_scheduler_info()); + + if (current_task) { + KOKKOS_ASSERT(current_task->is_single_runnable() || + current_task->is_team_runnable()); + current_task->as_runnable_task().run(single_exec); + queue.complete((*std::move(current_task)).as_runnable_task(), + team_scheduler.team_scheduler_info()); + } + } + + sem.signal(1); + }); + } + + sem.wait(num_worker_threads); + } + + static uint32_t get_max_team_count(execution_space const &espace) { + return static_cast(espace.concurrency()); + } + + template + static void get_function_pointer(typename TaskType::function_type &ptr, + typename TaskType::destroy_type &dtor) { + ptr = TaskType::apply; + dtor = TaskType::destroy; + } + +private: + const scheduler_type *scheduler; +}; + +template +class TaskQueueSpecializationConstrained< + Scheduler, typename std::enable_if< + std::is_same::value>::type> { +public: + using execution_space = Kokkos::Experimental::HPX; + using scheduler_type = Scheduler; + using member_type = + TaskTeamMemberAdapter; + using memory_space = Kokkos::HostSpace; + + static void + iff_single_thread_recursive_execute(scheduler_type const &scheduler) { + using task_base_type = typename scheduler_type::task_base; + using queue_type = typename scheduler_type::queue_type; + + if (1 == Kokkos::Experimental::HPX::concurrency()) { + task_base_type *const end = (task_base_type *)task_base_type::EndTag; + task_base_type *task = end; + + HPXTeamMember member(TeamPolicyInternal( + Kokkos::Experimental::HPX(), 1, 1), + 0, 0, nullptr, 0); + member_type single_exec(scheduler, member); + + do { + task = end; + + // Loop by priority and then type + for (int i = 0; i < queue_type::NumQueue && end == task; ++i) { + for (int j = 0; j < 2 && end == task; ++j) { + task = + queue_type::pop_ready_task(&scheduler.m_queue->m_ready[i][j]); + } + } + + if (end == task) + break; + + (*task->m_apply)(task, &single_exec); + + scheduler.m_queue->complete(task); + + } while (true); + } + } + + static void execute(scheduler_type const &scheduler) { + // NOTE: We create an instance so that we can use dispatch_execute_task. + // This is not necessarily the most efficient, but can be improved later. + TaskQueueSpecializationConstrained task_queue; + task_queue.scheduler = &scheduler; + Kokkos::Impl::dispatch_execute_task(&task_queue); + Kokkos::Experimental::HPX().fence(); + } + + // Must provide task queue execution function + void execute_task() const { + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + using task_base_type = typename scheduler_type::task_base; + using queue_type = typename scheduler_type::queue_type; + + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + static task_base_type *const end = (task_base_type *)task_base_type::EndTag; + constexpr task_base_type *no_more_tasks_sentinel = nullptr; + + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, 512); + + auto &queue = scheduler->queue(); + queue.initialize_team_queues(num_worker_threads); + + counting_semaphore sem(0); + + for (int thread = 0; thread < num_worker_threads; ++thread) { + apply([this, &sem, &buffer, num_worker_threads, thread]() { + // NOTE: This implementation has been simplified based on the assumption + // that team_size = 1. The HPX backend currently only supports a team + // size of 1. + std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id(); + + buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id()); + HPXTeamMember member( + TeamPolicyInternal( + Kokkos::Experimental::HPX(), num_worker_threads, 1), + 0, t, buffer.get(t), 512); + + member_type single_exec(*scheduler, member); + member_type &team_exec = single_exec; + + auto &team_queue = team_exec.scheduler().queue(); + task_base_type *task = no_more_tasks_sentinel; + + do { + if (task != no_more_tasks_sentinel && task != end) { + team_queue.complete(task); + } + + if (*((volatile int *)&team_queue.m_ready_count) > 0) { + task = end; + for (int i = 0; i < queue_type::NumQueue && end == task; ++i) { + for (int j = 0; j < 2 && end == task; ++j) { + task = queue_type::pop_ready_task(&team_queue.m_ready[i][j]); + } + } + } else { + task = team_queue.attempt_to_steal_task(); + } + + if (task != no_more_tasks_sentinel && task != end) { + (*task->m_apply)(task, &single_exec); + } + } while (task != no_more_tasks_sentinel); + + sem.signal(1); + }); + } + + sem.wait(num_worker_threads); + } + + template + static void get_function_pointer(typename TaskType::function_type &ptr, + typename TaskType::destroy_type &dtor) { + ptr = TaskType::apply; + dtor = TaskType::destroy; + } + +private: + const scheduler_type *scheduler; +}; + +extern template class TaskQueue< + Kokkos::Experimental::HPX, + typename Kokkos::Experimental::HPX::memory_space>; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_HPX_TASK_HPP */ diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX_ViewCopyETIAvail.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX_ViewCopyETIAvail.hpp new file mode 100644 index 0000000000..bbc1b33bf9 --- /dev/null +++ b/lib/kokkos/core/src/HPX/Kokkos_HPX_ViewCopyETIAvail.hpp @@ -0,0 +1,57 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HPX_VIEWETIAVAIL_HPP +#define KOKKOS_HPX_VIEWETIAVAIL_HPP + +namespace Kokkos { +namespace Impl { +#define KOKKOS_IMPL_VIEWCOPY_ETI_AVAIL_EXECSPACE Kokkos::Experimental::HPX + +#include + +#undef KOKKOS_IMPL_VIEWCOPY_ETI_AVAIL_EXECSPACE +} +} +#endif + diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX_ViewCopyETIDecl.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX_ViewCopyETIDecl.hpp new file mode 100644 index 0000000000..aa1c2f1518 --- /dev/null +++ b/lib/kokkos/core/src/HPX/Kokkos_HPX_ViewCopyETIDecl.hpp @@ -0,0 +1,57 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HPX_VIEWETIDECL_HPP +#define KOKKOS_HPX_VIEWETIDECL_HPP + +namespace Kokkos { +namespace Impl { +#define KOKKOS_IMPL_VIEWCOPY_ETI_AVAIL_EXECSPACE Kokkos::Experimental::HPX + +#include + +#undef KOKKOS_IMPL_VIEWCOPY_ETI_AVAIL_EXECSPACE +} +} +#endif + diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp new file mode 100644 index 0000000000..4dd28dd994 --- /dev/null +++ b/lib/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp @@ -0,0 +1,116 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HPX_WORKGRAPHPOLICY_HPP +#define KOKKOS_HPX_WORKGRAPHPOLICY_HPP + +#include +#include + +namespace Kokkos { +namespace Impl { + +template +class ParallelFor, + Kokkos::Experimental::HPX> { +private: + using Policy = Kokkos::WorkGraphPolicy; + using WorkTag = typename Policy::work_tag; + + Policy m_policy; + FunctorType m_functor; + + template + typename std::enable_if::value>::type + execute_functor(const std::int32_t w) const noexcept { + m_functor(w); + } + + template + typename std::enable_if::value>::type + execute_functor(const std::int32_t w) const noexcept { + const TagType t{}; + m_functor(t, w); + } + +public: + void execute() const { + dispatch_execute_task(this); + Kokkos::Experimental::HPX().fence(); + } + + void execute_task() const { + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + + counting_semaphore sem(0); + + for (int thread = 0; thread < num_worker_threads; ++thread) { + apply([this, &sem]() { + std::int32_t w = m_policy.pop_work(); + while (w != Policy::COMPLETED_TOKEN) { + if (w != Policy::END_TOKEN) { + execute_functor(w); + m_policy.completed_work(w); + } + + w = m_policy.pop_work(); + } + + sem.signal(1); + }); + } + + sem.wait(num_worker_threads); + } + + inline ParallelFor(const FunctorType &arg_functor, const Policy &arg_policy) + : m_policy(arg_policy), m_functor(arg_functor) {} +}; + +} // namespace Impl +} // namespace Kokkos + +#endif /* #define KOKKOS_HPX_WORKGRAPHPOLICY_HPP */ diff --git a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp index fb0d6cde84..1972aa485b 100644 --- a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp +++ b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp @@ -125,6 +125,8 @@ struct MDRangePolicy using traits = Kokkos::Impl::PolicyTraits; using range_policy = RangePolicy; + typename traits::execution_space m_space; + using impl_range_policy = RangePolicy< typename traits::execution_space , typename traits::schedule_type , typename traits::index_type @@ -132,6 +134,9 @@ struct MDRangePolicy typedef MDRangePolicy execution_policy; // needed for is_execution_space interrogation + template + friend struct MDRangePolicy; + static_assert( !std::is_same::value , "Kokkos Error: MD iteration pattern not defined" ); @@ -192,13 +197,54 @@ struct MDRangePolicy static constexpr int Right = static_cast( Iterate::Right ); static constexpr int Left = static_cast( Iterate::Left ); + KOKKOS_INLINE_FUNCTION const typename traits::execution_space & space() const { return m_space ; } + template < typename LT , typename UT , typename TT = array_index_type > + MDRangePolicy(std::initializer_list const& lower, std::initializer_list const& upper, std::initializer_list const& tile = {} ) + : m_space() { + init(lower, upper, tile); + } + + template < typename LT , typename UT , typename TT = array_index_type > + MDRangePolicy(const typename traits::execution_space & work_space, + std::initializer_list const& lower, std::initializer_list const& upper, std::initializer_list const& tile = {} ) + : m_space( work_space ) { + init(lower, upper, tile); + } + MDRangePolicy( point_type const& lower, point_type const& upper, tile_type const& tile = tile_type{} ) - : m_lower(lower) + : m_space() + , m_lower(lower) , m_upper(upper) , m_tile(tile) , m_num_tiles(1) - , m_prod_tile_dims(1) - { + , m_prod_tile_dims(1) { + init(); + } + + MDRangePolicy( const typename traits::execution_space & work_space, + point_type const& lower, point_type const& upper, tile_type const& tile = tile_type{} ) + : m_space( work_space ) + , m_lower(lower) + , m_upper(upper) + , m_tile(tile) + , m_num_tiles(1) + , m_prod_tile_dims(1) { + init(); + } + + template + MDRangePolicy( const MDRangePolicy p ): + m_space(p.m_space), + m_lower(p.m_lower), + m_upper(p.m_upper), + m_tile(p.m_tile), + m_tile_end(p.m_tile_end), + m_num_tiles(p.m_num_tiles), + m_prod_tile_dims(p.m_prod_tile_dims) {} + +private: + + void init() { // Host if ( true #if defined(KOKKOS_ENABLE_CUDA) @@ -211,7 +257,7 @@ struct MDRangePolicy { index_type span; for (int i=0; i 0)) ) @@ -311,11 +357,9 @@ struct MDRangePolicy #endif } - template < typename LT , typename UT , typename TT = array_index_type > - MDRangePolicy( std::initializer_list const& lower, std::initializer_list const& upper, std::initializer_list const& tile = {} ) + void init( std::initializer_list const& lower, std::initializer_list const& upper, std::initializer_list const& tile = {} ) { - if(static_cast(m_lower.size()) != rank || static_cast(m_upper.size()) != rank) Kokkos::abort("MDRangePolicy: Constructor initializer lists have wrong size"); @@ -589,5 +633,26 @@ void md_parallel_reduce( const std::string& str } } // namespace Kokkos::Experimental #endif +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template +struct PolicyPropertyAdaptor,MDRangePolicy> { + typedef MDRangePolicy policy_in_t; + typedef MDRangePolicy> policy_out_t; +}; + +} +} +} + + #endif //KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP diff --git a/lib/kokkos/core/src/Kokkos_Atomic.hpp b/lib/kokkos/core/src/Kokkos_Atomic.hpp index cf0f25969d..c2268bd35f 100644 --- a/lib/kokkos/core/src/Kokkos_Atomic.hpp +++ b/lib/kokkos/core/src/Kokkos_Atomic.hpp @@ -90,6 +90,7 @@ #if ! defined( KOKKOS_ENABLE_GNU_ATOMICS ) && \ ! defined( KOKKOS_ENABLE_INTEL_ATOMICS ) && \ ! defined( KOKKOS_ENABLE_OPENMP_ATOMICS ) && \ + ! defined( KOKKOS_ENABLE_STD_ATOMICS ) && \ ! defined( KOKKOS_ENABLE_SERIAL_ATOMICS ) // Compiling for non-Cuda atomic implementation has not been pre-selected. @@ -168,6 +169,12 @@ const char * atomic_query_version() } // namespace Kokkos +//---------------------------------------------------------------------------- +// Atomic Memory Orders +// +// Implements Strongly-typed analogs of C++ standard memory orders +#include "impl/Kokkos_Atomic_Memory_Order.hpp" + #if defined( KOKKOS_ENABLE_ROCM ) namespace Kokkos { namespace Impl { @@ -287,6 +294,14 @@ void unlock_address_rocm_space(void* ptr); #ifndef _WIN32 #include "impl/Kokkos_Atomic_Generic.hpp" #endif + +//---------------------------------------------------------------------------- +// Provide atomic loads and stores with memory order semantics + +#include "impl/Kokkos_Atomic_Load.hpp" +#include "impl/Kokkos_Atomic_Store.hpp" + + //---------------------------------------------------------------------------- // This atomic-style macro should be an inlined function, not a macro diff --git a/lib/kokkos/core/src/Kokkos_Complex.hpp b/lib/kokkos/core/src/Kokkos_Complex.hpp index 08cbba3b31..a3ada5d55e 100644 --- a/lib/kokkos/core/src/Kokkos_Complex.hpp +++ b/lib/kokkos/core/src/Kokkos_Complex.hpp @@ -631,8 +631,10 @@ RealType real (const complex& x) { template KOKKOS_INLINE_FUNCTION RealType abs (const complex& x) { - // FIXME (mfh 31 Oct 2014) Scale to avoid unwarranted overflow. - return std::sqrt (real (x) * real (x) + imag (x) * imag (x)); +#ifndef __CUDA_ARCH__ + using std::hypot; +#endif + return hypot(x.real(),x.imag()); } //! Power of a complex number diff --git a/lib/kokkos/core/src/Kokkos_Concepts.hpp b/lib/kokkos/core/src/Kokkos_Concepts.hpp index 117469b0a2..98ae141de4 100644 --- a/lib/kokkos/core/src/Kokkos_Concepts.hpp +++ b/lib/kokkos/core/src/Kokkos_Concepts.hpp @@ -79,6 +79,45 @@ struct IndexType using type = T; }; +namespace Experimental { + struct WorkItemProperty { + template + struct ImplWorkItemProperty { + static const unsigned value = Property; + using work_item_property = ImplWorkItemProperty; + }; + + constexpr static const ImplWorkItemProperty<0> None = ImplWorkItemProperty<0>(); + constexpr static const ImplWorkItemProperty<1> HintLightWeight = ImplWorkItemProperty<1>(); + constexpr static const ImplWorkItemProperty<2> HintHeavyWeight = ImplWorkItemProperty<2>(); + constexpr static const ImplWorkItemProperty<4> HintRegular = ImplWorkItemProperty<4>(); + constexpr static const ImplWorkItemProperty<8> HintIrregular = ImplWorkItemProperty<8>(); + typedef ImplWorkItemProperty<0> None_t; + typedef ImplWorkItemProperty<1> HintLightWeight_t; + typedef ImplWorkItemProperty<2> HintHeavyWeight_t; + typedef ImplWorkItemProperty<4> HintRegular_t; + typedef ImplWorkItemProperty<8> HintIrregular_t; + }; + +template +inline constexpr WorkItemProperty::ImplWorkItemProperty operator | + (WorkItemProperty::ImplWorkItemProperty, WorkItemProperty::ImplWorkItemProperty) { + return WorkItemProperty::ImplWorkItemProperty(); +} + +template +inline constexpr WorkItemProperty::ImplWorkItemProperty operator & + (WorkItemProperty::ImplWorkItemProperty, WorkItemProperty::ImplWorkItemProperty) { + return WorkItemProperty::ImplWorkItemProperty(); +} + +template +inline constexpr bool operator == (WorkItemProperty::ImplWorkItemProperty, WorkItemProperty::ImplWorkItemProperty) { + return pv1 == pv2; +} + +} + /**\brief Specify Launch Bounds for CUDA execution. * * If no launch bounds specified then do not set launch bounds. @@ -105,9 +144,13 @@ namespace Kokkos { template< typename T > struct is_ ## CONCEPT { \ private: \ template< typename , typename = std::true_type > struct have : std::false_type {}; \ - template< typename U > struct have::type, \ - typename std::remove_cv::type \ + template< typename U > struct have::type, \ + typename std::remove_cv::type \ + >::type> : std::true_type {}; \ + template< typename U > struct have::type, \ + typename std::remove_cv::type \ >::type> : std::true_type {}; \ public: \ enum { value = is_ ## CONCEPT::template have::value }; \ @@ -121,6 +164,9 @@ KOKKOS_IMPL_IS_CONCEPT( execution_space ) KOKKOS_IMPL_IS_CONCEPT( execution_policy ) KOKKOS_IMPL_IS_CONCEPT( array_layout ) KOKKOS_IMPL_IS_CONCEPT( reducer ) +namespace Experimental { +KOKKOS_IMPL_IS_CONCEPT( work_item_property ) +} namespace Impl { @@ -138,6 +184,8 @@ KOKKOS_IMPL_IS_CONCEPT( iteration_pattern ) KOKKOS_IMPL_IS_CONCEPT( schedule_type ) KOKKOS_IMPL_IS_CONCEPT( index_type ) KOKKOS_IMPL_IS_CONCEPT( launch_bounds ) +KOKKOS_IMPL_IS_CONCEPT( thread_team_member ) +KOKKOS_IMPL_IS_CONCEPT( host_thread_team_member ) } diff --git a/lib/kokkos/core/src/Kokkos_CopyViews.hpp b/lib/kokkos/core/src/Kokkos_CopyViews.hpp index 31605c9d39..f919fdb755 100644 --- a/lib/kokkos/core/src/Kokkos_CopyViews.hpp +++ b/lib/kokkos/core/src/Kokkos_CopyViews.hpp @@ -186,9 +186,9 @@ struct ViewFill typedef Kokkos::RangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-1D",policy_type(0,a.extent(0)),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -206,10 +206,10 @@ struct ViewFill typedef Kokkos::MDRangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-2D", policy_type({0,0},{a.extent(0),a.extent(1)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -227,10 +227,10 @@ struct ViewFill typedef Kokkos::MDRangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-3D", policy_type({0,0,0},{a.extent(0),a.extent(1),a.extent(2)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -248,10 +248,10 @@ struct ViewFill typedef Kokkos::MDRangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-4D", policy_type({0,0,0,0},{a.extent(0),a.extent(1),a.extent(2),a.extent(3)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -269,10 +269,10 @@ struct ViewFill typedef Kokkos::MDRangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-5D", policy_type({0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(2),a.extent(3),a.extent(4)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -290,10 +290,10 @@ struct ViewFill typedef Kokkos::MDRangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-6D", policy_type({0,0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(2),a.extent(3),a.extent(4),a.extent(5)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -311,11 +311,11 @@ struct ViewFill typedef Kokkos::MDRangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-7D", policy_type({0,0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(2),a.extent(3), a.extent(5),a.extent(6)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -335,11 +335,11 @@ struct ViewFill typedef Kokkos::MDRangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-8D", policy_type({0,0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(3), a.extent(5),a.extent(6),a.extent(7)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -437,10 +437,10 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-1D", policy_type(0,a.extent(0)),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -459,10 +459,10 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-2D", policy_type({0,0},{a.extent(0),a.extent(1)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -482,10 +482,10 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-3D", policy_type({0,0,0},{a.extent(0),a.extent(1),a.extent(2)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -505,11 +505,11 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-4D", policy_type({0,0,0,0},{a.extent(0),a.extent(1),a.extent(2), a.extent(3)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -530,11 +530,11 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-5D", policy_type({0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(2), a.extent(3),a.extent(4)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -555,11 +555,11 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-6D", policy_type({0,0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(2), a.extent(3),a.extent(4),a.extent(5)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -581,11 +581,11 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-7D", policy_type({0,0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(3), a.extent(4),a.extent(5),a.extent(6)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -607,11 +607,11 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-8D", policy_type({0,0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(3), a.extent(5),a.extent(6),a.extent(7)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -1538,6 +1538,779 @@ void deep_copy } } +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +namespace Experimental { +/** \brief A local deep copy between views of the default specialization, compatible type, + * same non-zero rank. + */ +template< class TeamType, class DT , class ... DP , class ST , class ... SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy_contiguous(const TeamType& team, const View & dst, const View & src) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, src.span()), [&] (const int& i) { + dst.data()[i] = src.data()[i]; + }); +} +//---------------------------------------------------------------------------- +template< class DT , class ... DP , class ST , class ... SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy_contiguous(const View & dst, const View & src) { + + for(size_t i=0;i +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 1 && + unsigned(ViewTraits::rank) == 1 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0); + + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + dst(i) = src(i); + }); + team.team_barrier(); +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP , class ST , class ... SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 2 && + unsigned(ViewTraits::rank) == 2 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1); + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,src); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int i1 = i/dst.extent(0); + dst(i0,i1) = src(i0,i1); + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP , class ST , class ... SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 3 && + unsigned(ViewTraits::rank) == 3 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2); + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,src); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + int i2 = itmp/dst.extent(1); + dst(i0,i1,i2) = src(i0,i1,i2); + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP , class ST , class ... SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 4 && + unsigned(ViewTraits::rank) == 4 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3); + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,src); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + int i3 = itmp/dst.extent(2); + dst(i0,i1,i2,i3) = src(i0,i1,i2,i3); + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP , class ST , class ... SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 5 && + unsigned(ViewTraits::rank) == 5 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3)*dst.extent(4); + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,src); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + itmp = itmp/dst.extent(2); + int i3 = itmp%dst.extent(3); + int i4 = itmp/dst.extent(3); + dst(i0,i1,i2,i3,i4) = src(i0,i1,i2,i3,i4); + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP , class ST , class ... SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 6 && + unsigned(ViewTraits::rank) == 6 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3)*dst.extent(4)*dst.extent(5); + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,src); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + itmp = itmp/dst.extent(2); + int i3 = itmp%dst.extent(3); + itmp = itmp/dst.extent(3); + int i4 = itmp%dst.extent(4); + int i5 = itmp/dst.extent(4); + dst(i0,i1,i2,i3,i4,i5) = src(i0,i1,i2,i3,i4,i5); + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP , class ST , class ... SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 7 && + unsigned(ViewTraits::rank) == 7 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3)*dst.extent(4)*dst.extent(5)*dst.extent(6); + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,src); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + itmp = itmp/dst.extent(2); + int i3 = itmp%dst.extent(3); + itmp = itmp/dst.extent(3); + int i4 = itmp%dst.extent(4); + itmp = itmp/dst.extent(4); + int i5 = itmp%dst.extent(5); + int i6 = itmp/dst.extent(5); + dst(i0,i1,i2,i3,i4,i5,i6) = src(i0,i1,i2,i3,i4,i5,i6); + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class DT , class ... DP , class ST , class ... SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 1 && + unsigned(ViewTraits::rank) == 1 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0); + + + for(size_t i=0;i +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 2 && + unsigned(ViewTraits::rank) == 2 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,src); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 3 && + unsigned(ViewTraits::rank) == 3 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,src); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 4 && + unsigned(ViewTraits::rank) == 4 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,src); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 5 && + unsigned(ViewTraits::rank) == 5 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,src); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 6 && + unsigned(ViewTraits::rank) == 6 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,src); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 7 && + unsigned(ViewTraits::rank) == 7 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,src); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy_contiguous(const TeamType& team, const View & dst, typename ViewTraits::const_value_type & value) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, dst.span()), [&] (const int& i) { + dst.data()[i] = value; + }); +} +//---------------------------------------------------------------------------- +template< class DT , class ... DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy_contiguous(const View & dst, typename ViewTraits::const_value_type & value) { + + for(size_t i=0;i +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 1 )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0); + + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + dst(i) = value; + }); + team.team_barrier(); +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 2 )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1); + + if ( dst.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,value); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int i1 = i/dst.extent(0); + dst(i0,i1) = value; + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 3 )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2); + + if ( dst.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,value); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + int i2 = itmp/dst.extent(1); + dst(i0,i1,i2) = value; + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 4 )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3); + + if ( dst.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,value); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + int i3 = itmp/dst.extent(2); + dst(i0,i1,i2,i3) = value; + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 5 )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3)*dst.extent(4); + + if ( dst.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,value); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + itmp = itmp/dst.extent(2); + int i3 = itmp%dst.extent(3); + int i4 = itmp/dst.extent(3); + dst(i0,i1,i2,i3,i4) = value; + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 6 )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3)*dst.extent(4)*dst.extent(5); + + if ( dst.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,value); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + itmp = itmp/dst.extent(2); + int i3 = itmp%dst.extent(3); + itmp = itmp/dst.extent(3); + int i4 = itmp%dst.extent(4); + int i5 = itmp/dst.extent(4); + dst(i0,i1,i2,i3,i4,i5) = value; + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 7 )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3)*dst.extent(4)*dst.extent(5)*dst.extent(6); + + if ( dst.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,value); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + itmp = itmp/dst.extent(2); + int i3 = itmp%dst.extent(3); + itmp = itmp/dst.extent(3); + int i4 = itmp%dst.extent(4); + itmp = itmp/dst.extent(4); + int i5 = itmp%dst.extent(5); + int i6 = itmp/dst.extent(5); + dst(i0,i1,i2,i3,i4,i5,i6) = value; + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class DT , class ... DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 1 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0); + + + for(size_t i=0;i +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 2 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,value); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 3 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,value); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 4 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,value); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 5 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,value); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 6 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,value); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 7 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,value); + } else { + + for(size_t i0=0;i0::value_type >::value , "deep_copy requires non-const type" ); - ExecSpace::fence(); + ExecSpace().fence(); typedef typename View::uniform_runtime_nomemspace_type ViewTypeUniform; Kokkos::Impl::ViewFill< ViewTypeUniform >( dst , value ); - ExecSpace::fence(); + ExecSpace().fence(); } /** \brief Deep copy into a value in Host memory from a view. */ @@ -2184,6 +2957,25 @@ create_mirror_view_and_copy(const Space& , const Kokkos::View & src deep_copy(mirror, src); return mirror; } + +// Create a mirror view in a new space without initializing (specialization for same space) +template +typename Impl::MirrorViewType::view_type +create_mirror_view(const Space& , const Kokkos::View & src + , Kokkos::Impl::WithoutInitializing_t + , typename std::enable_if::is_same_memspace>::type* = 0 ) { + return src; +} + +// Create a mirror view in a new space without initializing (specialization for different space) +template +typename Impl::MirrorViewType::view_type +create_mirror_view(const Space& , const Kokkos::View & src + , Kokkos::Impl::WithoutInitializing_t + , typename std::enable_if::is_same_memspace>::type* = 0 ) { + using Mirror = typename Impl::MirrorViewType::view_type; + return Mirror(Kokkos::ViewAllocateWithoutInitializing(src.label()), src.layout()); +} } /* namespace Kokkos */ diff --git a/lib/kokkos/core/src/Kokkos_Core.hpp b/lib/kokkos/core/src/Kokkos_Core.hpp index 4d0625ee1b..9fbba0abfa 100644 --- a/lib/kokkos/core/src/Kokkos_Core.hpp +++ b/lib/kokkos/core/src/Kokkos_Core.hpp @@ -66,6 +66,10 @@ #include #endif +#if defined( KOKKOS_ENABLE_HPX ) +#include +#endif + #if defined( KOKKOS_ENABLE_THREADS ) #include #endif @@ -87,6 +91,7 @@ #include #include #include +#include #include diff --git a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp index 150865d0f5..55c6a5494a 100644 --- a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp +++ b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp @@ -100,6 +100,12 @@ class Serial; ///< Execution space main process on CPU. class Qthreads; ///< Execution space with Qthreads back-end. #endif +#if defined( KOKKOS_ENABLE_HPX ) +namespace Experimental { +class HPX; ///< Execution space with HPX back-end. +} +#endif + #if defined( KOKKOS_ENABLE_THREADS ) class Threads; ///< Execution space with pthreads back-end. #endif @@ -156,6 +162,8 @@ namespace Kokkos { typedef Threads DefaultExecutionSpace; //#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) // typedef Qthreads DefaultExecutionSpace; +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX ) + typedef Kokkos::Experimental::HPX DefaultExecutionSpace; #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) typedef Serial DefaultExecutionSpace; #else @@ -176,6 +184,8 @@ namespace Kokkos { typedef Threads DefaultHostExecutionSpace; //#elif defined( KOKKOS_ENABLE_QTHREADS ) // typedef Qthreads DefaultHostExecutionSpace; +#elif defined( KOKKOS_ENABLE_HPX ) + typedef Kokkos::Experimental::HPX DefaultHostExecutionSpace; #elif defined( KOKKOS_ENABLE_SERIAL ) typedef Serial DefaultHostExecutionSpace; #else diff --git a/lib/kokkos/core/src/Kokkos_Crs.hpp b/lib/kokkos/core/src/Kokkos_Crs.hpp index ccc3944d86..8412ced921 100644 --- a/lib/kokkos/core/src/Kokkos_Crs.hpp +++ b/lib/kokkos/core/src/Kokkos_Crs.hpp @@ -187,7 +187,7 @@ class GetCrsTransposeCounts { using closure_type = Kokkos::Impl::ParallelFor; const closure_type closure(*this, policy_type(0, index_type(in.entries.size()))); closure.execute(); - execution_space::fence(); + execution_space().fence(); } }; @@ -266,7 +266,7 @@ class FillCrsTransposeEntries { using closure_type = Kokkos::Impl::ParallelFor; const closure_type closure(*this, policy_type(0, index_type(in.numRows()))); closure.execute(); - execution_space::fence(); + execution_space().fence(); } }; diff --git a/lib/kokkos/core/src/Kokkos_Cuda.hpp b/lib/kokkos/core/src/Kokkos_Cuda.hpp index 726a574961..4eb8ab4d4b 100644 --- a/lib/kokkos/core/src/Kokkos_Cuda.hpp +++ b/lib/kokkos/core/src/Kokkos_Cuda.hpp @@ -52,6 +52,7 @@ #include #include +#include #include #include @@ -67,6 +68,7 @@ namespace Kokkos { namespace Impl { class CudaExec ; +class CudaInternal ; } // namespace Impl } // namespace Kokkos @@ -74,6 +76,23 @@ class CudaExec ; namespace Kokkos { +namespace Impl { + namespace Experimental { + enum class CudaLaunchMechanism:unsigned{Default=0,ConstantMemory=1,GlobalMemory=2,LocalMemory=4}; + + constexpr inline CudaLaunchMechanism operator | (CudaLaunchMechanism p1, CudaLaunchMechanism p2) { + return static_cast(static_cast(p1) | static_cast(p2)); + } + constexpr inline CudaLaunchMechanism operator & (CudaLaunchMechanism p1, CudaLaunchMechanism p2) { + return static_cast(static_cast(p1) & static_cast(p2)); + } + + template + struct CudaDispatchProperties { + CudaLaunchMechanism launch_mechanism = l; + }; + } +} /// \class Cuda /// \brief Kokkos Execution Space that uses CUDA to run on GPUs. /// @@ -153,7 +172,13 @@ public: /// return asynchronously, before the functor completes. This /// method does not return until all dispatched functors on this /// device have completed. + static void impl_static_fence(); + + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE static void fence(); + #else + void fence() const; + #endif /** \brief Return the maximum amount of concurrency. */ static int concurrency(); @@ -165,15 +190,18 @@ public: //-------------------------------------------------- //! \name Cuda space instances + KOKKOS_INLINE_FUNCTION ~Cuda() {} + Cuda(); - explicit Cuda( const int instance_id ); Cuda( Cuda && ) = default ; Cuda( const Cuda & ) = default ; Cuda & operator = ( Cuda && ) = default ; Cuda & operator = ( const Cuda & ) = default ; + Cuda(cudaStream_t stream); + //-------------------------------------------------------------------------- //! \name Device-specific functions //@{ @@ -219,18 +247,18 @@ public: */ static std::vector detect_device_arch(); - cudaStream_t cuda_stream() const { return m_stream ; } - int cuda_device() const { return m_device ; } + cudaStream_t cuda_stream() const; + int cuda_device() const; //@} //-------------------------------------------------------------------------- static const char* name(); + inline Impl::CudaInternal* impl_internal_space_instance() const { return m_space_instance; } private: - int m_device ; - cudaStream_t m_stream ; + Impl::CudaInternal* m_space_instance; }; } // namespace Kokkos @@ -302,7 +330,8 @@ struct VerifyExecutionCanAccessMemorySpace /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ -#include +#include +#include #include #include #include diff --git a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp index d4693b43c1..5c85850fda 100644 --- a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp +++ b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp @@ -46,12 +46,14 @@ #include #include -#include #include #include #include #include #include +#if defined(KOKKOS_ENABLE_PROFILING) +#include +#endif // KOKKOS_ENABLE_PROFILING //---------------------------------------------------------------------------- @@ -91,8 +93,9 @@ template class RangePolicy : public Impl::PolicyTraits { -private: +public: typedef Impl::PolicyTraits traits; +private: typename traits::execution_space m_space ; typename traits::index_type m_begin ; @@ -100,6 +103,9 @@ private: typename traits::index_type m_granularity ; typename traits::index_type m_granularity_mask ; + template + friend class RangePolicy; + public: //! Tag this class as an execution policy typedef RangePolicy execution_policy; @@ -118,6 +124,15 @@ public: RangePolicy(const RangePolicy&) = default; RangePolicy(RangePolicy&&) = default; + template + RangePolicy(const RangePolicy p) { + m_space = p.m_space; + m_begin = p.m_begin; + m_end = p.m_end; + m_granularity = p.m_granularity; + m_granularity_mask = p.m_granularity_mask; + } + inline RangePolicy() : m_space(), m_begin(0), m_end(0) {} /** \brief Total range */ @@ -523,19 +538,22 @@ class TeamPolicy: public typename Impl::PolicyTraits::execution_space, Properties ...> internal_policy; - typedef Impl::PolicyTraits traits; + template + friend class TeamPolicy; public: + typedef Impl::PolicyTraits traits; + typedef TeamPolicy execution_policy; TeamPolicy& operator = (const TeamPolicy&) = default; /** \brief Construct policy with the given instance of the execution space */ - TeamPolicy( const typename traits::execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1 ) - : internal_policy(typename traits::execution_space(),league_size_request,team_size_request, vector_length_request) {first_arg = false;} + TeamPolicy( const typename traits::execution_space & space_ , int league_size_request , int team_size_request , int vector_length_request = 1 ) + : internal_policy(space_,league_size_request,team_size_request, vector_length_request) {first_arg = false;} - TeamPolicy( const typename traits::execution_space & , int league_size_request , const Kokkos::AUTO_t & , int vector_length_request = 1 ) - : internal_policy(typename traits::execution_space(),league_size_request,Kokkos::AUTO(), vector_length_request) {first_arg = false;} + TeamPolicy( const typename traits::execution_space & space_, int league_size_request , const Kokkos::AUTO_t & , int vector_length_request = 1 ) + : internal_policy(space_,league_size_request,Kokkos::AUTO(), vector_length_request) {first_arg = false;} /** \brief Construct policy with the default instance of the execution space */ TeamPolicy( int league_size_request , int team_size_request , int vector_length_request = 1 ) @@ -618,6 +636,11 @@ public: } #endif + template + TeamPolicy(const TeamPolicy p):internal_policy(p) { + first_arg = p.first_arg; + } + private: bool first_arg; TeamPolicy(const internal_policy& p):internal_policy(p) {first_arg = false;} @@ -754,6 +777,59 @@ public: {} }; +template +struct TeamVectorRangeBoundariesStruct { +private: + + KOKKOS_INLINE_FUNCTION static + iType ibegin( const iType & arg_begin + , const iType & arg_end + , const iType & arg_rank + , const iType & arg_size + ) + { + return arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * arg_rank ; + } + + KOKKOS_INLINE_FUNCTION static + iType iend( const iType & arg_begin + , const iType & arg_end + , const iType & arg_rank + , const iType & arg_size + ) + { + const iType end_ = arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * ( arg_rank + 1 ); + return end_ < arg_end ? end_ : arg_end ; + } + +public: + + typedef iType index_type; + const iType start; + const iType end; + enum {increment = 1}; + const TeamMemberType& thread; + + KOKKOS_INLINE_FUNCTION + TeamVectorRangeBoundariesStruct( const TeamMemberType& arg_thread + , const iType& arg_end + ) + : start( ibegin( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) ) + , end( iend( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) ) + , thread( arg_thread ) + {} + + KOKKOS_INLINE_FUNCTION + TeamVectorRangeBoundariesStruct( const TeamMemberType& arg_thread + , const iType& arg_begin + , const iType& arg_end + ) + : start( ibegin( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) ) + , end( iend( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) ) + , thread( arg_thread ) + {} +}; + template struct ThreadVectorRangeBoundariesStruct { typedef iType index_type; @@ -804,10 +880,10 @@ struct VectorSingleStruct { * This policy is used together with a parallel pattern as a nested layer within a kernel launched * with the TeamPolicy. This variant expects a single count. So the range is (0,count]. */ -template -KOKKOS_INLINE_FUNCTION +template +KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct -TeamThreadRange( const TeamMemberType&, const iType& count ); +TeamThreadRange( const TeamMemberType&, const iType& count ) = delete; /** \brief Execution policy for parallel work over a threads within a team. * @@ -815,10 +891,32 @@ TeamThreadRange( const TeamMemberType&, const iType& count ); * This policy is used together with a parallel pattern as a nested layer within a kernel launched * with the TeamPolicy. This variant expects a begin and end. So the range is (begin,end]. */ -template -KOKKOS_INLINE_FUNCTION +template +KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct::type, TeamMemberType> -TeamThreadRange( const TeamMemberType&, const iType1& begin, const iType2& end ); +TeamThreadRange( const TeamMemberType&, const iType1& begin, const iType2& end ) = delete; + +/** \brief Execution policy for parallel work over a threads within a team. + * + * The range is split over all threads in a team. The Mapping scheme depends on the architecture. + * This policy is used together with a parallel pattern as a nested layer within a kernel launched + * with the TeamPolicy. This variant expects a single count. So the range is (0,count]. + */ +template +KOKKOS_INLINE_FUNCTION_DELETED +Impl::TeamThreadRangeBoundariesStruct +TeamVectorRange( const TeamMemberType&, const iType& count ) = delete; + +/** \brief Execution policy for parallel work over a threads within a team. + * + * The range is split over all threads in a team. The Mapping scheme depends on the architecture. + * This policy is used together with a parallel pattern as a nested layer within a kernel launched + * with the TeamPolicy. This variant expects a begin and end. So the range is (begin,end]. + */ +template +KOKKOS_INLINE_FUNCTION_DELETED +Impl::TeamThreadRangeBoundariesStruct::type, TeamMemberType> +TeamVectorRange( const TeamMemberType&, const iType1& begin, const iType2& end ) = delete; /** \brief Execution policy for a vector parallel loop. * @@ -826,15 +924,15 @@ TeamThreadRange( const TeamMemberType&, const iType1& begin, const iType2& end ) * This policy is used together with a parallel pattern as a nested layer within a kernel launched * with the TeamPolicy. This variant expects a single count. So the range is (0,count]. */ -template -KOKKOS_INLINE_FUNCTION +template +KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct -ThreadVectorRange( const TeamMemberType&, const iType& count ); +ThreadVectorRange( const TeamMemberType&, const iType& count ) = delete; -template -KOKKOS_INLINE_FUNCTION +template +KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct -ThreadVectorRange( const TeamMemberType&, const iType& arg_begin, const iType& arg_end ); +ThreadVectorRange( const TeamMemberType&, const iType& arg_begin, const iType& arg_end ) = delete; #if defined(KOKKOS_ENABLE_PROFILING) namespace Impl { @@ -877,5 +975,44 @@ struct ParallelConstructName { } // namespace Kokkos +namespace Kokkos { +namespace Experimental { + +namespace Impl { + template + struct PolicyPropertyAdaptor; + + template + struct PolicyPropertyAdaptor,RangePolicy> { + typedef RangePolicy policy_in_t; + typedef RangePolicy> policy_out_t; + }; + + template + struct PolicyPropertyAdaptor,TeamPolicy> { + typedef TeamPolicy policy_in_t; + typedef TeamPolicy> policy_out_t; + }; +} + +template +constexpr typename Impl::PolicyPropertyAdaptor,PolicyType>::policy_out_t + require(const PolicyType p, WorkItemProperty::ImplWorkItemProperty

){ + return typename Impl::PolicyPropertyAdaptor,PolicyType>::policy_out_t(p); +} +} //Experimental +} //Kokkos #endif /* #define KOKKOS_EXECPOLICY_HPP */ diff --git a/lib/kokkos/core/src/Kokkos_Extents.hpp b/lib/kokkos/core/src/Kokkos_Extents.hpp new file mode 100644 index 0000000000..c8b9110485 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Extents.hpp @@ -0,0 +1,186 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_KOKKOS_EXTENTS_HPP +#define KOKKOS_KOKKOS_EXTENTS_HPP + +#include + +namespace Kokkos { +namespace Experimental { + +constexpr ptrdiff_t dynamic_extent = -1; + +template +struct Extents { + /* TODO @enhancement flesh this out more */ +}; + +template +struct PrependExtent; + +template +struct PrependExtent< + Extents, NewExtent +> { + using type = Extents; +}; + +template +struct AppendExtent; + +template +struct AppendExtent< + Extents, NewExtent +> { + using type = Extents; +}; + +} // end namespace Experimental + +namespace Impl { + +namespace _parse_view_extents_impl { + +template +struct _all_remaining_extents_dynamic : std::true_type { }; + +template +struct _all_remaining_extents_dynamic + : _all_remaining_extents_dynamic +{ }; + +template +struct _all_remaining_extents_dynamic + : std::false_type +{ }; + +template +struct _parse_impl { + using type = Result; +}; + +// We have to treat the case of int**[x] specially, since it *doesn't* go backwards +template +struct _parse_impl< + T*, Experimental::Extents, + typename std::enable_if<_all_remaining_extents_dynamic::value>::type +> + : _parse_impl< + T, Experimental::Extents + > +{ }; + +// int*(*[x])[y] should still work also (meaning int[][x][][y]) +template +struct _parse_impl< + T*, Experimental::Extents, + typename std::enable_if::value>::type +> +{ + using _next = Kokkos::Experimental::AppendExtent< + typename _parse_impl, void>::type, + Experimental::dynamic_extent + >; + using type = typename _next::type; +}; + +template +struct _parse_impl< + T[N], Experimental::Extents, void +> + : _parse_impl< + T, Experimental::Extents // TODO @pedantic this could be a narrowing cast + > +{ }; + +} // end namespace _parse_view_extents_impl + +template +struct ParseViewExtents { + using type = + typename _parse_view_extents_impl + ::_parse_impl>::type; +}; + +template +struct ApplyExtent +{ + using type = ValueType[Ext]; +}; + +template +struct ApplyExtent +{ + using type = ValueType*; +}; + +template +struct ApplyExtent +{ + using type = typename ApplyExtent::type[N]; +}; + +template +struct ApplyExtent +{ + using type = ValueType*[Ext]; +}; + +template +struct ApplyExtent +{ + using type = typename ApplyExtent::type*; +}; + +template +struct ApplyExtent +{ + using type = typename ApplyExtent::type[N]; +}; + +} // end namespace Impl + +} // end namespace Kokkos + +#endif //KOKKOS_KOKKOS_EXTENTS_HPP diff --git a/lib/kokkos/core/src/Kokkos_Future.hpp b/lib/kokkos/core/src/Kokkos_Future.hpp new file mode 100644 index 0000000000..665ce71cf5 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Future.hpp @@ -0,0 +1,567 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_FUTURE_HPP +#define KOKKOS_FUTURE_HPP + +//---------------------------------------------------------------------------- + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include +//---------------------------------------------------------------------------- + +#include +#include +#include +#include + +#include // is_space + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +// For now, hack this in as a partial specialization +// TODO @tasking @cleanup Make this the "normal" class template and make the old code the specialization +template +class BasicFuture> +{ +public: + + using value_type = ValueType; + using execution_space = ExecutionSpace; + using scheduler_type = SimpleTaskScheduler; + using queue_type = typename scheduler_type::task_queue_type; + + +private: + + template + friend class SimpleTaskScheduler; + template + friend class BasicFuture; + + using task_base_type = typename scheduler_type::task_base_type; + using task_queue_type = typename scheduler_type::task_queue_type; + + using task_queue_traits = typename scheduler_type::task_queue_traits; + using task_scheduling_info_type = typename scheduler_type::task_scheduling_info_type; + + using result_storage_type = + Impl::TaskResultStorage< + ValueType, + Impl::SchedulingInfoStorage< + Impl::RunnableTaskBase, + task_scheduling_info_type + > + >; + + + + OwningRawPtr m_task = nullptr; + + KOKKOS_INLINE_FUNCTION + explicit + BasicFuture(task_base_type* task) + : m_task(task) + { + // Note: reference count starts at 2 to account for initial increment + // TODO @tasking @minor DSH verify reference count here and/or encapsulate starting reference count closer to here + } + +public: + + KOKKOS_INLINE_FUNCTION + BasicFuture() noexcept : m_task(nullptr) { } + + KOKKOS_INLINE_FUNCTION + BasicFuture(BasicFuture&& rhs) noexcept + : m_task(std::move(rhs.m_task)) + { + rhs.m_task = nullptr; + } + + KOKKOS_INLINE_FUNCTION + BasicFuture(BasicFuture const& rhs) + // : m_task(rhs.m_task) + : m_task(nullptr) + { + *static_cast(&m_task) = rhs.m_task; + if(m_task) m_task->increment_reference_count(); + } + + KOKKOS_INLINE_FUNCTION + BasicFuture& operator=(BasicFuture&& rhs) noexcept + { + if(m_task != rhs.m_task) { + clear(); + //m_task = std::move(rhs.m_task); + *static_cast(&m_task) = rhs.m_task; + // rhs.m_task reference count is unchanged, since this is a move + } + else { + // They're the same, but this is a move, so 1 fewer references now + rhs.clear(); + } + rhs.m_task = nullptr; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + BasicFuture& operator=(BasicFuture const& rhs) + { + if(m_task != rhs.m_task) { + clear(); + //m_task = rhs.m_task; + *static_cast(&m_task) = rhs.m_task; + } + if(m_task != nullptr) { m_task->increment_reference_count(); } + return *this; + } + + //---------------------------------------- + + template + KOKKOS_INLINE_FUNCTION + BasicFuture(BasicFuture&& rhs) noexcept // NOLINT(google-explicit-constructor) + : m_task(std::move(rhs.m_task)) + { + static_assert( + std::is_same::value || + std::is_same::value, + "Moved Futures must have the same scheduler" + ); + + static_assert( + std::is_same::value || + std::is_same::value, + "Moved Futures must have the same value_type" + ); + + // reference counts are unchanged, since this is a move + rhs.m_task = nullptr; + } + + template + KOKKOS_INLINE_FUNCTION + BasicFuture(BasicFuture const& rhs) // NOLINT(google-explicit-constructor) + //: m_task(rhs.m_task) + : m_task(nullptr) + { + static_assert( + std::is_same::value || + std::is_same::value, + "Copied Futures must have the same scheduler" + ); + + static_assert( + std::is_same::value || + std::is_same::value, + "Copied Futures must have the same value_type" + ); + + *static_cast(&m_task) = rhs.m_task; + if(m_task) m_task->increment_reference_count(); + } + + template + KOKKOS_INLINE_FUNCTION + BasicFuture& + operator=(BasicFuture const& rhs) + { + static_assert( + std::is_same::value || + std::is_same::value, + "Assigned Futures must have the same scheduler" + ); + + static_assert( + std::is_same::value || + std::is_same::value, + "Assigned Futures must have the same value_type" + ); + + if(m_task != rhs.m_task) { + clear(); + //m_task = rhs.m_task; + *static_cast(&m_task) = rhs.m_task; + if(m_task != nullptr) { m_task->increment_reference_count(); } + } + return *this; + } + + template + KOKKOS_INLINE_FUNCTION + BasicFuture& operator=(BasicFuture&& rhs) + { + static_assert( + std::is_same::value || + std::is_same::value, + "Assigned Futures must have the same scheduler" + ); + + static_assert( + std::is_same::value || + std::is_same::value, + "Assigned Futures must have the same value_type" + ); + + if(m_task != rhs.m_task) { + clear(); + //m_task = std::move(rhs.m_task); + *static_cast(&m_task) = rhs.m_task; + // rhs.m_task reference count is unchanged, since this is a move + } + else { + // They're the same, but this is a move, so 1 fewer references now + rhs.clear(); + } + rhs.m_task = nullptr; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + ~BasicFuture() noexcept { clear(); } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + void clear() noexcept { + if(m_task) { + bool should_delete = m_task->decrement_and_check_reference_count(); + if(should_delete) { + static_cast(m_task->ready_queue_base_ptr()) + ->deallocate(std::move(*m_task)); + } + } + //m_task = nullptr; + *static_cast(&m_task) = nullptr; + } + + KOKKOS_INLINE_FUNCTION + bool is_null() const noexcept { + return m_task == nullptr; + } + + + KOKKOS_INLINE_FUNCTION + bool is_ready() const noexcept { + return (m_task == nullptr) || m_task->wait_queue_is_consumed(); + } + + KOKKOS_INLINE_FUNCTION + const typename Impl::TaskResult< ValueType >::reference_type + get() const + { + KOKKOS_EXPECTS(is_ready()); + return static_cast(m_task)->value_reference(); + //return Impl::TaskResult::get(m_task); + } + +}; + +//////////////////////////////////////////////////////////////////////////////// +// OLD CODE +//////////////////////////////////////////////////////////////////////////////// + +template +class BasicFuture { +private: + + template< typename , typename > friend class BasicTaskScheduler ; + template< typename , typename > friend class BasicFuture ; + friend class Impl::TaskBase ; + template< typename , typename , typename > friend class Impl::Task ; + + + //---------------------------------------- + +public: + + //---------------------------------------- + + using scheduler_type = Scheduler; + using queue_type = typename scheduler_type::queue_type; + using execution_space = typename scheduler_type::execution_space; + using value_type = ValueType; + + //---------------------------------------- + +private: + + //---------------------------------------- + + using task_base = Impl::TaskBase; + + task_base * m_task ; + + KOKKOS_INLINE_FUNCTION explicit + BasicFuture( task_base * task ) : m_task(0) + { if ( task ) queue_type::assign( & m_task , task ); } + + //---------------------------------------- + +public: + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + bool is_null() const { return 0 == m_task ; } + + KOKKOS_INLINE_FUNCTION + int reference_count() const + { return 0 != m_task ? m_task->reference_count() : 0 ; } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + void clear() + { if ( m_task ) queue_type::assign( & m_task , (task_base*)0 ); } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + ~BasicFuture() { clear(); } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + BasicFuture() noexcept : m_task(nullptr) { } + + KOKKOS_INLINE_FUNCTION + BasicFuture( BasicFuture && rhs ) noexcept + : m_task( rhs.m_task ) + { + rhs.m_task = 0; + } + + KOKKOS_INLINE_FUNCTION + BasicFuture( const BasicFuture & rhs ) + : m_task(0) + { if ( rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); } + + KOKKOS_INLINE_FUNCTION + BasicFuture& operator=(BasicFuture&& rhs) noexcept + { + clear(); + m_task = rhs.m_task ; + rhs.m_task = 0 ; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + BasicFuture& operator=(BasicFuture const& rhs) + { + if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); + return *this ; + } + + //---------------------------------------- + + template + KOKKOS_INLINE_FUNCTION + BasicFuture(BasicFuture&& rhs) noexcept // NOLINT(google-explicit-constructor) + : m_task( rhs.m_task ) + { + static_assert + ( std::is_same::value || + std::is_same::value + , "Assigned Futures must have the same scheduler" ); + + static_assert + ( std::is_same< value_type , void >::value || + std::is_same::value + , "Assigned Futures must have the same value_type" ); + + rhs.m_task = 0 ; + } + + template + KOKKOS_INLINE_FUNCTION + BasicFuture(BasicFuture const& rhs) // NOLINT(google-explicit-constructor) + : m_task(nullptr) + { + static_assert + ( std::is_same::value || + std::is_same::value + , "Assigned Futures must have the same scheduler" ); + + static_assert + ( std::is_same< value_type , void >::value || + std::is_same::value + , "Assigned Futures must have the same value_type" ); + + if ( rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); + } + + template + KOKKOS_INLINE_FUNCTION + BasicFuture& + operator=(BasicFuture const& rhs) + { + static_assert + ( std::is_same::value || + std::is_same::value + , "Assigned Futures must have the same scheduler" ); + + static_assert + ( std::is_same< value_type , void >::value || + std::is_same::value + , "Assigned Futures must have the same value_type" ); + + if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); + return *this ; + } + + template + KOKKOS_INLINE_FUNCTION + BasicFuture& operator=(BasicFuture&& rhs) + { + static_assert + ( std::is_same::value || + std::is_same::value + , "Assigned Futures must have the same scheduler" ); + + static_assert + ( std::is_same< value_type , void >::value || + std::is_same::value + , "Assigned Futures must have the same value_type" ); + + clear(); + m_task = rhs.m_task ; + rhs.m_task = 0 ; + return *this ; + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + int is_ready() const noexcept + { return ( 0 == m_task ) || ( ((task_base*) task_base::LockTag) == m_task->m_wait ); } + + KOKKOS_INLINE_FUNCTION + const typename Impl::TaskResult< ValueType >::reference_type + get() const + { + if ( 0 == m_task ) { + Kokkos::abort( "Kokkos:::Future::get ERROR: is_null()"); + } + return Impl::TaskResult< ValueType >::get( m_task ); + } +}; + +// Is a Future with the given execution space +template< typename , typename ExecSpace = void > +struct is_future : public std::false_type {}; + +template +struct is_future, ExecSpace> + : std::integral_constant::value + || std::is_void::value + > +{}; + +//////////////////////////////////////////////////////////////////////////////// +// END OLD CODE +//////////////////////////////////////////////////////////////////////////////// + +namespace Impl { + +template +class ResolveFutureArgOrder { +private: + enum { Arg1_is_space = Kokkos::is_space::value }; + enum { Arg2_is_space = Kokkos::is_space::value }; + enum { Arg1_is_value = !Arg1_is_space && !std::is_same::value }; + enum { Arg2_is_value = !Arg2_is_space && !std::is_same::value }; + + static_assert( + ! ( Arg1_is_space && Arg2_is_space ), + "Future cannot be given two spaces" + ); + + static_assert( + ! ( Arg1_is_value && Arg2_is_value ), + "Future cannot be given two value types" + ); + + using value_type = + typename std::conditional::type + >::type; + + using execution_space = + typename std::conditional::type + >::type::execution_space; + +public: + + using type = BasicFuture>; + +}; + +} // end namespace Impl + +/** + * + * Future< space > // value_type == void + * Future< value > // space == Default + * Future< value , space > + * + */ +template +using Future = typename Impl::ResolveFutureArgOrder::type; + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_FUTURE */ diff --git a/lib/kokkos/core/src/Kokkos_HPX.hpp b/lib/kokkos/core/src/Kokkos_HPX.hpp new file mode 100644 index 0000000000..79a2b74da4 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_HPX.hpp @@ -0,0 +1,1999 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HPX_HPP +#define KOKKOS_HPX_HPP + +#include +#if defined(KOKKOS_ENABLE_HPX) + +#include + +#include +#include +#include + +#ifdef KOKKOS_ENABLE_HBWSPACE +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +// There are currently two different implementations for the parallel dispatch +// functions: +// +// - 0: The HPX way. Unfortunately, this comes with unnecessary +// overheads at the moment, so there is +// - 1: The manual way. This way is more verbose and does not take advantage of +// e.g. parallel::for_loop in HPX but it is significantly faster in many +// benchmarks. +// +// In the long run 0 should be the preferred implementation, but until HPX is +// improved 1 will be the default. +#ifndef KOKKOS_HPX_IMPLEMENTATION +#define KOKKOS_HPX_IMPLEMENTATION 1 +#endif + +#if (KOKKOS_HPX_IMPLEMENTATION < 0) || (KOKKOS_HPX_IMPLEMENTATION > 1) +#error "You have chosen an invalid value for KOKKOS_HPX_IMPLEMENTATION" +#endif + +namespace Kokkos { +namespace Impl { +class thread_buffer { + static constexpr std::size_t m_cache_line_size = 64; + + std::size_t m_num_threads; + std::size_t m_size_per_thread; + std::size_t m_size_total; + char *m_data; + + void pad_to_cache_line(std::size_t &size) { + size = ((size + m_cache_line_size - 1) / m_cache_line_size) * + m_cache_line_size; + } + +public: + thread_buffer() + : m_num_threads(0), m_size_per_thread(0), m_size_total(0), + m_data(nullptr) {} + thread_buffer(const std::size_t num_threads, + const std::size_t size_per_thread) { + resize(num_threads, size_per_thread); + } + ~thread_buffer() { delete[] m_data; } + + thread_buffer(const thread_buffer &) = delete; + thread_buffer(thread_buffer &&) = delete; + thread_buffer &operator=(const thread_buffer &) = delete; + thread_buffer &operator=(thread_buffer) = delete; + + void resize(const std::size_t num_threads, + const std::size_t size_per_thread) { + m_num_threads = num_threads; + m_size_per_thread = size_per_thread; + + pad_to_cache_line(m_size_per_thread); + + std::size_t size_total_new = m_num_threads * m_size_per_thread; + + if (m_size_total < size_total_new) { + delete[] m_data; + m_data = new char[size_total_new]; + m_size_total = size_total_new; + } + } + + char *get(std::size_t thread_num) { + assert(thread_num < m_num_threads); + if (m_data == nullptr) { + return nullptr; + } + return &m_data[thread_num * m_size_per_thread]; + } + + std::size_t size_per_thread() const noexcept { return m_size_per_thread; } + std::size_t size_total() const noexcept { return m_size_total; } +}; +} // namespace Impl + +namespace Experimental { +class HPX { +private: + static bool m_hpx_initialized; + static Kokkos::Impl::thread_buffer m_buffer; +#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH) + static hpx::future m_future; +#endif + +public: + using execution_space = HPX; + using memory_space = HostSpace; + using device_type = Kokkos::Device; + using array_layout = LayoutRight; + using size_type = memory_space::size_type; + using scratch_memory_space = ScratchMemorySpace; + + HPX() noexcept {} + static void print_configuration(std::ostream &, + const bool /* verbose */ = false) { + std::cout << "HPX backend" << std::endl; + } + + static bool in_parallel(HPX const & = HPX()) noexcept { return false; } + static void impl_static_fence(HPX const & = HPX()) + #if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH) + { + if (hpx::threads::get_self_ptr() == nullptr) { + hpx::threads::run_as_hpx_thread([]() { impl_get_future().wait(); }); + } else { + impl_get_future().wait(); + } + } + #else + noexcept { + } + #endif + + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE + static void fence(HPX const & = HPX()) { + #else + void fence() const { + #endif + impl_static_fence(); + } + + static bool is_asynchronous(HPX const & = HPX()) noexcept { +#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH) + return true; +#else + return false; +#endif + } + + static std::vector partition(...) { + Kokkos::abort("Kokkos::Experimental::HPX::partition_master: can't partition an HPX " + "instance\n"); + return std::vector(); + } + + template + static void partition_master(F const &f, int requested_num_partitions = 0, + int requested_partition_size = 0) { + if (requested_num_partitions > 1) { + Kokkos::abort("Kokkos::Experimental::HPX::partition_master: can't partition an " + "HPX instance\n"); + } + } + + static int concurrency(); + static void impl_initialize(int thread_count); + static void impl_initialize(); + static bool impl_is_initialized() noexcept; + static void impl_finalize(); + + static int impl_thread_pool_size() noexcept { + hpx::runtime *rt = hpx::get_runtime_ptr(); + if (rt == nullptr) { + return 0; + } else { + if (hpx::threads::get_self_ptr() == nullptr) { + return hpx::resource::get_thread_pool(0).get_os_thread_count(); + } else { + return hpx::this_thread::get_pool()->get_os_thread_count(); + } + } + } + + static int impl_thread_pool_rank() noexcept { + hpx::runtime *rt = hpx::get_runtime_ptr(); + if (rt == nullptr) { + return 0; + } else { + if (hpx::threads::get_self_ptr() == nullptr) { + return 0; + } else { + return hpx::this_thread::get_pool()->get_pool_index(); + } + } + } + + static int impl_thread_pool_size(int depth) { + if (depth == 0) { + return impl_thread_pool_size(); + } else { + return 1; + } + } + + static int impl_max_hardware_threads() noexcept { + return hpx::threads::hardware_concurrency(); + } + + static int impl_hardware_thread_id() noexcept { + return hpx::get_worker_thread_num(); + } + + static Kokkos::Impl::thread_buffer &impl_get_buffer() noexcept { + return m_buffer; + } +#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH) + static hpx::future &impl_get_future() noexcept { return m_future; } +#endif + + static constexpr const char *name() noexcept { return "HPX"; } +}; +} // namespace Experimental + +namespace Impl { +template +inline void dispatch_execute_task(Closure *closure) { +#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH) + if (hpx::threads::get_self_ptr() == nullptr) { + hpx::threads::run_as_hpx_thread([closure]() { + hpx::future &fut = Kokkos::Experimental::HPX::impl_get_future(); + Closure closure_copy = *closure; + fut = fut.then([closure_copy](hpx::future &&) { + closure_copy.execute_task(); + }); + }); + } else { + hpx::future &fut = Kokkos::Experimental::HPX::impl_get_future(); + Closure closure_copy = *closure; + fut = fut.then( + [closure_copy](hpx::future &&) { closure_copy.execute_task(); }); + } +#else + if (hpx::threads::get_self_ptr() == nullptr) { + hpx::threads::run_as_hpx_thread([closure]() { closure->execute_task(); }); + } else { + closure->execute_task(); + } +#endif +} +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { +namespace Impl { +template <> +struct MemorySpaceAccess { + enum { assignable = false }; + enum { accessible = true }; + enum { deepcopy = false }; +}; + +template <> +struct VerifyExecutionCanAccessMemorySpace< + Kokkos::Experimental::HPX::memory_space, + Kokkos::Experimental::HPX::scratch_memory_space> { + enum { value = true }; + inline static void verify(void) {} + inline static void verify(const void *) {} +}; +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { +namespace Experimental { +template <> class UniqueToken { +public: + using execution_space = HPX; + using size_type = int; + UniqueToken(execution_space const & = execution_space()) noexcept {} + + // NOTE: Currently this assumes that there is no oversubscription. + // hpx::get_num_worker_threads can't be used directly because it may yield + // it's task (problematic if called after hpx::get_worker_thread_num). + int size() const noexcept { return HPX::impl_max_hardware_threads(); } + int acquire() const noexcept { return HPX::impl_hardware_thread_id(); } + void release(int) const noexcept {} +}; + +template <> class UniqueToken { +public: + using execution_space = HPX; + using size_type = int; + UniqueToken(execution_space const & = execution_space()) noexcept {} + + // NOTE: Currently this assumes that there is no oversubscription. + // hpx::get_num_worker_threads can't be used directly because it may yield + // it's task (problematic if called after hpx::get_worker_thread_num). + int size() const noexcept { return HPX::impl_max_hardware_threads(); } + int acquire() const noexcept { return HPX::impl_hardware_thread_id(); } + void release(int) const noexcept {} +}; +} // namespace Experimental +} // namespace Kokkos + +namespace Kokkos { +namespace Impl { + +struct HPXTeamMember { +public: + using execution_space = Kokkos::Experimental::HPX; + using scratch_memory_space = + Kokkos::ScratchMemorySpace; + +private: + scratch_memory_space m_team_shared; + std::size_t m_team_shared_size; + + int m_league_size; + int m_league_rank; + int m_team_size; + int m_team_rank; + +public: + KOKKOS_INLINE_FUNCTION + const scratch_memory_space &team_shmem() const { + return m_team_shared.set_team_thread_mode(0, 1, 0); + } + + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space &team_scratch(const int) const { + return m_team_shared.set_team_thread_mode(0, 1, 0); + } + + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space &thread_scratch(const int) const { + return m_team_shared.set_team_thread_mode(0, team_size(), team_rank()); + } + + KOKKOS_INLINE_FUNCTION int league_rank() const noexcept { + return m_league_rank; + } + + KOKKOS_INLINE_FUNCTION int league_size() const noexcept { + return m_league_size; + } + + KOKKOS_INLINE_FUNCTION int team_rank() const noexcept { return m_team_rank; } + KOKKOS_INLINE_FUNCTION int team_size() const noexcept { return m_team_size; } + + template + constexpr KOKKOS_INLINE_FUNCTION + HPXTeamMember(const TeamPolicyInternal &policy, + const int team_rank, const int league_rank, void *scratch, + int scratch_size) noexcept + : m_team_shared(scratch, scratch_size, scratch, scratch_size), + m_team_shared_size(scratch_size), m_league_size(policy.league_size()), + m_league_rank(league_rank), m_team_size(policy.team_size()), + m_team_rank(team_rank) {} + + KOKKOS_INLINE_FUNCTION + void team_barrier() const {} + + template + KOKKOS_INLINE_FUNCTION void team_broadcast(ValueType &, const int &) const { + static_assert(std::is_trivially_default_constructible(), + "Only trivial constructible types can be broadcasted"); + } + + template + KOKKOS_INLINE_FUNCTION void team_broadcast(const Closure &, ValueType &, + const int &) const { + static_assert(std::is_trivially_default_constructible(), + "Only trivial constructible types can be broadcasted"); + } + + template + KOKKOS_INLINE_FUNCTION ValueType team_reduce(const ValueType &value, + const JoinOp &) const { + return value; + } + + template + KOKKOS_INLINE_FUNCTION + typename std::enable_if::value>::type + team_reduce(const ReducerType &reducer) const {} + + template + KOKKOS_INLINE_FUNCTION Type + team_scan(const Type &value, Type *const global_accum = nullptr) const { + if (global_accum) { + Kokkos::atomic_fetch_add(global_accum, value); + } + + return 0; + } +}; + +template +class TeamPolicyInternal + : public PolicyTraits { + using traits = PolicyTraits; + + int m_league_size; + int m_team_size; + std::size_t m_team_scratch_size[2]; + std::size_t m_thread_scratch_size[2]; + int m_chunk_size; + +public: + using member_type = HPXTeamMember; + + // NOTE: Max size is 1 for simplicity. In most cases more than 1 is not + // necessary on CPU. Implement later if there is a need. + template + inline static int team_size_max(const FunctorType &) { + return 1; + } + + template + inline static int team_size_recommended(const FunctorType &) { + return 1; + } + + template + inline static int team_size_recommended(const FunctorType &, const int &) { + return 1; + } + + template + int team_size_max(const FunctorType &, const ParallelForTag &) const { + return 1; + } + + template + int team_size_max(const FunctorType &, const ParallelReduceTag &) const { + return 1; + } + template + int team_size_recommended(const FunctorType &, const ParallelForTag &) const { + return 1; + } + template + int team_size_recommended(const FunctorType &, + const ParallelReduceTag &) const { + return 1; + } + +private: + inline void init(const int league_size_request, const int team_size_request) { + m_league_size = league_size_request; + const int max_team_size = 1; // TODO: Can't use team_size_max(...) because + // it requires a functor as argument. + m_team_size = + team_size_request > max_team_size ? max_team_size : team_size_request; + + if (m_chunk_size > 0) { + if (!Impl::is_integral_power_of_two(m_chunk_size)) + Kokkos::abort("TeamPolicy blocking granularity must be power of two"); + } else { + int new_chunk_size = 1; + while (new_chunk_size * 4 * Kokkos::Experimental::HPX::concurrency() < + m_league_size) { + new_chunk_size *= 2; + } + + if (new_chunk_size < 128) { + new_chunk_size = 1; + while ((new_chunk_size * Kokkos::Experimental::HPX::concurrency() < + m_league_size) && + (new_chunk_size < 128)) + new_chunk_size *= 2; + } + + m_chunk_size = new_chunk_size; + } + } + +public: + inline int team_size() const { return m_team_size; } + inline int league_size() const { return m_league_size; } + + inline size_t scratch_size(const int &level, int team_size_ = -1) const { + if (team_size_ < 0) { + team_size_ = m_team_size; + } + return m_team_scratch_size[level] + + team_size_ * m_thread_scratch_size[level]; + } + +public: + template + friend class TeamPolicyInternal; + + template + TeamPolicyInternal( + const TeamPolicyInternal &p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + } + + TeamPolicyInternal(const typename traits::execution_space &, + int league_size_request, int team_size_request, + int /* vector_length_request */ = 1) + : m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, + m_chunk_size(0) { + init(league_size_request, team_size_request); + } + + TeamPolicyInternal(const typename traits::execution_space &, + int league_size_request, + const Kokkos::AUTO_t &team_size_request, + int /* vector_length_request */ = 1) + : m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, + m_chunk_size(0) { + init(league_size_request, 1); + } + + TeamPolicyInternal(int league_size_request, int team_size_request, + int /* vector_length_request */ = 1) + : m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, + m_chunk_size(0) { + init(league_size_request, team_size_request); + } + + TeamPolicyInternal(int league_size_request, + const Kokkos::AUTO_t &team_size_request, + int /* vector_length_request */ = 1) + : m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, + m_chunk_size(0) { + init(league_size_request, 1); + } + + inline int chunk_size() const { return m_chunk_size; } + + inline TeamPolicyInternal & + set_chunk_size(typename traits::index_type chunk_size_) { + m_chunk_size = chunk_size_; + return *this; + } + + inline TeamPolicyInternal &set_scratch_size(const int &level, + const PerTeamValue &per_team) { + m_team_scratch_size[level] = per_team.value; + return *this; + } + + inline TeamPolicyInternal & + set_scratch_size(const int &level, const PerThreadValue &per_thread) { + m_thread_scratch_size[level] = per_thread.value; + return *this; + } + + inline TeamPolicyInternal & + set_scratch_size(const int &level, const PerTeamValue &per_team, + const PerThreadValue &per_thread) { + m_team_scratch_size[level] = per_team.value; + m_thread_scratch_size[level] = per_thread.value; + return *this; + } +}; +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { +namespace Impl { + +template +class ParallelFor, + Kokkos::Experimental::HPX> { +private: + using Policy = Kokkos::RangePolicy; + using WorkTag = typename Policy::work_tag; + using WorkRange = typename Policy::WorkRange; + using Member = typename Policy::member_type; + + const FunctorType m_functor; + const Policy m_policy; + + template + static typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Member i) { + functor(i); + } + + template + static typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Member i) { + const TagType t{}; + functor(t, i); + } + + template + static typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end) { + for (Member i = i_begin; i < i_end; ++i) { + functor(i); + } + } + + template + static typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end) { + const TagType t{}; + for (Member i = i_begin; i < i_end; ++i) { + functor(t, i); + } + } + +public: + void execute() const { Kokkos::Impl::dispatch_execute_task(this); } + + void execute_task() const { +#if KOKKOS_HPX_IMPLEMENTATION == 0 + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + using hpx::parallel::execution::static_chunk_size; + + for_loop(par.with(static_chunk_size(m_policy.chunk_size())), + m_policy.begin(), m_policy.end(), [this](const Member i) { + execute_functor(m_functor, i); + }); + +#elif KOKKOS_HPX_IMPLEMENTATION == 1 + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + + counting_semaphore sem(0); + std::size_t num_tasks = 0; + + for (Member i_begin = m_policy.begin(); i_begin < m_policy.end(); + i_begin += m_policy.chunk_size()) { + apply([this, &sem, i_begin]() { + const Member i_end = + (std::min)(i_begin + m_policy.chunk_size(), m_policy.end()); + execute_functor_range(m_functor, i_begin, i_end); + + sem.signal(1); + }); + + ++num_tasks; + } + + sem.wait(num_tasks); +#endif + } + + inline ParallelFor(const FunctorType &arg_functor, Policy arg_policy) + : m_functor(arg_functor), m_policy(arg_policy) {} +}; + +template +class ParallelFor, + Kokkos::Experimental::HPX> { +private: + using MDRangePolicy = Kokkos::MDRangePolicy; + using Policy = typename MDRangePolicy::impl_range_policy; + using WorkTag = typename MDRangePolicy::work_tag; + using WorkRange = typename Policy::WorkRange; + using Member = typename Policy::member_type; + using iterate_type = + typename Kokkos::Impl::HostIterateTile; + + const FunctorType m_functor; + const MDRangePolicy m_mdr_policy; + const Policy m_policy; + +public: + void execute() const { dispatch_execute_task(this); } + + inline void execute_task() const { +#if KOKKOS_HPX_IMPLEMENTATION == 0 + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + using hpx::parallel::execution::static_chunk_size; + + for_loop(par.with(static_chunk_size(m_policy.chunk_size())), + m_policy.begin(), m_policy.end(), [this](const Member i) { + iterate_type(m_mdr_policy, m_functor)(i); + }); + +#elif KOKKOS_HPX_IMPLEMENTATION == 1 + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + + counting_semaphore sem(0); + std::size_t num_tasks = 0; + + for (Member i_begin = m_policy.begin(); i_begin < m_policy.end(); + i_begin += m_policy.chunk_size()) { + apply([this, &sem, i_begin]() { + const Member i_end = + (std::min)(i_begin + m_policy.chunk_size(), m_policy.end()); + for (Member i = i_begin; i < i_end; ++i) { + iterate_type(m_mdr_policy, m_functor)(i); + } + + sem.signal(1); + }); + + ++num_tasks; + } + + sem.wait(num_tasks); +#endif + } + + inline ParallelFor(const FunctorType &arg_functor, MDRangePolicy arg_policy) + : m_functor(arg_functor), m_mdr_policy(arg_policy), + m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)) {} +}; +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { +namespace Impl { +template +class ParallelReduce, ReducerType, + Kokkos::Experimental::HPX> { +private: + using Policy = Kokkos::RangePolicy; + using WorkTag = typename Policy::work_tag; + using WorkRange = typename Policy::WorkRange; + using Member = typename Policy::member_type; + using Analysis = + FunctorAnalysis; + using ReducerConditional = + Kokkos::Impl::if_c::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + typename Kokkos::Impl::if_c::value, + WorkTag, void>::type; + using ValueInit = Kokkos::Impl::FunctorValueInit; + using ValueJoin = Kokkos::Impl::FunctorValueJoin; + using ValueOps = Kokkos::Impl::FunctorValueOps; + using value_type = typename Analysis::value_type; + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + + const FunctorType m_functor; + const Policy m_policy; + const ReducerType m_reducer; + const pointer_type m_result_ptr; + + bool m_force_synchronous; + + template + inline static + typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Member i, + reference_type update) { + functor(i, update); + } + + template + inline static + typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Member i, + reference_type update) { + const TagType t{}; + functor(t, i, update); + } + + template + inline typename std::enable_if::value>::type + execute_functor_range(reference_type update, const Member i_begin, + const Member i_end) const { + for (Member i = i_begin; i < i_end; ++i) { + m_functor(i, update); + } + } + + template + inline typename std::enable_if::value>::type + execute_functor_range(reference_type update, const Member i_begin, + const Member i_end) const { + const TagType t{}; + + for (Member i = i_begin; i < i_end; ++i) { + m_functor(t, i, update); + } + } + + class value_type_wrapper { + private: + std::size_t m_value_size; + char *m_value_buffer; + + public: + value_type_wrapper() : m_value_size(0), m_value_buffer(nullptr) {} + + value_type_wrapper(const std::size_t value_size) + : m_value_size(value_size), m_value_buffer(new char[m_value_size]) {} + + value_type_wrapper(const value_type_wrapper &other) + : m_value_size(0), m_value_buffer(nullptr) { + if (this != &other) { + m_value_buffer = new char[other.m_value_size]; + m_value_size = other.m_value_size; + + std::copy(other.m_value_buffer, other.m_value_buffer + m_value_size, + m_value_buffer); + } + } + + ~value_type_wrapper() { delete[] m_value_buffer; } + + value_type_wrapper(value_type_wrapper &&other) + : m_value_size(0), m_value_buffer(nullptr) { + if (this != &other) { + m_value_buffer = other.m_value_buffer; + m_value_size = other.m_value_size; + + other.m_value_buffer = nullptr; + other.m_value_size = 0; + } + } + + value_type_wrapper &operator=(const value_type_wrapper &other) { + if (this != &other) { + delete[] m_value_buffer; + m_value_buffer = new char[other.m_value_size]; + m_value_size = other.m_value_size; + + std::copy(other.m_value_buffer, other.m_value_buffer + m_value_size, + m_value_buffer); + } + + return *this; + } + + value_type_wrapper &operator=(value_type_wrapper &&other) { + if (this != &other) { + delete[] m_value_buffer; + m_value_buffer = other.m_value_buffer; + m_value_size = other.m_value_size; + + other.m_value_buffer = nullptr; + other.m_value_size = 0; + } + + return *this; + } + + pointer_type pointer() const { + return reinterpret_cast(m_value_buffer); + } + + reference_type reference() const { + return ValueOps::reference( + reinterpret_cast(m_value_buffer)); + } + }; + +public: + void execute() const { + dispatch_execute_task(this); + } + + inline void execute_task() const { + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + + std::size_t value_size = + Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); + + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + +#if KOKKOS_HPX_IMPLEMENTATION == 0 + // NOTE: This version makes the most use of HPX functionality, but + // requires the struct value_type_wrapper to handle different + // reference_types. It is also significantly slower than the version + // below due to not reusing the buffer used by other functions. + using hpx::parallel::reduction; + using hpx::parallel::execution::static_chunk_size; + + value_type_wrapper final_value(value_size); + value_type_wrapper identity(value_size); + + ValueInit::init(ReducerConditional::select(m_functor, m_reducer), + final_value.pointer()); + ValueInit::init(ReducerConditional::select(m_functor, m_reducer), + identity.pointer()); + + for_loop(par.with(static_chunk_size(m_policy.chunk_size())), + m_policy.begin(), m_policy.end(), + reduction(final_value, identity, + [this](value_type_wrapper &a, + value_type_wrapper &b) -> value_type_wrapper & { + ValueJoin::join( + ReducerConditional::select(m_functor, m_reducer), + a.pointer(), b.pointer()); + return a; + }), + [this](Member i, value_type_wrapper &update) { + execute_functor(m_functor, i, update.reference()); + }); + + pointer_type final_value_ptr = final_value.pointer(); + +#elif KOKKOS_HPX_IMPLEMENTATION == 1 + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, value_size); + + for_loop(par, 0, num_worker_threads, [this, &buffer](std::size_t t) { + ValueInit::init(ReducerConditional::select(m_functor, m_reducer), + reinterpret_cast(buffer.get(t))); + }); + + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + + counting_semaphore sem(0); + std::size_t num_tasks = 0; + + for (Member i_begin = m_policy.begin(); i_begin < m_policy.end(); + i_begin += m_policy.chunk_size()) { + apply([this, &buffer, &sem, i_begin]() { + reference_type update = + ValueOps::reference(reinterpret_cast( + buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id()))); + const Member i_end = + (std::min)(i_begin + m_policy.chunk_size(), m_policy.end()); + execute_functor_range(update, i_begin, i_end); + + sem.signal(1); + }); + + ++num_tasks; + } + + sem.wait(num_tasks); + + for (int i = 1; i < num_worker_threads; ++i) { + ValueJoin::join(ReducerConditional::select(m_functor, m_reducer), + reinterpret_cast(buffer.get(0)), + reinterpret_cast(buffer.get(i))); + } + + pointer_type final_value_ptr = + reinterpret_cast(buffer.get(0)); +#endif + + Kokkos::Impl::FunctorFinal::final( + ReducerConditional::select(m_functor, m_reducer), final_value_ptr); + + if (m_result_ptr != nullptr) { + const int n = Analysis::value_count( + ReducerConditional::select(m_functor, m_reducer)); + + for (int j = 0; j < n; ++j) { + m_result_ptr[j] = final_value_ptr[j]; + } + } + } + + template + inline ParallelReduce( + const FunctorType &arg_functor, Policy arg_policy, + const ViewType &arg_view, + typename std::enable_if::value && + !Kokkos::is_reducer_type::value, + void *>::type = NULL) + : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), + m_result_ptr(arg_view.data()), + m_force_synchronous(!arg_view.impl_track().has_record()) {} + + inline ParallelReduce(const FunctorType &arg_functor, Policy arg_policy, + const ReducerType &reducer) + : m_functor(arg_functor), m_policy(arg_policy), m_reducer(reducer), + m_result_ptr(reducer.view().data()), + m_force_synchronous(!reducer.view().impl_track().has_record()) {} +}; + +template +class ParallelReduce, ReducerType, + Kokkos::Experimental::HPX> { +private: + using MDRangePolicy = Kokkos::MDRangePolicy; + using Policy = typename MDRangePolicy::impl_range_policy; + using WorkTag = typename MDRangePolicy::work_tag; + using WorkRange = typename Policy::WorkRange; + using Member = typename Policy::member_type; + using Analysis = FunctorAnalysis; + using ReducerConditional = + Kokkos::Impl::if_c::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + typename Kokkos::Impl::if_c::value, + WorkTag, void>::type; + using ValueInit = Kokkos::Impl::FunctorValueInit; + using ValueJoin = Kokkos::Impl::FunctorValueJoin; + using ValueOps = Kokkos::Impl::FunctorValueOps; + using pointer_type = typename Analysis::pointer_type; + using value_type = typename Analysis::value_type; + using reference_type = typename Analysis::reference_type; + using iterate_type = + typename Kokkos::Impl::HostIterateTile; + + const FunctorType m_functor; + const MDRangePolicy m_mdr_policy; + const Policy m_policy; + const ReducerType m_reducer; + const pointer_type m_result_ptr; + + bool m_force_synchronous; + +public: + void execute() const { + dispatch_execute_task(this); + } + + inline void execute_task() const { + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + const std::size_t value_size = + Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); + + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, value_size); + + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + + for_loop(par, 0, num_worker_threads, [this, &buffer](std::size_t t) { + ValueInit::init(ReducerConditional::select(m_functor, m_reducer), + reinterpret_cast(buffer.get(t))); + }); + +#if KOKKOS_HPX_IMPLEMENTATION == 0 + using hpx::parallel::execution::static_chunk_size; + + for_loop(par.with(static_chunk_size(m_policy.chunk_size())), + m_policy.begin(), m_policy.end(), [this, &buffer](const Member i) { + reference_type update = ValueOps::reference( + reinterpret_cast(buffer.get( + Kokkos::Experimental::HPX::impl_hardware_thread_id()))); + iterate_type(m_mdr_policy, m_functor, update)(i); + }); + +#elif KOKKOS_HPX_IMPLEMENTATION == 1 + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + + counting_semaphore sem(0); + std::size_t num_tasks = 0; + + for (Member i_begin = m_policy.begin(); i_begin < m_policy.end(); + i_begin += m_policy.chunk_size()) { + apply([this, &buffer, &sem, i_begin]() { + reference_type update = + ValueOps::reference(reinterpret_cast( + buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id()))); + const Member i_end = + (std::min)(i_begin + m_policy.chunk_size(), m_policy.end()); + + for (Member i = i_begin; i < i_end; ++i) { + iterate_type(m_mdr_policy, m_functor, update)(i); + } + + sem.signal(1); + }); + + ++num_tasks; + } + + sem.wait(num_tasks); +#endif + + for (int i = 1; i < num_worker_threads; ++i) { + ValueJoin::join(ReducerConditional::select(m_functor, m_reducer), + reinterpret_cast(buffer.get(0)), + reinterpret_cast(buffer.get(i))); + } + + Kokkos::Impl::FunctorFinal::final( + ReducerConditional::select(m_functor, m_reducer), + reinterpret_cast(buffer.get(0))); + + if (m_result_ptr != nullptr) { + const int n = Analysis::value_count( + ReducerConditional::select(m_functor, m_reducer)); + + for (int j = 0; j < n; ++j) { + m_result_ptr[j] = reinterpret_cast(buffer.get(0))[j]; + } + } + } + + template + inline ParallelReduce( + const FunctorType &arg_functor, MDRangePolicy arg_policy, + const ViewType &arg_view, + typename std::enable_if::value && + !Kokkos::is_reducer_type::value, + void *>::type = NULL) + : m_functor(arg_functor), m_mdr_policy(arg_policy), + m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), + m_reducer(InvalidType()), m_result_ptr(arg_view.data()), + m_force_synchronous(!arg_view.impl_track().has_record()) {} + + inline ParallelReduce(const FunctorType &arg_functor, + MDRangePolicy arg_policy, const ReducerType &reducer) + : m_functor(arg_functor), m_mdr_policy(arg_policy), + m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), + m_reducer(reducer), m_result_ptr(reducer.view().data()), + m_force_synchronous(!reducer.view().impl_track().has_record()) {} +}; +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { +namespace Impl { + +template +class ParallelScan, + Kokkos::Experimental::HPX> { +private: + using Policy = Kokkos::RangePolicy; + using WorkTag = typename Policy::work_tag; + using WorkRange = typename Policy::WorkRange; + using Member = typename Policy::member_type; + using Analysis = + FunctorAnalysis; + using ValueInit = Kokkos::Impl::FunctorValueInit; + using ValueJoin = Kokkos::Impl::FunctorValueJoin; + using ValueOps = Kokkos::Impl::FunctorValueOps; + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + using value_type = typename Analysis::value_type; + + const FunctorType m_functor; + const Policy m_policy; + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end, reference_type update, + const bool final) { + for (Member i = i_begin; i < i_end; ++i) { + functor(i, update, final); + } + } + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end, reference_type update, + const bool final) { + const TagType t{}; + for (Member i = i_begin; i < i_end; ++i) { + functor(t, i, update, final); + } + } + +public: + void execute() const { dispatch_execute_task(this); } + + inline void execute_task() const { + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + const int value_count = Analysis::value_count(m_functor); + const std::size_t value_size = Analysis::value_size(m_functor); + + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, 2 * value_size); + + using hpx::lcos::local::barrier; + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + using hpx::parallel::execution::static_chunk_size; + + barrier bar(num_worker_threads); + + for_loop(par.with(static_chunk_size(1)), 0, num_worker_threads, + [this, &buffer, &bar, num_worker_threads, value_count, + value_size](std::size_t const t) { + reference_type update_sum = ValueInit::init( + m_functor, reinterpret_cast(buffer.get(t))); + + const WorkRange range(m_policy, t, num_worker_threads); + execute_functor_range(m_functor, range.begin(), + range.end(), update_sum, false); + + bar.wait(); + + if (t == 0) { + ValueInit::init(m_functor, reinterpret_cast( + buffer.get(0) + value_size)); + + for (int i = 1; i < num_worker_threads; ++i) { + pointer_type ptr_1_prev = + reinterpret_cast(buffer.get(i - 1)); + pointer_type ptr_2_prev = reinterpret_cast( + buffer.get(i - 1) + value_size); + pointer_type ptr_2 = reinterpret_cast( + buffer.get(i) + value_size); + + for (int j = 0; j < value_count; ++j) { + ptr_2[j] = ptr_2_prev[j]; + } + + ValueJoin::join(m_functor, ptr_2, ptr_1_prev); + } + } + + bar.wait(); + + reference_type update_base = ValueOps::reference( + reinterpret_cast(buffer.get(t) + value_size)); + + execute_functor_range(m_functor, range.begin(), + range.end(), update_base, true); + }); + } + + inline ParallelScan(const FunctorType &arg_functor, const Policy &arg_policy) + : m_functor(arg_functor), m_policy(arg_policy) {} +}; + +template +class ParallelScanWithTotal, + ReturnType, Kokkos::Experimental::HPX> { +private: + using Policy = Kokkos::RangePolicy; + using WorkTag = typename Policy::work_tag; + using WorkRange = typename Policy::WorkRange; + using Member = typename Policy::member_type; + using Analysis = + FunctorAnalysis; + using ValueInit = Kokkos::Impl::FunctorValueInit; + using ValueJoin = Kokkos::Impl::FunctorValueJoin; + using ValueOps = Kokkos::Impl::FunctorValueOps; + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + using value_type = typename Analysis::value_type; + + const FunctorType m_functor; + const Policy m_policy; + ReturnType &m_returnvalue; + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end, reference_type update, + const bool final) { + for (Member i = i_begin; i < i_end; ++i) { + functor(i, update, final); + } + } + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end, reference_type update, + const bool final) { + const TagType t{}; + for (Member i = i_begin; i < i_end; ++i) { + functor(t, i, update, final); + } + } + +public: + void execute() const { dispatch_execute_task(this); } + + inline void execute_task() const { + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + const int value_count = Analysis::value_count(m_functor); + const std::size_t value_size = Analysis::value_size(m_functor); + + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, 2 * value_size); + + using hpx::lcos::local::barrier; + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + using hpx::parallel::execution::static_chunk_size; + + barrier bar(num_worker_threads); + + for_loop(par.with(static_chunk_size(1)), 0, num_worker_threads, + [this, &buffer, &bar, num_worker_threads, value_count, + value_size](std::size_t const t) { + reference_type update_sum = ValueInit::init( + m_functor, reinterpret_cast(buffer.get(t))); + + const WorkRange range(m_policy, t, num_worker_threads); + execute_functor_range(m_functor, range.begin(), + range.end(), update_sum, false); + + bar.wait(); + + if (t == 0) { + ValueInit::init(m_functor, reinterpret_cast( + buffer.get(0) + value_size)); + + for (int i = 1; i < num_worker_threads; ++i) { + pointer_type ptr_1_prev = + reinterpret_cast(buffer.get(i - 1)); + pointer_type ptr_2_prev = reinterpret_cast( + buffer.get(i - 1) + value_size); + pointer_type ptr_2 = reinterpret_cast( + buffer.get(i) + value_size); + + for (int j = 0; j < value_count; ++j) { + ptr_2[j] = ptr_2_prev[j]; + } + + ValueJoin::join(m_functor, ptr_2, ptr_1_prev); + } + } + + bar.wait(); + + reference_type update_base = ValueOps::reference( + reinterpret_cast(buffer.get(t) + value_size)); + + execute_functor_range(m_functor, range.begin(), + range.end(), update_base, true); + + if (t == std::size_t(num_worker_threads - 1)) { + m_returnvalue = update_base; + } + }); + } + + inline ParallelScanWithTotal(const FunctorType &arg_functor, + const Policy &arg_policy, + ReturnType &arg_returnvalue) + : m_functor(arg_functor), m_policy(arg_policy), + m_returnvalue(arg_returnvalue) {} +}; +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { +namespace Impl { +template +class ParallelFor, + Kokkos::Experimental::HPX> { +private: + using Policy = TeamPolicyInternal; + using WorkTag = typename Policy::work_tag; + using Member = typename Policy::member_type; + using memory_space = Kokkos::HostSpace; + + const FunctorType m_functor; + const Policy m_policy; + const int m_league; + const std::size_t m_shared; + + template + inline static + typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Policy &policy, + const int league_rank, char *local_buffer, + const std::size_t local_buffer_size) { + functor(Member(policy, 0, league_rank, local_buffer, local_buffer_size)); + } + + template + inline static + typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Policy &policy, + const int league_rank, char *local_buffer, + const std::size_t local_buffer_size) { + const TagType t{}; + functor(t, Member(policy, 0, league_rank, local_buffer, local_buffer_size)); + } + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Policy &policy, + const int league_rank_begin, + const int league_rank_end, char *local_buffer, + const std::size_t local_buffer_size) { + for (int league_rank = league_rank_begin; league_rank < league_rank_end; + ++league_rank) { + functor(Member(policy, 0, league_rank, local_buffer, local_buffer_size)); + } + } + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Policy &policy, + const int league_rank_begin, + const int league_rank_end, char *local_buffer, + const std::size_t local_buffer_size) { + const TagType t{}; + for (int league_rank = league_rank_begin; league_rank < league_rank_end; + ++league_rank) { + functor(t, + Member(policy, 0, league_rank, local_buffer, local_buffer_size)); + } + } + +public: + void execute() const { dispatch_execute_task(this); } + + inline void execute_task() const { + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, m_shared); + +#if KOKKOS_HPX_IMPLEMENTATION == 0 + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + using hpx::parallel::execution::static_chunk_size; + + for_loop(par.with(static_chunk_size(m_policy.chunk_size())), 0, + m_policy.league_size(), [this, &buffer](const int league_rank) { + execute_functor( + m_functor, m_policy, league_rank, + buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id()), + m_shared); + }); + +#elif KOKKOS_HPX_IMPLEMENTATION == 1 + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + + counting_semaphore sem(0); + std::size_t num_tasks = 0; + + for (int league_rank_begin = 0; league_rank_begin < m_policy.league_size(); + league_rank_begin += m_policy.chunk_size()) { + apply([this, &buffer, &sem, league_rank_begin]() { + const int league_rank_end = (std::min)( + league_rank_begin + m_policy.chunk_size(), m_policy.league_size()); + execute_functor_range( + m_functor, m_policy, league_rank_begin, league_rank_end, + buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id()), m_shared); + + sem.signal(1); + }); + + ++num_tasks; + } + + sem.wait(num_tasks); +#endif + } + + ParallelFor(const FunctorType &arg_functor, const Policy &arg_policy) + : m_functor(arg_functor), m_policy(arg_policy), + m_league(arg_policy.league_size()), + m_shared(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + + FunctorTeamShmemSize::value( + arg_functor, arg_policy.team_size())) {} +}; + +template +class ParallelReduce, + ReducerType, Kokkos::Experimental::HPX> { +private: + using Policy = TeamPolicyInternal; + using Analysis = + FunctorAnalysis; + using Member = typename Policy::member_type; + using WorkTag = typename Policy::work_tag; + using ReducerConditional = + Kokkos::Impl::if_c::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + typename Kokkos::Impl::if_c::value, + WorkTag, void>::type; + using ValueInit = Kokkos::Impl::FunctorValueInit; + using ValueJoin = Kokkos::Impl::FunctorValueJoin; + using ValueOps = Kokkos::Impl::FunctorValueOps; + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + using value_type = typename Analysis::value_type; + + const FunctorType m_functor; + const int m_league; + const Policy m_policy; + const ReducerType m_reducer; + pointer_type m_result_ptr; + const std::size_t m_shared; + + bool m_force_synchronous; + + template + inline static + typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Policy &policy, + const int league_rank, char *local_buffer, + const std::size_t local_buffer_size, + reference_type update) { + functor(Member(policy, 0, league_rank, local_buffer, local_buffer_size), + update); + } + + template + inline static + typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Policy &policy, + const int league_rank, char *local_buffer, + const std::size_t local_buffer_size, + reference_type update) { + const TagType t{}; + functor(t, Member(policy, 0, league_rank, local_buffer, local_buffer_size), + update); + } + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Policy &policy, + const int league_rank_begin, + const int league_rank_end, char *local_buffer, + const std::size_t local_buffer_size, + reference_type update) { + for (int league_rank = league_rank_begin; league_rank < league_rank_end; + ++league_rank) { + functor(Member(policy, 0, league_rank, local_buffer, local_buffer_size), + update); + } + } + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Policy &policy, + const int league_rank_begin, + const int league_rank_end, char *local_buffer, + const std::size_t local_buffer_size, + reference_type update) { + const TagType t{}; + for (int league_rank = league_rank_begin; league_rank < league_rank_end; + ++league_rank) { + functor(t, + Member(policy, 0, league_rank, local_buffer, local_buffer_size), + update); + } + } + +public: + void execute() const { + dispatch_execute_task(this); + } + + inline void execute_task() const { + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + const std::size_t value_size = + Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); + + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, value_size + m_shared); + + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + + for_loop(par, 0, num_worker_threads, [this, &buffer](std::size_t t) { + ValueInit::init(ReducerConditional::select(m_functor, m_reducer), + reinterpret_cast(buffer.get(t))); + }); + +#if KOKKOS_HPX_IMPLEMENTATION == 0 + using hpx::parallel::execution::static_chunk_size; + + hpx::parallel::for_loop( + par.with(static_chunk_size(m_policy.chunk_size())), 0, + m_policy.league_size(), + [this, &buffer, value_size](const int league_rank) { + std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id(); + reference_type update = ValueOps::reference( + reinterpret_cast(buffer.get(t))); + + execute_functor(m_functor, m_policy, league_rank, + buffer.get(t) + value_size, m_shared, + update); + }); + +#elif KOKKOS_HPX_IMPLEMENTATION == 1 + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + + counting_semaphore sem(0); + std::size_t num_tasks = 0; + + for (int league_rank_begin = 0; league_rank_begin < m_policy.league_size(); + league_rank_begin += m_policy.chunk_size()) { + apply([this, &buffer, &sem, league_rank_begin, value_size]() { + std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id(); + reference_type update = + ValueOps::reference(reinterpret_cast(buffer.get(t))); + const int league_rank_end = (std::min)( + league_rank_begin + m_policy.chunk_size(), m_policy.league_size()); + execute_functor_range( + m_functor, m_policy, league_rank_begin, league_rank_end, + buffer.get(t) + value_size, m_shared, update); + + sem.signal(1); + }); + + ++num_tasks; + } + + sem.wait(num_tasks); +#endif + + const pointer_type ptr = reinterpret_cast(buffer.get(0)); + for (int t = 1; t < num_worker_threads; ++t) { + ValueJoin::join(ReducerConditional::select(m_functor, m_reducer), ptr, + reinterpret_cast(buffer.get(t))); + } + + Kokkos::Impl::FunctorFinal::final( + ReducerConditional::select(m_functor, m_reducer), ptr); + + if (m_result_ptr) { + const int n = Analysis::value_count( + ReducerConditional::select(m_functor, m_reducer)); + + for (int j = 0; j < n; ++j) { + m_result_ptr[j] = ptr[j]; + } + } + } + + template + ParallelReduce( + const FunctorType &arg_functor, const Policy &arg_policy, + const ViewType &arg_result, + typename std::enable_if::value && + !Kokkos::is_reducer_type::value, + void *>::type = NULL) + : m_functor(arg_functor), m_league(arg_policy.league_size()), + m_policy(arg_policy), m_reducer(InvalidType()), + m_result_ptr(arg_result.data()), + m_shared(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + + FunctorTeamShmemSize::value( + m_functor, arg_policy.team_size())), + m_force_synchronous(!arg_result.impl_track().has_record()) {} + + inline ParallelReduce(const FunctorType &arg_functor, Policy arg_policy, + const ReducerType &reducer) + : m_functor(arg_functor), m_league(arg_policy.league_size()), + m_policy(arg_policy), m_reducer(reducer), + m_result_ptr(reducer.view().data()), + m_shared(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + + FunctorTeamShmemSize::value( + arg_functor, arg_policy.team_size())), + m_force_synchronous(!reducer.view().impl_track().has_record()) {} +}; +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { + +template +KOKKOS_INLINE_FUNCTION + Impl::TeamThreadRangeBoundariesStruct + TeamThreadRange(const Impl::HPXTeamMember &thread, const iType &count) { + return Impl::TeamThreadRangeBoundariesStruct( + thread, count); +} + +template +KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< + typename std::common_type::type, Impl::HPXTeamMember> +TeamThreadRange(const Impl::HPXTeamMember &thread, const iType1 &i_begin, + const iType2 &i_end) { + using iType = typename std::common_type::type; + return Impl::TeamThreadRangeBoundariesStruct( + thread, iType(i_begin), iType(i_end)); +} + +template +KOKKOS_INLINE_FUNCTION + Impl::TeamThreadRangeBoundariesStruct + TeamVectorRange(const Impl::HPXTeamMember &thread, const iType &count) { + return Impl::TeamThreadRangeBoundariesStruct( + thread, count); +} + +template +KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< + typename std::common_type::type, Impl::HPXTeamMember> +TeamVectorRange(const Impl::HPXTeamMember &thread, const iType1 &i_begin, + const iType2 &i_end) { + using iType = typename std::common_type::type; + return Impl::TeamThreadRangeBoundariesStruct( + thread, iType(i_begin), iType(i_end)); +} + +template +KOKKOS_INLINE_FUNCTION + Impl::ThreadVectorRangeBoundariesStruct + ThreadVectorRange(const Impl::HPXTeamMember &thread, const iType &count) { + return Impl::ThreadVectorRangeBoundariesStruct( + thread, count); +} + +template +KOKKOS_INLINE_FUNCTION + Impl::ThreadVectorRangeBoundariesStruct + ThreadVectorRange(const Impl::HPXTeamMember &thread, const iType &i_begin, + const iType &i_end) { + return Impl::ThreadVectorRangeBoundariesStruct( + thread, i_begin, i_end); +} + +KOKKOS_INLINE_FUNCTION +Impl::ThreadSingleStruct +PerTeam(const Impl::HPXTeamMember &thread) { + return Impl::ThreadSingleStruct(thread); +} + +KOKKOS_INLINE_FUNCTION +Impl::VectorSingleStruct +PerThread(const Impl::HPXTeamMember &thread) { + return Impl::VectorSingleStruct(thread); +} + +/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each + * i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team. + * This functionality requires C++11 support.*/ +template +KOKKOS_INLINE_FUNCTION void parallel_for( + const Impl::TeamThreadRangeBoundariesStruct + &loop_boundaries, + const Lambda &lambda) { + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) + lambda(i); +} + +/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, + * ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team + * and a summation of val is performed and put into result. This functionality + * requires C++11 support.*/ +template +KOKKOS_INLINE_FUNCTION void parallel_reduce( + const Impl::TeamThreadRangeBoundariesStruct + &loop_boundaries, + const Lambda &lambda, ValueType &result) { + result = ValueType(); + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, result); + } +} + +/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each + * i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread. + * This functionality requires C++11 support.*/ +template +KOKKOS_INLINE_FUNCTION void parallel_for( + const Impl::ThreadVectorRangeBoundariesStruct + &loop_boundaries, + const Lambda &lambda) { +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i); + } +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, + * ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread + * and a summation of val is performed and put into result. This functionality + * requires C++11 support.*/ +template +KOKKOS_INLINE_FUNCTION void parallel_reduce( + const Impl::ThreadVectorRangeBoundariesStruct + &loop_boundaries, + const Lambda &lambda, ValueType &result) { + result = ValueType(); +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, result); + } +} + +template +KOKKOS_INLINE_FUNCTION void parallel_reduce( + const Impl::TeamThreadRangeBoundariesStruct + &loop_boundaries, + const Lambda &lambda, const ReducerType &reducer) { + reducer.init(reducer.reference()); + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, reducer.reference()); + } +} + +template +KOKKOS_INLINE_FUNCTION void parallel_reduce( + const Impl::ThreadVectorRangeBoundariesStruct + &loop_boundaries, + const Lambda &lambda, const ReducerType &reducer) { + reducer.init(reducer.reference()); +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, reducer.reference()); + } +} + +template +KOKKOS_INLINE_FUNCTION void parallel_scan( + Impl::TeamThreadRangeBoundariesStruct const + &loop_boundaries, + const FunctorType &lambda) { + using value_type = typename Kokkos::Impl::FunctorAnalysis< + Kokkos::Impl::FunctorPatternInterface::SCAN, void, + FunctorType>::value_type; + + value_type scan_val = value_type(); + + // Intra-member scan + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, scan_val, false); + } + + // 'scan_val' output is the exclusive prefix sum + scan_val = loop_boundaries.thread.team_scan(scan_val); + + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, scan_val, true); + } +} + +/** \brief Intra-thread vector parallel exclusive prefix sum. Executes + * lambda(iType i, ValueType & val, bool final) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan + * operation is performed. Depending on the target execution space the operator + * might be called twice: once with final=false and once with final=true. When + * final==true val contains the prefix sum value. The contribution of this "i" + * needs to be added to val no matter whether final==true or not. In a serial + * execution (i.e. team_size==1) the operator is only called once with + * final==true. Scan_val will be set to the final sum value over all vector + * lanes. This functionality requires C++11 support.*/ +template +KOKKOS_INLINE_FUNCTION void parallel_scan( + const Impl::ThreadVectorRangeBoundariesStruct + &loop_boundaries, + const FunctorType &lambda) { + using ValueTraits = Kokkos::Impl::FunctorValueTraits; + using value_type = typename ValueTraits::value_type; + + value_type scan_val = value_type(); + +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, scan_val, true); + } +} + +template +KOKKOS_INLINE_FUNCTION void +single(const Impl::VectorSingleStruct &single_struct, + const FunctorType &lambda) { + lambda(); +} + +template +KOKKOS_INLINE_FUNCTION void +single(const Impl::ThreadSingleStruct &single_struct, + const FunctorType &lambda) { + lambda(); +} + +template +KOKKOS_INLINE_FUNCTION void +single(const Impl::VectorSingleStruct &single_struct, + const FunctorType &lambda, ValueType &val) { + lambda(val); +} + +template +KOKKOS_INLINE_FUNCTION void +single(const Impl::ThreadSingleStruct &single_struct, + const FunctorType &lambda, ValueType &val) { + lambda(val); +} + +} // namespace Kokkos + +#include + +#endif /* #if defined( KOKKOS_ENABLE_HPX ) */ +#endif /* #ifndef KOKKOS_HPX_HPP */ diff --git a/lib/kokkos/core/src/Kokkos_HostSpace.hpp b/lib/kokkos/core/src/Kokkos_HostSpace.hpp index 3fd55d9148..921ba0df34 100644 --- a/lib/kokkos/core/src/Kokkos_HostSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_HostSpace.hpp @@ -57,6 +57,8 @@ #include #include +#include "impl/Kokkos_HostSpace_deepcopy.hpp" + /*--------------------------------------------------------------------------*/ namespace Kokkos { @@ -113,6 +115,8 @@ public: typedef Kokkos::OpenMP execution_space; #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) typedef Kokkos::Threads execution_space; +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX ) + typedef Kokkos::Experimental::HPX execution_space; //#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) // typedef Kokkos::Qthreads execution_space; #elif defined( KOKKOS_ENABLE_OPENMP ) @@ -121,6 +125,8 @@ public: typedef Kokkos::Threads execution_space; //#elif defined( KOKKOS_ENABLE_QTHREADS ) // typedef Kokkos::Qthreads execution_space; +#elif defined( KOKKOS_ENABLE_HPX ) + typedef Kokkos::Experimental::HPX execution_space; #elif defined( KOKKOS_ENABLE_SERIAL ) typedef Kokkos::Serial execution_space; #else @@ -291,15 +297,18 @@ namespace Kokkos { namespace Impl { +#define PAR_DEEP_COPY_USE_MEMCPY + template< class ExecutionSpace > struct DeepCopy< HostSpace, HostSpace, ExecutionSpace > { DeepCopy( void * dst, const void * src, size_t n ) { - memcpy( dst, src, n ); + hostspace_parallel_deepcopy(dst,src,n); } DeepCopy( const ExecutionSpace& exec, void * dst, const void * src, size_t n ) { exec.fence(); - memcpy( dst, src, n ); + hostspace_parallel_deepcopy(dst,src,n); + exec.fence(); } }; diff --git a/lib/kokkos/core/src/Kokkos_Layout.hpp b/lib/kokkos/core/src/Kokkos_Layout.hpp index 43e117783b..6f423d545f 100644 --- a/lib/kokkos/core/src/Kokkos_Layout.hpp +++ b/lib/kokkos/core/src/Kokkos_Layout.hpp @@ -193,6 +193,9 @@ struct LayoutStride { {} }; +// ========================================================================== +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + //---------------------------------------------------------------------------- /// \struct LayoutTileLeft /// \brief Memory layout tag indicating left-to-right (Fortran scheme) @@ -243,6 +246,8 @@ struct LayoutTileLeft { : dimension { argN0 , argN1 , argN2 , argN3 , argN4 , argN5 , argN6 , argN7 } {} }; +#endif // KOKKOS_ENABLE_DEPRECATED_CODE +// =================================================================================== ////////////////////////////////////////////////////////////////////////////////////// @@ -269,14 +274,14 @@ namespace Experimental { template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned ArgN2 = 0, unsigned ArgN3 = 0, unsigned ArgN4 = 0, unsigned ArgN5 = 0, unsigned ArgN6 = 0, unsigned ArgN7 = 0, bool IsPowerOfTwo = - ( Impl::is_integral_power_of_two(ArgN0) && - Impl::is_integral_power_of_two(ArgN1) && - (Impl::is_integral_power_of_two(ArgN2) || (ArgN2 == 0) ) && - (Impl::is_integral_power_of_two(ArgN3) || (ArgN3 == 0) ) && - (Impl::is_integral_power_of_two(ArgN4) || (ArgN4 == 0) ) && - (Impl::is_integral_power_of_two(ArgN5) || (ArgN5 == 0) ) && - (Impl::is_integral_power_of_two(ArgN6) || (ArgN6 == 0) ) && - (Impl::is_integral_power_of_two(ArgN7) || (ArgN7 == 0) ) + ( Kokkos::Impl::is_integral_power_of_two(ArgN0) && + Kokkos::Impl::is_integral_power_of_two(ArgN1) && + (Kokkos::Impl::is_integral_power_of_two(ArgN2) || (ArgN2 == 0) ) && + (Kokkos::Impl::is_integral_power_of_two(ArgN3) || (ArgN3 == 0) ) && + (Kokkos::Impl::is_integral_power_of_two(ArgN4) || (ArgN4 == 0) ) && + (Kokkos::Impl::is_integral_power_of_two(ArgN5) || (ArgN5 == 0) ) && + (Kokkos::Impl::is_integral_power_of_two(ArgN6) || (ArgN6 == 0) ) && + (Kokkos::Impl::is_integral_power_of_two(ArgN7) || (ArgN7 == 0) ) ) > struct LayoutTiled { diff --git a/lib/kokkos/core/src/Kokkos_Macros.hpp b/lib/kokkos/core/src/Kokkos_Macros.hpp index 10fc09423e..6b8ae02f82 100644 --- a/lib/kokkos/core/src/Kokkos_Macros.hpp +++ b/lib/kokkos/core/src/Kokkos_Macros.hpp @@ -50,6 +50,7 @@ * KOKKOS_ENABLE_CUDA Kokkos::Cuda execution and memory spaces * KOKKOS_ENABLE_THREADS Kokkos::Threads execution space * KOKKOS_ENABLE_QTHREADS Kokkos::Qthreads execution space + * KOKKOS_ENABLE_HPX Kokkos::Experimental::HPX execution space * KOKKOS_ENABLE_OPENMP Kokkos::OpenMP execution space * KOKKOS_ENABLE_OPENMPTARGET Kokkos::Experimental::OpenMPTarget execution space * KOKKOS_ENABLE_HWLOC HWLOC library is available. @@ -98,12 +99,14 @@ #if defined(KOKKOS_ENABLE_SERIAL) || defined(KOKKOS_ENABLE_THREADS) || \ defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_QTHREADS) || \ + defined(KOKKOS_ENABLE_HPX) || \ defined(KOKKOS_ENABLE_ROCM) || defined(KOKKOS_ENABLE_OPENMPTARGET) #define KOKKOS_INTERNAL_ENABLE_NON_CUDA_BACKEND #endif #if !defined(KOKKOS_ENABLE_THREADS) && !defined(KOKKOS_ENABLE_CUDA) && \ !defined(KOKKOS_ENABLE_OPENMP) && !defined(KOKKOS_ENABLE_QTHREADS) && \ + !defined(KOKKOS_ENABLE_HPX) && \ !defined(KOKKOS_ENABLE_ROCM) && !defined(KOKKOS_ENABLE_OPENMPTARGET) #define KOKKOS_INTERNAL_NOT_PARALLEL #endif @@ -174,33 +177,22 @@ #if ( 10000 > CUDA_VERSION ) #define KOKKOS_ENABLE_PRE_CUDA_10_DEPRECATION_API #endif + + #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 700) + // PTX atomics with memory order semantics are only available on volta and later + #if !defined(KOKKOS_DISABLE_CUDA_ASM) + #if !defined(KOKKOS_ENABLE_CUDA_ASM) + #define KOKKOS_ENABLE_CUDA_ASM + #if !defined(KOKKOS_DISABLE_CUDA_ASM_ATOMICS) + #define KOKKOS_ENABLE_CUDA_ASM_ATOMICS + #endif + #endif + #endif + #endif + + #endif // #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) -//---------------------------------------------------------------------------- -// Language info: C++, CUDA, OPENMP - -#if defined( KOKKOS_ENABLE_CUDA ) - // Compiling Cuda code to 'ptx' - - #define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__ - #define KOKKOS_INLINE_FUNCTION __device__ __host__ inline - #define KOKKOS_FUNCTION __device__ __host__ -#endif // #if defined( __CUDA_ARCH__ ) - -#if defined( KOKKOS_ENABLE_ROCM ) && defined( __HCC__ ) - - #define KOKKOS_FORCEINLINE_FUNCTION __attribute__((amp,cpu)) inline - #define KOKKOS_INLINE_FUNCTION __attribute__((amp,cpu)) inline - #define KOKKOS_FUNCTION __attribute__((amp,cpu)) - #define KOKKOS_LAMBDA [=] __attribute__((amp,cpu)) -#endif - -#if defined( _OPENMP ) - // Compiling with OpenMP. - // The value of _OPENMP is an integer value YYYYMM - // where YYYY and MM are the year and month designation - // of the supported OpenMP API version. -#endif // #if defined( _OPENMP ) //---------------------------------------------------------------------------- // Mapping compiler built-ins to KOKKOS_COMPILER_*** macros @@ -263,7 +255,7 @@ #endif #endif -#if defined( __PGIC__ ) +#if defined( __PGIC__ ) #define KOKKOS_COMPILER_PGI __PGIC__*100+__PGIC_MINOR__*10+__PGIC_PATCHLEVEL__ #if ( 1540 > KOKKOS_COMPILER_PGI ) @@ -272,6 +264,36 @@ #endif //#endif // #if !defined( __CUDA_ARCH__ ) +//---------------------------------------------------------------------------- +// Language info: C++, CUDA, OPENMP + +#if defined( KOKKOS_ENABLE_CUDA ) + // Compiling Cuda code to 'ptx' + + #define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__ + #define KOKKOS_INLINE_FUNCTION __device__ __host__ inline + #define KOKKOS_FUNCTION __device__ __host__ + #if defined( KOKKOS_COMPILER_NVCC ) + #define KOKKOS_INLINE_FUNCTION_DELETED inline + #else + #define KOKKOS_INLINE_FUNCTION_DELETED __device__ __host__ inline + #endif +#endif // #if defined( __CUDA_ARCH__ ) + +#if defined( KOKKOS_ENABLE_ROCM ) && defined( __HCC__ ) + + #define KOKKOS_FORCEINLINE_FUNCTION __attribute__((amp,cpu)) inline + #define KOKKOS_INLINE_FUNCTION __attribute__((amp,cpu)) inline + #define KOKKOS_FUNCTION __attribute__((amp,cpu)) + #define KOKKOS_LAMBDA [=] __attribute__((amp,cpu)) +#endif + +#if defined( _OPENMP ) + // Compiling with OpenMP. + // The value of _OPENMP is an integer value YYYYMM + // where YYYY and MM are the year and month designation + // of the supported OpenMP API version. +#endif // #if defined( _OPENMP ) //---------------------------------------------------------------------------- // Intel compiler macros @@ -320,7 +342,10 @@ #if defined( KOKKOS_ARCH_AVX512MIC ) #define KOKKOS_ENABLE_RFO_PREFETCH 1 - #endif + #if (KOKKOS_COMPILER_INTEL < 1800) && !defined(KOKKOS_KNL_USE_ASM_WORKAROUND) + #define KOKKOS_KNL_USE_ASM_WORKAROUND 1 + #endif + #endif #if defined( __MIC__ ) // Compiling for Xeon Phi @@ -386,6 +411,8 @@ #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) #endif + #define KOKKOS_RESTRICT __restrict__ + #if !defined( KOKKOS_ENABLE_ASM ) && !defined( __PGIC__ ) && \ ( defined( __amd64 ) || defined( __amd64__ ) || \ defined( __x86_64 ) || defined( __x86_64__ ) || \ @@ -416,7 +443,7 @@ // Define function marking macros if compiler specific macros are undefined: #if !defined( KOKKOS_FORCEINLINE_FUNCTION ) - #define KOKKOS_FORCEINLINE_FUNCTION inline + define KOKKOS_FORCEINLINE_FUNCTION inline #endif #if !defined( KOKKOS_INLINE_FUNCTION ) @@ -427,6 +454,9 @@ #define KOKKOS_FUNCTION /**/ #endif +#if !defined( KOKKOS_INLINE_FUNCTION_DELETED ) + #define KOKKOS_INLINE_FUNCTION_DELETED inline +#endif //---------------------------------------------------------------------------- // Define empty macro for restrict if necessary: @@ -459,18 +489,20 @@ ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \ ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \ ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) ? 1 : 0 ) + \ + ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX ) ? 1 : 0 ) + \ ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) ) #error "More than one KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_* specified." #endif // If default is not specified then chose from enabled execution spaces. -// Priority: CUDA, OPENMP, THREADS, QTHREADS, SERIAL +// Priority: CUDA, OPENMP, THREADS, QTHREADS, HPX, SERIAL #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM ) #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET ) #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) //#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX ) #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) #elif defined( KOKKOS_ENABLE_CUDA ) #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA @@ -484,6 +516,8 @@ #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS //#elif defined( KOKKOS_ENABLE_QTHREADS ) // #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS +#elif defined( KOKKOS_ENABLE_HPX ) + #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX #else #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL #endif @@ -539,7 +573,27 @@ #define KOKKOS_IMPL_CTOR_DEFAULT_ARG KOKKOS_INVALID_INDEX #endif +#if (defined(KOKKOS_ENABLE_CXX14) || defined(KOKKOS_ENABLE_CXX17) || defined(KOKKOS_ENABLE_CXX20)) + #define KOKKOS_CONSTEXPR_14 constexpr + #define KOKKOS_DEPRECATED [[deprecated]] + #define KOKKOS_DEPRECATED_TRAILING_ATTRIBUTE +#else + #define KOKKOS_CONSTEXPR_14 + #if defined(KOKKOS_COMPILER_GNU) || defined(KOKKOS_COMPILER_CLANG) + #define KOKKOS_DEPRECATED + #define KOKKOS_DEPRECATED_TRAILING_ATTRIBUTE __attribute__ ((deprecated)) + #else + #define KOKKOS_DEPRECATED + #define KOKKOS_DEPRECATED_TRAILING_ATTRIBUTE + #endif +#endif +// DJS 05/28/2019: Bugfix: Issue 2155 +// Use KOKKOS_ENABLE_CUDA_LDG_INTRINSIC to avoid memory leak in RandomAccess View +#if defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC) + #define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC +#endif + #endif // #ifndef KOKKOS_MACROS_HPP diff --git a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp index 157345c552..365db2baec 100644 --- a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp +++ b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp @@ -132,12 +132,18 @@ private: public: + using memory_space = typename DeviceType::memory_space; + /**\brief The maximum size of a superblock and block */ enum : uint32_t { max_superblock_size = 1LU << 31 /* 2 gigabytes */ }; enum : uint32_t { max_block_per_superblock = max_bit_count }; //-------------------------------------------------------------------------- + KOKKOS_INLINE_FUNCTION + bool operator==(MemoryPool const& other) const + { return m_sb_state_array == other.m_sb_state_array; } + KOKKOS_INLINE_FUNCTION size_t capacity() const noexcept { return size_t(m_sb_count) << m_sb_size_lg2 ; } diff --git a/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp b/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp index eebc83cf3d..509ac6499e 100644 --- a/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp +++ b/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp @@ -71,13 +71,18 @@ template < unsigned T > struct MemoryTraits { //! Tag this class as a kokkos memory traits: typedef MemoryTraits memory_traits ; - +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE enum : bool { Unmanaged = (unsigned(0) != (T & unsigned(Kokkos::Unmanaged))) }; enum : bool { RandomAccess = (unsigned(0) != (T & unsigned(Kokkos::RandomAccess))) }; enum : bool { Atomic = (unsigned(0) != (T & unsigned(Kokkos::Atomic))) }; enum : bool { Restrict = (unsigned(0) != (T & unsigned(Kokkos::Restrict))) }; enum : bool { Aligned = (unsigned(0) != (T & unsigned(Kokkos::Aligned))) }; - +#endif + enum : bool { is_unmanaged = (unsigned(0) != (T & unsigned(Kokkos::Unmanaged))) }; + enum : bool { is_random_access = (unsigned(0) != (T & unsigned(Kokkos::RandomAccess))) }; + enum : bool { is_atomic = (unsigned(0) != (T & unsigned(Kokkos::Atomic))) }; + enum : bool { is_restrict = (unsigned(0) != (T & unsigned(Kokkos::Restrict))) }; + enum : bool { is_aligned = (unsigned(0) != (T & unsigned(Kokkos::Aligned))) }; }; } // namespace Kokkos diff --git a/lib/kokkos/core/src/Kokkos_OpenMP.hpp b/lib/kokkos/core/src/Kokkos_OpenMP.hpp index ed4071a6da..6ee8f08dc8 100644 --- a/lib/kokkos/core/src/Kokkos_OpenMP.hpp +++ b/lib/kokkos/core/src/Kokkos_OpenMP.hpp @@ -107,8 +107,14 @@ public: /// \brief Wait until all dispatched functors complete on the given instance /// /// This is a no-op on OpenMP - inline + static void impl_static_fence( OpenMP const& = OpenMP() ) noexcept; + + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE static void fence( OpenMP const& = OpenMP() ) noexcept; + #else + void fence() const; + #endif + /// \brief Does the given instance return immediately after launching /// a parallel algorithm diff --git a/lib/kokkos/core/src/Kokkos_Pair.hpp b/lib/kokkos/core/src/Kokkos_Pair.hpp index 1be763be85..ab0ab8152a 100644 --- a/lib/kokkos/core/src/Kokkos_Pair.hpp +++ b/lib/kokkos/core/src/Kokkos_Pair.hpp @@ -528,6 +528,15 @@ KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>= (const pair& lhs, const pair& rhs) { return !(lhs struct is_pair_like : std::false_type { }; +template struct is_pair_like> : std::true_type { }; +template struct is_pair_like> : std::true_type { }; + +} // end namespace Impl + } // namespace Kokkos diff --git a/lib/kokkos/core/src/Kokkos_Parallel.hpp b/lib/kokkos/core/src/Kokkos_Parallel.hpp index b095f5728e..09dcf60b11 100644 --- a/lib/kokkos/core/src/Kokkos_Parallel.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel.hpp @@ -525,7 +525,7 @@ void parallel_scan( const ExecutionPolicy & policy Kokkos::Profiling::endParallelScan(kpID); } #endif - + Kokkos::fence(); } template< class FunctorType, class ReturnType > @@ -560,7 +560,7 @@ void parallel_scan( const size_t work_count Kokkos::Profiling::endParallelScan(kpID); } #endif - + Kokkos::fence(); } template< class ExecutionPolicy, class FunctorType, class ReturnType > diff --git a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp index 06aaa6546e..36bc6e4153 100644 --- a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp @@ -69,18 +69,19 @@ public: typedef Sum reducer; typedef typename std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - Sum(value_type& value_): value(&value_) {} + Sum(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - Sum(const result_view_type& value_): value(value_.data()) {} + Sum(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -100,12 +101,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -116,18 +122,19 @@ public: typedef Prod reducer; typedef typename std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - Prod(value_type& value_): value(&value_) {} + Prod(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - Prod(const result_view_type& value_): value(value_.data()) {} + Prod(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -147,12 +154,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -163,18 +175,19 @@ public: typedef Min reducer; typedef typename std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - Min(value_type& value_): value(&value_) {} + Min(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - Min(const result_view_type& value_): value(value_.data()) {} + Min(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -196,12 +209,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -212,18 +230,19 @@ public: typedef Max reducer; typedef typename std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - Max(value_type& value_): value(&value_) {} + Max(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - Max(const result_view_type& value_): value(value_.data()) {} + Max(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -246,12 +265,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -262,18 +286,19 @@ public: typedef LAnd reducer; typedef typename std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - LAnd(value_type& value_): value(&value_) {} + LAnd(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - LAnd(const result_view_type& value_): value(value_.data()) {} + LAnd(const result_view_type& value_): value(value_),references_scalar_v(false) {} KOKKOS_INLINE_FUNCTION void join(value_type& dest, const value_type& src) const { @@ -292,12 +317,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -308,18 +338,19 @@ public: typedef LOr reducer; typedef typename std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - LOr(value_type& value_): value(&value_) {} + LOr(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - LOr(const result_view_type& value_): value(value_.data()) {} + LOr(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -339,12 +370,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -355,18 +391,19 @@ public: typedef BAnd reducer; typedef typename std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - BAnd(value_type& value_): value(&value_) {} + BAnd(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - BAnd(const result_view_type& value_): value(value_.data()) {} + BAnd(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -386,12 +423,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -402,18 +444,19 @@ public: typedef BOr reducer; typedef typename std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - BOr(value_type& value_): value(&value_) {} + BOr(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - BOr(const result_view_type& value_): value(value_.data()) {} + BOr(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -433,12 +476,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -471,18 +519,19 @@ public: typedef MinLoc reducer; typedef ValLocScalar value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - MinLoc(value_type& value_): value(&value_) {} + MinLoc(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - MinLoc(const result_view_type& value_): value(value_.data()) {} + MinLoc(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required @@ -506,12 +555,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -526,18 +580,19 @@ public: typedef MaxLoc reducer; typedef ValLocScalar value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - MaxLoc(value_type& value_): value(&value_) {} + MaxLoc(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - MaxLoc(const result_view_type& value_): value(value_.data()) {} + MaxLoc(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -560,12 +615,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -596,18 +656,19 @@ public: typedef MinMax reducer; typedef MinMaxScalar value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - MinMax(value_type& value_): value(&value_) {} + MinMax(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - MinMax(const result_view_type& value_): value(value_.data()) {} + MinMax(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -638,12 +699,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -680,18 +746,19 @@ public: typedef MinMaxLoc reducer; typedef MinMaxLocScalar value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - MinMaxLoc(value_type& value_): value(&value_) {} + MinMaxLoc(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - MinMaxLoc(const result_view_type& value_): value(value_.data()) {} + MinMaxLoc(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -728,12 +795,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; } @@ -813,7 +885,7 @@ struct ParallelReduceReturnValue + struct ReducerHasTestReferenceFunction + { + template static std::true_type test_func( decltype(&E::references_scalar) ) ; + template static std::false_type test_func(...); + + enum { value = std::is_same(0))>::value }; + }; + + template::value> + struct ParallelReduceFence { + static void fence(const T&) { + Kokkos::fence(); + } + }; + template + struct ParallelReduceFence, false> { + static void fence(const View) {}; + }; + template + struct ParallelReduceFence { + static void fence(const T& reducer) { + if(reducer.references_scalar()) + Kokkos::fence(); + } + }; +} + /** \brief Parallel reduction * * parallel_reduce performs parallel reductions with arbitrary functions - i.e. @@ -959,6 +1062,7 @@ void parallel_reduce(const std::string& label, Kokkos::Impl::is_execution_policy::value >::type * = 0) { Impl::ParallelReduceAdaptor::execute(label,policy,functor,return_value); + Impl::ParallelReduceFence::fence(return_value); } template< class PolicyType, class FunctorType, class ReturnType > @@ -970,6 +1074,7 @@ void parallel_reduce(const PolicyType& policy, Kokkos::Impl::is_execution_policy::value >::type * = 0) { Impl::ParallelReduceAdaptor::execute("",policy,functor,return_value); + Impl::ParallelReduceFence::fence(return_value); } template< class FunctorType, class ReturnType > @@ -979,6 +1084,7 @@ void parallel_reduce(const size_t& policy, ReturnType& return_value) { typedef typename Impl::ParallelReducePolicyType::policy_type policy_type; Impl::ParallelReduceAdaptor::execute("",policy_type(0,policy),functor,return_value); + Impl::ParallelReduceFence::fence(return_value); } template< class FunctorType, class ReturnType > @@ -989,6 +1095,7 @@ void parallel_reduce(const std::string& label, ReturnType& return_value) { typedef typename Impl::ParallelReducePolicyType::policy_type policy_type; Impl::ParallelReduceAdaptor::execute(label,policy_type(0,policy),functor,return_value); + Impl::ParallelReduceFence::fence(return_value); } // ReturnValue as View or Reducer: take by copy to allow for inline construction @@ -1004,6 +1111,7 @@ void parallel_reduce(const std::string& label, >::type * = 0) { ReturnType return_value_impl = return_value; Impl::ParallelReduceAdaptor::execute(label,policy,functor,return_value_impl); + Impl::ParallelReduceFence::fence(return_value); } template< class PolicyType, class FunctorType, class ReturnType > @@ -1016,6 +1124,7 @@ void parallel_reduce(const PolicyType& policy, >::type * = 0) { ReturnType return_value_impl = return_value; Impl::ParallelReduceAdaptor::execute("",policy,functor,return_value_impl); + Impl::ParallelReduceFence::fence(return_value); } template< class FunctorType, class ReturnType > @@ -1026,6 +1135,7 @@ void parallel_reduce(const size_t& policy, typedef typename Impl::ParallelReducePolicyType::policy_type policy_type; ReturnType return_value_impl = return_value; Impl::ParallelReduceAdaptor::execute("",policy_type(0,policy),functor,return_value_impl); + Impl::ParallelReduceFence::fence(return_value); } template< class FunctorType, class ReturnType > @@ -1037,6 +1147,7 @@ void parallel_reduce(const std::string& label, typedef typename Impl::ParallelReducePolicyType::policy_type policy_type; ReturnType return_value_impl = return_value; Impl::ParallelReduceAdaptor::execute(label,policy_type(0,policy),functor,return_value_impl); + Impl::ParallelReduceFence::fence(return_value); } // No Return Argument diff --git a/lib/kokkos/core/src/Kokkos_PointerOwnership.hpp b/lib/kokkos/core/src/Kokkos_PointerOwnership.hpp new file mode 100644 index 0000000000..be76ec3def --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_PointerOwnership.hpp @@ -0,0 +1,74 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_POINTEROWNERSHIP_HPP +#define KOKKOS_IMPL_POINTEROWNERSHIP_HPP + +#include + +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +/// Trivial wrapper for raw pointers that express ownership. +template +using OwningRawPtr = T*; + +/// Trivial wrapper for raw pointers that do not express ownership. +template +using ObservingRawPtr = T*; + +} // end namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + + + +#endif /* #ifndef KOKKOS_IMPL_POINTEROWNERSHIP_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_ROCm.hpp b/lib/kokkos/core/src/Kokkos_ROCm.hpp index 469d6b2787..96207e73c6 100644 --- a/lib/kokkos/core/src/Kokkos_ROCm.hpp +++ b/lib/kokkos/core/src/Kokkos_ROCm.hpp @@ -140,7 +140,14 @@ public: static bool wake() ; /** \brief Wait until all dispatched functors complete. A noop for OpenMP. */ - static void fence() ; + static void impl_static_fence(); + + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE + static void fence(); + #else + void fence() const; + #endif + /// \brief Print configuration information to the given output stream. static void print_configuration( std::ostream & , const bool detail = false ); diff --git a/lib/kokkos/core/src/Kokkos_Serial.hpp b/lib/kokkos/core/src/Kokkos_Serial.hpp index 01701e53a2..5821b0c0c5 100644 --- a/lib/kokkos/core/src/Kokkos_Serial.hpp +++ b/lib/kokkos/core/src/Kokkos_Serial.hpp @@ -118,10 +118,16 @@ public: /// return asynchronously, before the functor completes. This /// method does not return until all dispatched functors on this /// device have completed. + static void impl_static_fence() {} + + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE static void fence() {} + #else + void fence() const {} + #endif /** \brief Return the maximum amount of concurrency. */ - static int concurrency() {return 1;}; + static int concurrency() {return 1;} //! Print configuration information to the given output stream. static void print_configuration( std::ostream & , const bool /* detail */ = false ) {} @@ -261,6 +267,20 @@ public: return *this; } + template + friend class TeamPolicyInternal; + + template< class ... OtherProperties > + TeamPolicyInternal(const TeamPolicyInternal& p) { + m_league_size = p.m_league_size; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + } + + //---------------------------------------- #ifdef KOKKOS_ENABLE_DEPRECATED_CODE template< class FunctorType > @@ -302,7 +322,7 @@ public: 20*1024*1024); } /** \brief Specify league size, request team size */ - TeamPolicyInternal( execution_space & + TeamPolicyInternal( const execution_space & , int league_size_request #ifndef KOKKOS_ENABLE_DEPRECATED_CODE , int team_size_request @@ -320,7 +340,7 @@ public: #endif } - TeamPolicyInternal( execution_space & + TeamPolicyInternal( const execution_space & , int league_size_request , const Kokkos::AUTO_t & /* team_size_request */ , int /* vector_length_request */ = 1 ) diff --git a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp index 5045e9cbbc..1c3d58af08 100644 --- a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp +++ b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp @@ -50,393 +50,203 @@ #if defined( KOKKOS_ENABLE_TASKDAG ) #include +#include //---------------------------------------------------------------------------- #include #include -//---------------------------------------------------------------------------- - -namespace Kokkos { - -// Forward declarations used in Impl::TaskQueue - -template< typename Arg1 = void , typename Arg2 = void > -class Future ; - -template< typename Space > -class TaskScheduler ; - -template< typename Space > -void wait( TaskScheduler< Space > const & ); - -template< typename Space > -struct is_scheduler : public std::false_type {}; - -template< typename Space > -struct is_scheduler< TaskScheduler< Space > > : public std::true_type {}; - -} // namespace Kokkos - +#include #include +#include +#include +#include +#include +#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { + namespace Impl { -/*\brief Implementation data for task data management, access, and execution. - * - * CRTP Inheritance structure to allow static_cast from the - * task root type and a task's FunctorType. - * - * TaskBase< Space , ResultType , FunctorType > - * : TaskBase< Space , ResultType , void > - * , FunctorType - * { ... }; - * - * TaskBase< Space , ResultType , void > - * : TaskBase< Space , void , void > - * { ... }; - */ -template< typename Space , typename ResultType , typename FunctorType > -class TaskBase ; +template +class TaskExec; -} // namespace Impl -} // namespace Kokkos +} // end namespace Impl -//---------------------------------------------------------------------------- - -namespace Kokkos { - -/** - * - * Future< space > // value_type == void - * Future< value > // space == Default - * Future< value , space > - * - */ -template< typename Arg1 , typename Arg2 > -class Future { -private: - - template< typename > friend class TaskScheduler ; - template< typename , typename > friend class Future ; - template< typename , typename , typename > friend class Impl::TaskBase ; - - enum { Arg1_is_space = Kokkos::is_space< Arg1 >::value }; - enum { Arg2_is_space = Kokkos::is_space< Arg2 >::value }; - enum { Arg1_is_value = ! Arg1_is_space && - ! std::is_same< Arg1 , void >::value }; - enum { Arg2_is_value = ! Arg2_is_space && - ! std::is_same< Arg2 , void >::value }; - - static_assert( ! ( Arg1_is_space && Arg2_is_space ) - , "Future cannot be given two spaces" ); - - static_assert( ! ( Arg1_is_value && Arg2_is_value ) - , "Future cannot be given two value types" ); - - using ValueType = - typename std::conditional< Arg1_is_value , Arg1 , - typename std::conditional< Arg2_is_value , Arg2 , void - >::type >::type ; - - using Space = - typename std::conditional< Arg1_is_space , Arg1 , - typename std::conditional< Arg2_is_space , Arg2 , void - >::type >::type ; - - using task_base = Impl::TaskBase< void , void , void > ; - using queue_type = Impl::TaskQueue< Space > ; - - task_base * m_task ; - - KOKKOS_INLINE_FUNCTION explicit - Future( task_base * task ) : m_task(0) - { if ( task ) queue_type::assign( & m_task , task ); } - - //---------------------------------------- +template +class BasicTaskScheduler : public Impl::TaskSchedulerBase +{ public: - using execution_space = typename Space::execution_space ; - using value_type = ValueType ; + using scheduler_type = BasicTaskScheduler; + using execution_space = ExecSpace; + using queue_type = QueueType; + using memory_space = typename queue_type::memory_space; + using memory_pool = typename queue_type::memory_pool; + using specialization = Impl::TaskQueueSpecialization; + using member_type = typename specialization::member_type; + using team_scheduler_type = BasicTaskScheduler; + template + using runnable_task_type = Impl::Task; + template + using future_type = Kokkos::BasicFuture; + template + using future_type_for_functor = future_type; - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - bool is_null() const { return 0 == m_task ; } - - KOKKOS_INLINE_FUNCTION - int reference_count() const - { return 0 != m_task ? m_task->reference_count() : 0 ; } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - void clear() - { if ( m_task ) queue_type::assign( & m_task , (task_base*)0 ); } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - ~Future() { clear(); } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - constexpr Future() noexcept : m_task(0) {} - - KOKKOS_INLINE_FUNCTION - Future( Future && rhs ) - : m_task( rhs.m_task ) { rhs.m_task = 0 ; } - - KOKKOS_INLINE_FUNCTION - Future( const Future & rhs ) - : m_task(0) - { if ( rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); } - - KOKKOS_INLINE_FUNCTION - Future & operator = ( Future && rhs ) - { - clear(); - m_task = rhs.m_task ; - rhs.m_task = 0 ; - return *this ; - } - - KOKKOS_INLINE_FUNCTION - Future & operator = ( const Future & rhs ) - { - if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); - return *this ; - } - - //---------------------------------------- - - template< class A1 , class A2 > - KOKKOS_INLINE_FUNCTION - Future( Future && rhs ) - : m_task( rhs.m_task ) - { - static_assert - ( std::is_same< Space , void >::value || - std::is_same< Space , typename Future::Space >::value - , "Assigned Futures must have the same space" ); - - static_assert - ( std::is_same< value_type , void >::value || - std::is_same< value_type , typename Future::value_type >::value - , "Assigned Futures must have the same value_type" ); - - rhs.m_task = 0 ; - } - - template< class A1 , class A2 > - KOKKOS_INLINE_FUNCTION - Future( const Future & rhs ) - : m_task(0) - { - static_assert - ( std::is_same< Space , void >::value || - std::is_same< Space , typename Future::Space >::value - , "Assigned Futures must have the same space" ); - - static_assert - ( std::is_same< value_type , void >::value || - std::is_same< value_type , typename Future::value_type >::value - , "Assigned Futures must have the same value_type" ); - - if ( rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); - } - - template< class A1 , class A2 > - KOKKOS_INLINE_FUNCTION - Future & operator = ( const Future & rhs ) - { - static_assert - ( std::is_same< Space , void >::value || - std::is_same< Space , typename Future::Space >::value - , "Assigned Futures must have the same space" ); - - static_assert - ( std::is_same< value_type , void >::value || - std::is_same< value_type , typename Future::value_type >::value - , "Assigned Futures must have the same value_type" ); - - if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); - return *this ; - } - - template< class A1 , class A2 > - KOKKOS_INLINE_FUNCTION - Future & operator = ( Future && rhs ) - { - static_assert - ( std::is_same< Space , void >::value || - std::is_same< Space , typename Future::Space >::value - , "Assigned Futures must have the same space" ); - - static_assert - ( std::is_same< value_type , void >::value || - std::is_same< value_type , typename Future::value_type >::value - , "Assigned Futures must have the same value_type" ); - - clear(); - m_task = rhs.m_task ; - rhs.m_task = 0 ; - return *this ; - } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - int is_ready() const noexcept - { return ( 0 == m_task ) || ( ((task_base*) task_base::LockTag) == m_task->m_wait ); } - - KOKKOS_INLINE_FUNCTION - const typename Impl::TaskResult< ValueType >::reference_type - get() const - { - if ( 0 == m_task ) { - Kokkos::abort( "Kokkos:::Future::get ERROR: is_null()"); - } - return Impl::TaskResult< ValueType >::get( m_task ); - } -}; - -// Is a Future with the given execution space -template< typename , typename ExecSpace = void > -struct is_future : public std::false_type {}; - -template< typename Arg1 , typename Arg2 , typename ExecSpace > -struct is_future< Future , ExecSpace > - : public std::integral_constant - < bool , - ( std::is_same< ExecSpace , void >::value || - std::is_same< ExecSpace - , typename Future::execution_space >::value ) - > {}; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -enum class TaskPriority : int { High = 0 - , Regular = 1 - , Low = 2 }; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- - -template< int TaskEnum , typename DepFutureType > -struct TaskPolicyData -{ - using execution_space = typename DepFutureType::execution_space ; - using scheduler_type = TaskScheduler< execution_space > ; - - enum : int { m_task_type = TaskEnum }; - - scheduler_type const * m_scheduler ; - DepFutureType const m_dependence ; - int m_priority ; - - TaskPolicyData() = delete ; - TaskPolicyData( TaskPolicyData && ) = default ; - TaskPolicyData( TaskPolicyData const & ) = default ; - TaskPolicyData & operator = ( TaskPolicyData && ) = default ; - TaskPolicyData & operator = ( TaskPolicyData const & ) = default ; - - KOKKOS_INLINE_FUNCTION - TaskPolicyData( DepFutureType const & arg_future - , Kokkos::TaskPriority const & arg_priority ) - : m_scheduler( 0 ) - , m_dependence( arg_future ) - , m_priority( static_cast( arg_priority ) ) - {} - - KOKKOS_INLINE_FUNCTION - TaskPolicyData( scheduler_type const & arg_scheduler - , Kokkos::TaskPriority const & arg_priority ) - : m_scheduler( & arg_scheduler ) - , m_dependence() - , m_priority( static_cast( arg_priority ) ) - {} - - KOKKOS_INLINE_FUNCTION - TaskPolicyData( scheduler_type const & arg_scheduler - , DepFutureType const & arg_future - , Kokkos::TaskPriority const & arg_priority ) - : m_scheduler( & arg_scheduler ) - , m_dependence( arg_future ) - , m_priority( static_cast( arg_priority ) ) - {} -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -template< typename ExecSpace > -class TaskScheduler -{ private: using track_type = Kokkos::Impl::SharedAllocationTracker ; - using queue_type = Kokkos::Impl::TaskQueue< ExecSpace > ; - using task_base = Impl::TaskBase< void , void , void > ; + using task_base = Impl::TaskBase; - track_type m_track ; - queue_type * m_queue ; + track_type m_track; + queue_type * m_queue; //---------------------------------------- + template + friend class Impl::TaskQueue; + template + friend struct Impl::TaskQueueSpecialization; + template + friend class Impl::TaskQueueSpecializationConstrained; + template + friend class Impl::TaskTeamMemberAdapter; + template + friend class Impl::TaskExec; + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + BasicTaskScheduler( + track_type arg_track, + queue_type* arg_queue + ) + : m_track(std::move(arg_track)), + m_queue(std::move(arg_queue)) + { } + + KOKKOS_INLINE_FUNCTION + team_scheduler_type get_team_scheduler(int team_rank) const { + return { m_track, &m_queue->get_team_queue(team_rank) }; + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + static constexpr task_base* _get_task_ptr(std::nullptr_t) { return nullptr; } + + template + KOKKOS_INLINE_FUNCTION + static constexpr task_base* _get_task_ptr(future_type&& f) + { + return f.m_task; + } + + template< int TaskEnum , typename DepTaskType , typename FunctorType > + KOKKOS_FUNCTION + Kokkos::BasicFuture + _spawn_impl( + DepTaskType* arg_predecessor_task, + TaskPriority arg_priority, + typename task_base::function_type arg_function, + typename task_base::destroy_type arg_destroy, + FunctorType&& arg_functor + ) + { + using functor_future_type = future_type_for_functor::type>; + using task_type = Impl::Task; + + //---------------------------------------- + // Give single-thread back-ends an opportunity to clear + // queue of ready tasks before allocating a new task + + // TODO @tasking @optimization DSH re-enable this, maybe? + // specialization::iff_single_thread_recursive_execute(scheduler); + + //---------------------------------------- + + functor_future_type f ; + + // Allocate task from memory pool + + const size_t alloc_size = + m_queue->template spawn_allocation_size< FunctorType >(); + + void* task_storage = m_queue->allocate(alloc_size); + + if (task_storage) { + + // Placement new construction + // Reference count starts at two: + // +1 for the matching decrement when task is complete + // +1 for the future + f.m_task = new (task_storage) task_type( std::forward(arg_functor) ); + + f.m_task->m_apply = arg_function; + //f.m_task->m_destroy = arg_destroy; + f.m_task->m_queue = m_queue; + f.m_task->m_next = arg_predecessor_task; + f.m_task->m_ref_count = 2; + f.m_task->m_alloc_size = alloc_size; + f.m_task->m_task_type = TaskEnum; + f.m_task->m_priority = (int16_t)arg_priority; + + Kokkos::memory_fence(); + + // The dependence (if any) is processed immediately + // within the schedule function, as such the dependence's + // reference count does not need to be incremented for + // the assignment. + + m_queue->schedule_runnable( f.m_task ); + // This task may be updated or executed at any moment, + // even during the call to 'schedule'. + } + + return f; + + } + public: - using execution_space = ExecSpace ; - using memory_space = typename queue_type::memory_space ; - using memory_pool = typename queue_type::memory_pool ; - using member_type = - typename Kokkos::Impl::TaskQueueSpecialization< ExecSpace >::member_type ; KOKKOS_INLINE_FUNCTION - TaskScheduler() : m_track(), m_queue(0) {} + BasicTaskScheduler() : m_track(), m_queue(0) {} KOKKOS_INLINE_FUNCTION - TaskScheduler( TaskScheduler && rhs ) - : m_track( rhs.m_track ), m_queue( rhs.m_queue ) {} + BasicTaskScheduler( BasicTaskScheduler && rhs ) noexcept + : m_track(rhs.m_track), // probably should be a move, but this is deprecated code anyway + m_queue(std::move(rhs.m_queue)) + { } KOKKOS_INLINE_FUNCTION - TaskScheduler( TaskScheduler const & rhs ) - : m_track( rhs.m_track ), m_queue( rhs.m_queue ) {} + BasicTaskScheduler( BasicTaskScheduler const & rhs ) + : m_track(rhs.m_track), + m_queue(rhs.m_queue) + { } KOKKOS_INLINE_FUNCTION - TaskScheduler & operator = ( TaskScheduler && rhs ) - { m_track = rhs.m_track ; m_queue = rhs.m_queue ; return *this ; } + BasicTaskScheduler& operator=(BasicTaskScheduler&& rhs) noexcept + { + m_track = rhs.m_track; // probably should be a move, but this is deprecated code anyway + m_queue = std::move(rhs.m_queue); + return *this; + } KOKKOS_INLINE_FUNCTION - TaskScheduler & operator = ( TaskScheduler const & rhs ) - { m_track = rhs.m_track ; m_queue = rhs.m_queue ; return *this ; } + BasicTaskScheduler& operator=(BasicTaskScheduler const& rhs) + { + m_track = rhs.m_track; + m_queue = rhs.m_queue; + return *this; + } - TaskScheduler( memory_pool const & arg_memory_pool ) - : m_track() - , m_queue(0) + explicit BasicTaskScheduler(memory_pool const & arg_memory_pool) noexcept + : m_track(), m_queue(0) { typedef Kokkos::Impl::SharedAllocationRecord < memory_space , typename queue_type::Destroy > @@ -455,13 +265,13 @@ public: m_track.assign_allocated_record_to_uninitialized( record ); } - TaskScheduler( memory_space const & arg_memory_space + BasicTaskScheduler( memory_space const & arg_memory_space , size_t const mempool_capacity , unsigned const mempool_min_block_size // = 1u << 6 , unsigned const mempool_max_block_size // = 1u << 10 , unsigned const mempool_superblock_size // = 1u << 12 ) - : TaskScheduler( memory_pool( arg_memory_space + : BasicTaskScheduler( memory_pool( arg_memory_space , mempool_capacity , mempool_min_block_size , mempool_max_block_size @@ -470,6 +280,12 @@ public: //---------------------------------------- + KOKKOS_INLINE_FUNCTION + queue_type& queue() const noexcept { + KOKKOS_EXPECTS(m_queue != nullptr); + return *m_queue; + } + KOKKOS_INLINE_FUNCTION memory_pool * memory() const noexcept { return m_queue ? &( m_queue->m_memory ) : (memory_pool*) 0 ; } @@ -486,216 +302,173 @@ public: size_t when_all_allocation_size( int narg ) const { return m_queue->when_all_allocation_size( narg ); } + //---------------------------------------- - template< int TaskEnum , typename DepFutureType , typename FunctorType > + template KOKKOS_FUNCTION static - Kokkos::Future< typename FunctorType::value_type , execution_space > - spawn( Impl::TaskPolicyData const & arg_policy - , typename task_base::function_type arg_function - , FunctorType && arg_functor - ) - { - using value_type = typename FunctorType::value_type ; - using future_type = Future< value_type , execution_space > ; - using task_type = Impl::TaskBase< execution_space - , value_type - , FunctorType > ; + Kokkos::BasicFuture + spawn( + Impl::TaskPolicyWithScheduler&& arg_policy, + typename task_base::function_type arg_function, + typename task_base::destroy_type arg_destroy, + FunctorType&& arg_functor + ) + { + return std::move(arg_policy.scheduler()).template _spawn_impl( + _get_task_ptr(std::move(arg_policy.predecessor())), + arg_policy.priority(), + arg_function, + arg_destroy, + std::forward(arg_functor) + ); + } - queue_type * const queue = - arg_policy.m_scheduler ? arg_policy.m_scheduler->m_queue : ( - arg_policy.m_dependence.m_task - ? static_cast(arg_policy.m_dependence.m_task->m_queue) - : (queue_type*) 0 ); + template + KOKKOS_FUNCTION + future_type_for_functor::type> + spawn( + Impl::TaskPolicyWithPredecessor&& arg_policy, + FunctorType&& arg_functor + ) + { + using task_type = runnable_task_type; + typename task_type::function_type const ptr = task_type::apply; + typename task_type::destroy_type const dtor = task_type::destroy; - if ( 0 == queue ) { - Kokkos::abort("Kokkos spawn requires scheduler or non-null Future"); - } + return _spawn_impl( + _get_task_ptr(std::move(arg_policy).predecessor()), + arg_policy.priority(), + ptr, dtor, + std::forward(arg_functor) + ); + } - if ( arg_policy.m_dependence.m_task != 0 && - arg_policy.m_dependence.m_task->m_queue != queue ) { - Kokkos::abort("Kokkos spawn given incompatible scheduler and Future"); - } - - //---------------------------------------- - // Give single-thread back-ends an opportunity to clear - // queue of ready tasks before allocating a new task - - queue->iff_single_thread_recursive_execute(); - - //---------------------------------------- - - future_type f ; - - // Allocate task from memory pool - - const size_t alloc_size = - queue->template spawn_allocation_size< FunctorType >(); - - f.m_task = - reinterpret_cast< task_type * >(queue->allocate(alloc_size) ); - - if ( f.m_task ) { - - // Placement new construction - // Reference count starts at two: - // +1 for the matching decrement when task is complete - // +1 for the future - new ( f.m_task ) task_type( std::move(arg_functor) ); - - f.m_task->m_apply = arg_function ; - f.m_task->m_queue = queue ; - f.m_task->m_next = arg_policy.m_dependence.m_task ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = alloc_size ; - f.m_task->m_task_type = arg_policy.m_task_type ; - f.m_task->m_priority = arg_policy.m_priority ; - - Kokkos::memory_fence(); - - // The dependence (if any) is processed immediately - // within the schedule function, as such the dependence's - // reference count does not need to be incremented for - // the assignment. - - queue->schedule_runnable( f.m_task ); - // This task may be updated or executed at any moment, - // even during the call to 'schedule'. - } - - return f ; - } - - template< typename FunctorType , typename A1 , typename A2 > + template KOKKOS_FUNCTION static void - respawn( FunctorType * arg_self - , Future const & arg_dependence - , TaskPriority const & arg_priority - ) - { - // Precondition: task is in Executing state + respawn( + FunctorType* arg_self, + BasicFuture const & arg_dependence, + TaskPriority const & arg_priority + ) { + // Precondition: task is in Executing state - using value_type = typename FunctorType::value_type ; - using task_type = Impl::TaskBase< execution_space - , value_type - , FunctorType > ; + using value_type = typename FunctorType::value_type ; + using task_type = Impl::Task; - task_type * const task = static_cast< task_type * >( arg_self ); + task_type * const task = static_cast< task_type * >( arg_self ); - task->m_priority = static_cast(arg_priority); + task->m_priority = static_cast(arg_priority); - task->add_dependence( arg_dependence.m_task ); + task->add_dependence( arg_dependence.m_task ); - // Postcondition: task is in Executing-Respawn state - } + // Postcondition: task is in Executing-Respawn state + } template< typename FunctorType > KOKKOS_FUNCTION static void - respawn( FunctorType * arg_self - , TaskScheduler const & - , TaskPriority const & arg_priority - ) - { - // Precondition: task is in Executing state + respawn( + FunctorType* arg_self, + BasicTaskScheduler const &, + TaskPriority const & arg_priority + ) + { + // Precondition: task is in Executing state - using value_type = typename FunctorType::value_type ; - using task_type = Impl::TaskBase< execution_space - , value_type - , FunctorType > ; + using value_type = typename FunctorType::value_type; + using task_type = Impl::Task; - task_type * const task = static_cast< task_type * >( arg_self ); + task_type * const task = static_cast< task_type * >( arg_self ); - task->m_priority = static_cast(arg_priority); + task->m_priority = static_cast(arg_priority); - task->add_dependence( (task_base*) 0 ); + task->add_dependence( (task_base*) 0 ); - // Postcondition: task is in Executing-Respawn state - } + // Postcondition: task is in Executing-Respawn state + } //---------------------------------------- /**\brief Return a future that is complete * when all input futures are complete. */ - template< typename A1 , typename A2 > - KOKKOS_FUNCTION static - Future< execution_space > - when_all( Future< A1 , A2 > const arg[] , int narg ) - { - using future_type = Future< execution_space > ; + template + KOKKOS_FUNCTION + BasicFuture< void, scheduler_type > + when_all(BasicFuture const arg[], int narg) + { - future_type f ; + future_type f ; - if ( narg ) { + if ( narg ) { - queue_type * queue = 0 ; + queue_type* q = m_queue; - for ( int i = 0 ; i < narg ; ++i ) { - task_base * const t = arg[i].m_task ; - if ( 0 != t ) { - // Increment reference count to track subsequent assignment. - Kokkos::atomic_increment( &(t->m_ref_count) ); - if ( queue == 0 ) { - queue = static_cast< queue_type * >( t->m_queue ); - } - else if ( queue != static_cast< queue_type * >( t->m_queue ) ) { - Kokkos::abort("Kokkos when_all Futures must be in the same scheduler" ); - } - } - } + //BasicTaskScheduler const* scheduler_ptr = nullptr; - if ( queue != 0 ) { - - size_t const alloc_size = queue->when_all_allocation_size( narg ); - - f.m_task = - reinterpret_cast< task_base * >( queue->allocate( alloc_size ) ); - - if ( f.m_task ) { - - // Reference count starts at two: - // +1 to match decrement when task completes - // +1 for the future - - new( f.m_task ) task_base(); - - f.m_task->m_queue = queue ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = alloc_size ; - f.m_task->m_dep_count = narg ; - f.m_task->m_task_type = task_base::Aggregate ; - - // Assign dependences, reference counts were already incremented - - task_base * volatile * const dep = - f.m_task->aggregate_dependences(); - - for ( int i = 0 ; i < narg ; ++i ) { dep[i] = arg[i].m_task ; } - - Kokkos::memory_fence(); - - queue->schedule_aggregate( f.m_task ); - // this when_all may be processed at any moment + for ( int i = 0 ; i < narg ; ++i ) { + task_base * const t = arg[i].m_task ; + if ( nullptr != t ) { + // Increment reference count to track subsequent assignment. + Kokkos::atomic_increment( &(t->m_ref_count) ); + if(q != static_cast< queue_type const* >(t->m_queue)) { + Kokkos::abort("Kokkos when_all Futures must be in the same scheduler" ); } } } - return f ; + if ( q != 0 ) { // this should probably handle the queue == 0 case, but this is deprecated code anyway + + size_t const alloc_size = q->when_all_allocation_size( narg ); + + f.m_task = + reinterpret_cast< task_base * >( q->allocate( alloc_size ) ); + //f.m_scheduler = *scheduler_ptr; + + if ( f.m_task ) { + + // Reference count starts at two: + // +1 to match decrement when task completes + // +1 for the future + + new( f.m_task ) task_base(); + + f.m_task->m_queue = q; + f.m_task->m_ref_count = 2 ; + f.m_task->m_alloc_size = static_cast(alloc_size); + f.m_task->m_dep_count = narg ; + f.m_task->m_task_type = task_base::Aggregate ; + + // Assign dependences, reference counts were already incremented + + task_base * volatile * const dep = + f.m_task->aggregate_dependences(); + + for ( int i = 0 ; i < narg ; ++i ) { dep[i] = arg[i].m_task ; } + + Kokkos::memory_fence(); + + q->schedule_aggregate( f.m_task ); + // this when_all may be processed at any moment + } + } } + return f ; + } + template < class F > KOKKOS_FUNCTION - Future< execution_space > + BasicFuture< void, scheduler_type > when_all( int narg , F const func ) { using input_type = decltype( func(0) ); - using future_type = Future< execution_space > ; static_assert( is_future< input_type >::value , "Functor must return a Kokkos::Future" ); - future_type f ; + future_type f ; if ( 0 == narg ) return f ; @@ -711,12 +484,16 @@ public: // +1 for the future new( f.m_task ) task_base(); + //f.m_scheduler = *this; - f.m_task->m_queue = m_queue ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = alloc_size ; - f.m_task->m_dep_count = narg ; - f.m_task->m_task_type = task_base::Aggregate ; + //f.m_task->m_scheduler = &f.m_scheduler; + f.m_task->m_queue = m_queue; + f.m_task->m_ref_count = 2 ; + f.m_task->m_alloc_size = static_cast(alloc_size); + f.m_task->m_dep_count = narg ; + f.m_task->m_task_type = task_base::Aggregate ; + //f.m_task->m_apply = nullptr; + //f.m_task->m_destroy = nullptr; // Assign dependences, reference counts were already incremented @@ -727,9 +504,10 @@ public: const input_type arg_f = func(i); if ( 0 != arg_f.m_task ) { - if ( m_queue != static_cast< queue_type * >( arg_f.m_task->m_queue ) ) { - Kokkos::abort("Kokkos when_all Futures must be in the same scheduler" ); - } + // Not scheduled, so task scheduler is not yet set + //if ( m_queue != static_cast< BasicTaskScheduler const * >( arg_f.m_task->m_scheduler )->m_queue ) { + // Kokkos::abort("Kokkos when_all Futures must be in the same scheduler" ); + //} // Increment reference count to track subsequent assignment. Kokkos::atomic_increment( &(arg_f.m_task->m_ref_count) ); dep[i] = arg_f.m_task ; @@ -764,9 +542,9 @@ public: //---------------------------------------- - template< typename S > + template friend - void Kokkos::wait( Kokkos::TaskScheduler< S > const & ); + void wait(Kokkos::BasicTaskScheduler const&); }; @@ -780,84 +558,122 @@ namespace Kokkos { //---------------------------------------------------------------------------- // Construct a TaskTeam execution policy -template< typename T > -Kokkos::Impl::TaskPolicyData - < Kokkos::Impl::TaskBase::TaskTeam - , typename std::conditional< Kokkos::is_future< T >::value , T , - typename Kokkos::Future< typename T::execution_space > >::type - > +template +Impl::TaskPolicyWithPredecessor< + Impl::TaskType::TaskTeam, + Kokkos::BasicFuture +> KOKKOS_INLINE_FUNCTION -TaskTeam( T const & arg - , TaskPriority const & arg_priority = TaskPriority::Regular - ) +TaskTeam( + Kokkos::BasicFuture arg_future, + TaskPriority arg_priority = TaskPriority::Regular +) { - static_assert( Kokkos::is_future::value || - Kokkos::is_scheduler::value - , "Kokkos TaskTeam argument must be Future or TaskScheduler" ); - - return - Kokkos::Impl::TaskPolicyData - < Kokkos::Impl::TaskBase::TaskTeam - , typename std::conditional< Kokkos::is_future< T >::value , T , - typename Kokkos::Future< typename T::execution_space > >::type - >( arg , arg_priority ); + return { std::move(arg_future), arg_priority }; } -template< typename E , typename F > -Kokkos::Impl:: - TaskPolicyData< Kokkos::Impl::TaskBase::TaskTeam , F > +template +Impl::TaskPolicyWithScheduler< + Impl::TaskType::TaskTeam, Scheduler +> KOKKOS_INLINE_FUNCTION -TaskTeam( TaskScheduler const & arg_scheduler - , F const & arg_future - , typename std::enable_if< Kokkos::is_future::value , - TaskPriority >::type const & arg_priority = TaskPriority::Regular - ) +TaskTeam( + Scheduler arg_scheduler, + typename std::enable_if< + Kokkos::is_scheduler::value, + TaskPriority + >::type arg_priority = TaskPriority::Regular +) { - return - Kokkos::Impl::TaskPolicyData - < Kokkos::Impl::TaskBase::TaskTeam , F > - ( arg_scheduler , arg_future , arg_priority ); + return { std::move(arg_scheduler), arg_priority }; +} + +template< + class Scheduler, + class PredecessorFuture +> +Impl::TaskPolicyWithScheduler< + Kokkos::Impl::TaskType::TaskTeam, + Scheduler, + PredecessorFuture +> +KOKKOS_INLINE_FUNCTION +TaskTeam( + Scheduler arg_scheduler, + PredecessorFuture arg_future, + typename std::enable_if< + Kokkos::is_scheduler::value + && Kokkos::is_future::value, + TaskPriority + >::type arg_priority = TaskPriority::Regular +) +{ + static_assert( + std::is_same::value, + "Can't create a task policy from a scheduler and a future from a different scheduler" + ); + + return { std::move(arg_scheduler), std::move(arg_future), arg_priority }; } // Construct a TaskSingle execution policy -template< typename T > -Kokkos::Impl::TaskPolicyData - < Kokkos::Impl::TaskBase::TaskSingle - , typename std::conditional< Kokkos::is_future< T >::value , T , - typename Kokkos::Future< typename T::execution_space > >::type - > +template +Impl::TaskPolicyWithPredecessor< + Impl::TaskType::TaskSingle, + Kokkos::BasicFuture +> KOKKOS_INLINE_FUNCTION -TaskSingle( T const & arg - , TaskPriority const & arg_priority = TaskPriority::Regular - ) +TaskSingle( + Kokkos::BasicFuture arg_future, + TaskPriority arg_priority = TaskPriority::Regular +) { - static_assert( Kokkos::is_future::value || - Kokkos::is_scheduler::value - , "Kokkos TaskSingle argument must be Future or TaskScheduler" ); - - return - Kokkos::Impl::TaskPolicyData - < Kokkos::Impl::TaskBase::TaskSingle - , typename std::conditional< Kokkos::is_future< T >::value , T , - typename Kokkos::Future< typename T::execution_space > >::type - >( arg , arg_priority ); + return { std::move(arg_future), arg_priority }; } -template< typename E , typename F > -Kokkos::Impl:: - TaskPolicyData< Kokkos::Impl::TaskBase::TaskSingle , F > +template +Impl::TaskPolicyWithScheduler< + Impl::TaskType::TaskSingle, Scheduler +> KOKKOS_INLINE_FUNCTION -TaskSingle( TaskScheduler const & arg_scheduler - , F const & arg_future - , typename std::enable_if< Kokkos::is_future::value , - TaskPriority >::type const & arg_priority = TaskPriority::Regular - ) +TaskSingle( + Scheduler arg_scheduler, + typename std::enable_if< + Kokkos::is_scheduler::value, + TaskPriority + >::type arg_priority = TaskPriority::Regular +) { - return - Kokkos::Impl::TaskPolicyData - < Kokkos::Impl::TaskBase::TaskSingle , F > - ( arg_scheduler , arg_future , arg_priority ); + return { std::move(arg_scheduler), arg_priority }; +} + +template< + class Scheduler, + class PredecessorFuture +> +Impl::TaskPolicyWithScheduler< + Kokkos::Impl::TaskType::TaskSingle, + Scheduler, + PredecessorFuture +> +KOKKOS_INLINE_FUNCTION +TaskSingle( + Scheduler arg_scheduler, + PredecessorFuture arg_future, + typename std::enable_if< + Kokkos::is_scheduler::value + && Kokkos::is_future::value, + TaskPriority + >::type arg_priority = TaskPriority::Regular +) +{ + static_assert( + std::is_same::value, + "Can't create a task policy from a scheduler and a future from a different scheduler" + ); + + return { std::move(arg_scheduler), std::move(arg_future), arg_priority }; } //---------------------------------------------------------------------------- @@ -868,34 +684,31 @@ TaskSingle( TaskScheduler const & arg_scheduler * 2) With scheduler or dependence * 3) High, Normal, or Low priority */ -template< int TaskEnum - , typename DepFutureType - , typename FunctorType > -Future< typename FunctorType::value_type - , typename DepFutureType::execution_space > -host_spawn( Impl::TaskPolicyData const & arg_policy - , FunctorType && arg_functor - ) -{ - using exec_space = typename DepFutureType::execution_space ; - using scheduler = TaskScheduler< exec_space > ; +template +typename Scheduler::template future_type_for_functor::type> +host_spawn( + Impl::TaskPolicyWithScheduler arg_policy, + FunctorType&& arg_functor +) { + using scheduler_type = Scheduler; + using task_type = + typename scheduler_type::template runnable_task_type; - typedef Impl::TaskBase< exec_space - , typename FunctorType::value_type - , FunctorType - > task_type ; - - static_assert( TaskEnum == task_type::TaskTeam || - TaskEnum == task_type::TaskSingle - , "Kokkos host_spawn requires TaskTeam or TaskSingle" ); + static_assert( + TaskEnum == Impl::TaskType::TaskTeam || TaskEnum == Impl::TaskType::TaskSingle, + "Kokkos host_spawn requires TaskTeam or TaskSingle" + ); // May be spawning a Cuda task, must use the specialization // to query on-device function pointer. - typename task_type::function_type const ptr = - Kokkos::Impl::TaskQueueSpecialization< exec_space >:: - template get_function_pointer< task_type >(); + typename task_type::function_type ptr; + typename task_type::destroy_type dtor; + Kokkos::Impl::TaskQueueSpecialization< scheduler_type >:: + template get_function_pointer< task_type >(ptr, dtor); - return scheduler::spawn( arg_policy , ptr , std::move(arg_functor) ); + return scheduler_type::spawn( + std::move(arg_policy), ptr, dtor, std::forward(arg_functor) + ); } /**\brief A task spawns a task with options @@ -904,39 +717,38 @@ host_spawn( Impl::TaskPolicyData const & arg_policy * 2) With scheduler or dependence * 3) High, Normal, or Low priority */ -template< int TaskEnum - , typename DepFutureType - , typename FunctorType > -Future< typename FunctorType::value_type - , typename DepFutureType::execution_space > +template +typename Scheduler::template future_type_for_functor::type> KOKKOS_INLINE_FUNCTION -task_spawn( Impl::TaskPolicyData const & arg_policy - , FunctorType && arg_functor - ) +task_spawn( + Impl::TaskPolicyWithScheduler arg_policy, + FunctorType&& arg_functor +) { - using exec_space = typename DepFutureType::execution_space ; - using scheduler = TaskScheduler< exec_space > ; + using scheduler_type = Scheduler; - typedef Impl::TaskBase< exec_space - , typename FunctorType::value_type - , FunctorType - > task_type ; + using task_type = + typename scheduler_type::template runnable_task_type; -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) && \ - defined( KOKKOS_ENABLE_CUDA ) + #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) && \ + defined( KOKKOS_ENABLE_CUDA ) - static_assert( ! std::is_same< Kokkos::Cuda , exec_space >::value - , "Error calling Kokkos::task_spawn for Cuda space within Host code" ); + static_assert( ! std::is_same< Kokkos::Cuda , typename Scheduler::execution_space >::value + , "Error calling Kokkos::task_spawn for Cuda space within Host code" ); -#endif + #endif - static_assert( TaskEnum == task_type::TaskTeam || - TaskEnum == task_type::TaskSingle - , "Kokkos host_spawn requires TaskTeam or TaskSingle" ); + static_assert( + TaskEnum == Impl::TaskType::TaskTeam || TaskEnum == Impl::TaskType::TaskSingle, + "Kokkos task_spawn requires TaskTeam or TaskSingle" + ); typename task_type::function_type const ptr = task_type::apply ; + typename task_type::destroy_type const dtor = task_type::destroy ; - return scheduler::spawn( arg_policy , ptr , std::move(arg_functor) ); + return scheduler_type::spawn(std::move(arg_policy), ptr, dtor, + std::forward(arg_functor) + ); } /**\brief A task respawns itself with options @@ -956,36 +768,42 @@ respawn( FunctorType * arg_self Kokkos::is_scheduler::value , "Kokkos respawn argument must be Future or TaskScheduler" ); - TaskScheduler< typename T::execution_space >:: - respawn( arg_self , arg , arg_priority ); + T::scheduler_type::respawn( + arg_self , arg , arg_priority + ); } //---------------------------------------------------------------------------- -template< typename A1 , typename A2 > -KOKKOS_INLINE_FUNCTION -Future< typename Future< A1 , A2 >::execution_space > -when_all( Future< A1 , A2 > const arg[] - , int narg - ) -{ - return TaskScheduler< typename Future::execution_space >:: - when_all( arg , narg ); -} +//template +//KOKKOS_INLINE_FUNCTION +//BasicFuture +//when_all(BasicFuture const arg[], int narg) +//{ +// return BasicFuture::scheduler_type::when_all(arg, narg); +//} //---------------------------------------------------------------------------- // Wait for all runnable tasks to complete -template< typename ExecSpace > +template inline -void wait( TaskScheduler< ExecSpace > const & scheduler ) -{ scheduler.m_queue->execute(); } +void wait(BasicTaskScheduler const& scheduler) +{ + using scheduler_type = BasicTaskScheduler; + scheduler_type::specialization::execute(scheduler); + //scheduler.m_queue->execute(); +} } // namespace Kokkos //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- +//////////////////////////////////////////////////////////////////////////////// +// END OLD CODE +//////////////////////////////////////////////////////////////////////////////// + #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif /* #ifndef KOKKOS_TASKSCHEDULER_HPP */ diff --git a/lib/kokkos/core/src/Kokkos_TaskScheduler_fwd.hpp b/lib/kokkos/core/src/Kokkos_TaskScheduler_fwd.hpp new file mode 100644 index 0000000000..79d502c729 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_TaskScheduler_fwd.hpp @@ -0,0 +1,249 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TASKSCHEDULER_FWD_HPP +#define KOKKOS_TASKSCHEDULER_FWD_HPP + +//---------------------------------------------------------------------------- + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +//---------------------------------------------------------------------------- + +namespace Kokkos { + +// Forward declarations used in Impl::TaskQueue + +template +class BasicFuture; + +template +class SimpleTaskScheduler; + +template +class BasicTaskScheduler; + +template< typename Space > +struct is_scheduler : public std::false_type {}; + +template +struct is_scheduler> : public std::true_type {}; + +template +struct is_scheduler> : public std::true_type {}; + +enum class TaskPriority : int { + High = 0, + Regular = 1, + Low = 2 +}; + +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template +class MemoryPool; + +namespace Impl { + +template +class TaskNode; + +class TaskBase; + +/*\brief Implementation data for task data management, access, and execution. + * (Deprecated) + * CRTP Inheritance structure to allow static_cast from the + * task root type and a task's FunctorType. + * + * TaskBase< Space , ResultType , FunctorType > + * : TaskBase< Space , ResultType , void > + * , FunctorType + * { ... }; + * + * TaskBase< Space , ResultType , void > + * : TaskBase< Space , void , void > + * { ... }; + */ +template< typename Space , typename ResultType , typename FunctorType > +class Task; + +class TaskQueueBase; + +template< typename Space, typename MemorySpace> +class TaskQueue; + +template< typename ExecSpace, typename MemorySpace> +class TaskQueueMultiple; + +template< + typename ExecSpace, typename MemSpace, typename TaskQueueTraits, + class MemoryPool = Kokkos::MemoryPool> +> +class SingleTaskQueue; + +template< typename ExecSpace, typename MemSpace, typename TaskQueueTraits, class MemoryPool> +class MultipleTaskQueue; + +struct TaskQueueTraitsLockBased; + +template +struct TaskQueueTraitsChaseLev; + +template< typename ResultType > +struct TaskResult; + +struct TaskSchedulerBase; + +template +struct default_tasking_memory_space_for_execution_space +{ + using type = typename ExecSpace::memory_space; +}; + +#if defined( KOKKOS_ENABLE_CUDA ) +template <> +struct default_tasking_memory_space_for_execution_space +{ + using type = Kokkos::CudaUVMSpace; +}; +#endif + +template +using default_tasking_memory_space_for_execution_space_t = + typename default_tasking_memory_space_for_execution_space::type; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template< typename Space > +using DeprecatedTaskScheduler = BasicTaskScheduler< + Space, + Impl::TaskQueue> +>; + +template< typename Space > +using DeprecatedTaskSchedulerMultiple = BasicTaskScheduler< + Space, + Impl::TaskQueueMultiple> +>; + +template< typename Space > +using TaskScheduler = SimpleTaskScheduler< + Space, + Impl::SingleTaskQueue< + Space, + Impl::default_tasking_memory_space_for_execution_space_t, + Impl::TaskQueueTraitsLockBased + > +>; + +template< typename Space > +using TaskSchedulerMultiple = SimpleTaskScheduler< + Space, + Impl::MultipleTaskQueue< + Space, + Impl::default_tasking_memory_space_for_execution_space_t, + Impl::TaskQueueTraitsLockBased, + Kokkos::MemoryPool< + Kokkos::Device< + Space, + Impl::default_tasking_memory_space_for_execution_space_t + > + > + > +>; + +template< typename Space > +using ChaseLevTaskScheduler = SimpleTaskScheduler< + Space, + Impl::MultipleTaskQueue< + Space, + Impl::default_tasking_memory_space_for_execution_space_t, + Impl::TaskQueueTraitsChaseLev<>, + Kokkos::MemoryPool< + Kokkos::Device< + Space, + Impl::default_tasking_memory_space_for_execution_space_t + > + > + > +>; + +template +void wait(BasicTaskScheduler const&); + +namespace Impl { + +struct TaskSchedulerBase { }; + +class TaskQueueBase { }; + +template +class TaskQueueSpecializationConstrained { }; + +template +struct TaskQueueSpecialization : TaskQueueSpecializationConstrained { }; + +template +struct TaskPolicyData; + + +} // end namespace Impl + +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_TASKSCHEDULER_FWD_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_Threads.hpp b/lib/kokkos/core/src/Kokkos_Threads.hpp index d5e684e4ea..03dab1acaf 100644 --- a/lib/kokkos/core/src/Kokkos_Threads.hpp +++ b/lib/kokkos/core/src/Kokkos_Threads.hpp @@ -105,7 +105,13 @@ public: /// return asynchronously, before the functor completes. This /// method does not return until all dispatched functors on this /// device have completed. + static void impl_static_fence(); + + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE static void fence(); + #else + void fence() const; + #endif /** \brief Return the maximum amount of concurrency. */ static int concurrency(); diff --git a/lib/kokkos/core/src/Kokkos_View.hpp b/lib/kokkos/core/src/Kokkos_View.hpp index 754a0ab8c0..3fe8e6f067 100644 --- a/lib/kokkos/core/src/Kokkos_View.hpp +++ b/lib/kokkos/core/src/Kokkos_View.hpp @@ -74,7 +74,11 @@ template< class DataType , class ArrayLayout struct ViewDataAnalysis ; template< class , class ... > -class ViewMapping { public: enum { is_assignable = false }; }; +class ViewMapping { + public: + enum { is_assignable_data_type = false }; + enum { is_assignable = false }; +}; @@ -97,6 +101,7 @@ std::size_t count_valid_integers(const IntType i0, } +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE KOKKOS_INLINE_FUNCTION void runtime_check_rank_device(const size_t dyn_rank, const bool is_void_spec, @@ -109,8 +114,6 @@ void runtime_check_rank_device(const size_t dyn_rank, const size_t i6, const size_t i7 ){ -#ifndef KOKKOS_ENABLE_DEPRECATED_CODE - if ( is_void_spec ) { const size_t num_passed_args = count_valid_integers(i0, i1, i2, i3, i4, i5, i6, i7); @@ -121,10 +124,25 @@ void runtime_check_rank_device(const size_t dyn_rank, } } -#endif } +#else +KOKKOS_INLINE_FUNCTION +void runtime_check_rank_device(const size_t , + const bool , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t ){ + +} +#endif #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE KOKKOS_INLINE_FUNCTION void runtime_check_rank_host(const size_t dyn_rank, const bool is_void_spec, @@ -137,7 +155,6 @@ void runtime_check_rank_host(const size_t dyn_rank, const size_t i6, const size_t i7, const std::string & label ){ -#ifndef KOKKOS_ENABLE_DEPRECATED_CODE if ( is_void_spec ) { const size_t num_passed_args = count_valid_integers(i0, i1, i2, i3, @@ -150,8 +167,20 @@ void runtime_check_rank_host(const size_t dyn_rank, Kokkos::abort(message.c_str()) ; } } -#endif } +#else +KOKKOS_INLINE_FUNCTION +void runtime_check_rank_host(const size_t , + const bool , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t , const std::string &){} +#endif #endif } /* namespace Impl */ @@ -362,8 +391,8 @@ public: typedef typename MemorySpace::size_type size_type ; enum { is_hostspace = std::is_same< MemorySpace , HostSpace >::value }; - enum { is_managed = MemoryTraits::Unmanaged == 0 }; - enum { is_random_access = MemoryTraits::RandomAccess == 1 }; + enum { is_managed = MemoryTraits::is_unmanaged == 0 }; + enum { is_random_access = MemoryTraits::is_random_access == 1 }; //------------------------------------ }; @@ -1965,7 +1994,10 @@ public: template< class RT , class ... RP > KOKKOS_INLINE_FUNCTION - View( const View & rhs ) + View( const View & rhs, + typename std::enable_if::traits , typename traits::specialize >::is_assignable_data_type>::type* = 0 + ) : m_track( rhs.m_track , traits::is_managed ) , m_map() { @@ -1977,7 +2009,9 @@ public: template< class RT , class ... RP > KOKKOS_INLINE_FUNCTION - View & operator = ( const View & rhs ) + typename std::enable_if::traits , typename traits::specialize >::is_assignable_data_type, + View>::type & operator = ( const View & rhs ) { typedef typename View::traits SrcTraits ; typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , typename traits::specialize > Mapping ; @@ -1994,7 +2028,7 @@ public: template< class RT , class ... RP , class Arg0 , class ... Args > KOKKOS_INLINE_FUNCTION View( const View< RT , RP... > & src_view - , const Arg0 & arg0 , Args ... args ) + , const Arg0 arg0 , Args ... args ) : m_track( src_view.m_track , traits::is_managed ) , m_map() { @@ -2077,7 +2111,7 @@ public: } // Copy the input allocation properties with possibly defaulted properties - alloc_prop prop( arg_prop ); + alloc_prop prop_copy( arg_prop ); //------------------------------------------------------------ #if defined( KOKKOS_ENABLE_CUDA ) @@ -2087,18 +2121,18 @@ public: // Fence using the trait's executon space (which will be Kokkos::Cuda) // to avoid incomplete type errors from usng Kokkos::Cuda directly. if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - traits::device_type::memory_space::execution_space::fence(); + typename traits::device_type::memory_space::execution_space().fence(); } #endif //------------------------------------------------------------ Kokkos::Impl::SharedAllocationRecord<> * - record = m_map.allocate_shared( prop , arg_layout ); + record = m_map.allocate_shared( prop_copy , arg_layout ); //------------------------------------------------------------ #if defined( KOKKOS_ENABLE_CUDA ) if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - traits::device_type::memory_space::execution_space::fence(); + typename traits::device_type::memory_space::execution_space().fence(); } #endif //------------------------------------------------------------ diff --git a/lib/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp b/lib/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp index 33a0579df5..dd5e29a400 100644 --- a/lib/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp +++ b/lib/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp @@ -55,7 +55,7 @@ class WorkGraphExec; namespace Kokkos { template< class ... Properties > -class WorkGraphPolicy +class WorkGraphPolicy: public Kokkos::Impl::PolicyTraits { public: @@ -64,7 +64,6 @@ public: using traits = Kokkos::Impl::PolicyTraits; using index_type = typename traits::index_type; using member_type = index_type; - using work_tag = typename traits::work_tag; using execution_space = typename traits::execution_space; using memory_space = typename execution_space::memory_space; using graph_type = Kokkos::Crs; @@ -217,7 +216,7 @@ public: using closure_type = Kokkos::Impl::ParallelFor; const closure_type closure(*this, policy_type(0, m_queue.size())); closure.execute(); - execution_space::fence(); + execution_space().fence(); } { // execute-after counts @@ -225,7 +224,7 @@ public: using closure_type = Kokkos::Impl::ParallelFor; const closure_type closure(*this,policy_type(0,m_graph.entries.size())); closure.execute(); - execution_space::fence(); + execution_space().fence(); } { // Scheduling ready tasks @@ -233,7 +232,7 @@ public: using closure_type = Kokkos::Impl::ParallelFor; const closure_type closure(*this,policy_type(0,m_graph.numRows())); closure.execute(); - execution_space::fence(); + execution_space().fence(); } } }; @@ -256,4 +255,8 @@ public: #include "Threads/Kokkos_Threads_WorkGraphPolicy.hpp" #endif +#ifdef KOKKOS_ENABLE_HPX +#include "HPX/Kokkos_HPX_WorkGraphPolicy.hpp" +#endif + #endif /* #define KOKKOS_WORKGRAPHPOLICY_HPP */ diff --git a/lib/kokkos/core/src/Makefile b/lib/kokkos/core/src/Makefile index c2dbddf45e..ae8dc17510 100644 --- a/lib/kokkos/core/src/Makefile +++ b/lib/kokkos/core/src/Makefile @@ -40,6 +40,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) CONDITIONAL_COPIES += copy-openmp endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + CONDITIONAL_COPIES += copy-hpx +endif + ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) CONDITIONAL_COPIES += copy-rocm endif @@ -91,6 +95,10 @@ copy-openmp: mkdir mkdir -p $(PREFIX)/include/OpenMP $(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP +copy-hpx: mkdir + mkdir -p $(PREFIX)/include/HPX + $(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_HPX) $(PREFIX)/include/HPX + copy-rocm: mkdir mkdir -p $(PREFIX)/include/ROCm $(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_ROCM) $(PREFIX)/include/ROCm diff --git a/lib/kokkos/core/src/Makefile.generate_build_files b/lib/kokkos/core/src/Makefile.generate_build_files index cc856ee9a3..651b9d5fe9 100644 --- a/lib/kokkos/core/src/Makefile.generate_build_files +++ b/lib/kokkos/core/src/Makefile.generate_build_files @@ -84,6 +84,7 @@ generate_build_settings: $(KOKKOS_CONFIG_HEADER) $(KOKKOS_PKGCONFIG) @$(call kokkos_append_var,KOKKOS_HEADERS_IMPL,'STRING "Kokkos headers impl list"') @$(call kokkos_append_var,KOKKOS_HEADERS_CUDA,'STRING "Kokkos headers Cuda list"') @$(call kokkos_append_var,KOKKOS_HEADERS_OPENMP,'STRING "Kokkos headers OpenMP list"') + @$(call kokkos_append_var,KOKKOS_HEADERS_HPX,'STRING "Kokkos headers HPX list"') @$(call kokkos_append_var,KOKKOS_HEADERS_ROCM,'STRING "Kokkos headers ROCm list"') @$(call kokkos_append_var,KOKKOS_HEADERS_THREADS,'STRING "Kokkos headers Threads list"') @$(call kokkos_append_var,KOKKOS_HEADERS_QTHREADS,'STRING "Kokkos headers QThreads list"') @@ -103,11 +104,13 @@ generate_build_settings: $(KOKKOS_CONFIG_HEADER) $(KOKKOS_PKGCONFIG) @$(call kokkos_append_string,"#Internal settings which need to propagated for Kokkos examples") @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_CUDA,'STRING ""') @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_OPENMP,'STRING ""') + @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_HPX,'STRING ""') @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_PTHREADS,'STRING ""') @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_SERIAL,'STRING ""') @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_ROCM,'STRING ""') + @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_HPX,'STRING ""') @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_QTHREADS,'STRING ""') # Not in original cmake gen - @$(call kokkos_append_cmakefile "mark_as_advanced(KOKKOS_HEADERS KOKKOS_SRC KOKKOS_INTERNAL_USE_CUDA KOKKOS_INTERNAL_USE_OPENMP KOKKOS_INTERNAL_USE_PTHREADS KOKKOS_INTERNAL_USE_SERIAL)") + @$(call kokkos_append_cmakefile "mark_as_advanced(KOKKOS_HEADERS KOKKOS_SRC KOKKOS_INTERNAL_USE_CUDA KOKKOS_INTERNAL_USE_OPENMP KOKKOS_INTERNAL_USE_HPX KOKKOS_INTERNAL_USE_PTHREADS KOKKOS_INTERNAL_USE_SERIAL)") @$(call kokkos_append_makefile,"") @$(call kokkos_append_makefile,"#Fake kokkos-clean target") @$(call kokkos_append_makefile,"kokkos-clean:") diff --git a/lib/kokkos/core/src/Makefile.generate_header_lists b/lib/kokkos/core/src/Makefile.generate_header_lists index cd308bf8f4..afbefb3806 100644 --- a/lib/kokkos/core/src/Makefile.generate_header_lists +++ b/lib/kokkos/core/src/Makefile.generate_header_lists @@ -22,6 +22,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + KOKKOS_HEADERS_HPX += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp) +endif + ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) KOKKOS_HEADERS_ROCM += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp) endif diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp index e57b61d7cb..1946c10741 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp @@ -472,6 +472,10 @@ int OpenMP::concurrency() { return Impl::g_openmp_hardware_max_threads; } +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE +void OpenMP::fence() const {} +#endif + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE void OpenMP::initialize( int thread_count , int, int ) diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp index 43fa7888cf..5178199ac2 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp @@ -184,8 +184,13 @@ int OpenMP::impl_thread_pool_rank() noexcept #endif } +inline +void OpenMP::impl_static_fence( OpenMP const& instance ) noexcept {} + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE inline void OpenMP::fence( OpenMP const& instance ) noexcept {} +#endif inline bool OpenMP::is_asynchronous( OpenMP const& instance ) noexcept diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp index e0bb572a3b..ae6b49f650 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp @@ -128,11 +128,10 @@ public: OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_for"); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::thread_pool_size()) #else - const int pool_size = OpenMP::impl_thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) #endif - #pragma omp parallel num_threads(pool_size) { HostThreadTeamData & data = *(m_instance->get_thread_data()); @@ -228,11 +227,10 @@ public: OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_for"); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::thread_pool_size()) #else - const int pool_size = OpenMP::impl_thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) #endif - #pragma omp parallel num_threads(pool_size) { HostThreadTeamData & data = *(m_instance->get_thread_data()); @@ -703,11 +701,10 @@ public: ); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::thread_pool_size()) #else - const int pool_size = OpenMP::impl_thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) #endif - #pragma omp parallel num_threads(pool_size) { HostThreadTeamData & data = *(m_instance->get_thread_data()); @@ -840,11 +837,10 @@ public: ); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::thread_pool_size()) #else - const int pool_size = OpenMP::impl_thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) #endif - #pragma omp parallel num_threads(pool_size) { HostThreadTeamData & data = *(m_instance->get_thread_data()); @@ -1005,11 +1001,10 @@ public: , thread_local_size ); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::thread_pool_size()) #else - const int pool_size = OpenMP::impl_thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) #endif - #pragma omp parallel num_threads(pool_size) { HostThreadTeamData & data = *(m_instance->get_thread_data()); diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp index 2f2c768460..3b1c187c6d 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp @@ -48,6 +48,8 @@ #include #include +#include +#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -55,200 +57,44 @@ namespace Kokkos { namespace Impl { -template class TaskQueue< Kokkos::OpenMP > ; +template class TaskQueue< Kokkos::OpenMP, typename Kokkos::OpenMP::memory_space > ; -class HostThreadTeamDataSingleton : private HostThreadTeamData { -private: - - HostThreadTeamDataSingleton() : HostThreadTeamData() - { - Kokkos::OpenMP::memory_space space ; - const size_t num_pool_reduce_bytes = 32 ; - const size_t num_team_reduce_bytes = 32 ; - const size_t num_team_shared_bytes = 1024 ; - const size_t num_thread_local_bytes = 1024 ; - const size_t alloc_bytes = - HostThreadTeamData::scratch_size( num_pool_reduce_bytes - , num_team_reduce_bytes - , num_team_shared_bytes - , num_thread_local_bytes ); - - HostThreadTeamData::scratch_assign - ( space.allocate( alloc_bytes ) - , alloc_bytes - , num_pool_reduce_bytes - , num_team_reduce_bytes - , num_team_shared_bytes - , num_thread_local_bytes ); - } - - ~HostThreadTeamDataSingleton() - { - Kokkos::OpenMP::memory_space space ; - space.deallocate( HostThreadTeamData::scratch_buffer() - , HostThreadTeamData::scratch_bytes() ); - } - -public: - - static HostThreadTeamData & singleton() - { - static HostThreadTeamDataSingleton s ; - return s ; - } -}; - -//---------------------------------------------------------------------------- - -void TaskQueueSpecialization< Kokkos::OpenMP >::execute - ( TaskQueue< Kokkos::OpenMP > * const queue ) +HostThreadTeamData& HostThreadTeamDataSingleton::singleton() { - using task_root_type = TaskBase< void , void , void > ; - using Member = Impl::HostThreadTeamMember< execution_space > ; - - static task_root_type * const end = - (task_root_type *) task_root_type::EndTag ; - - - HostThreadTeamData & team_data_single = - HostThreadTeamDataSingleton::singleton(); - - Impl::OpenMPExec * instance = t_openmp_instance; -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int pool_size = OpenMP::thread_pool_size(); -#else - const int pool_size = OpenMP::impl_thread_pool_size(); -#endif - - const int team_size = 1; // Threads per core - instance->resize_thread_data( 0 /* global reduce buffer */ - , 512 * team_size /* team reduce buffer */ - , 0 /* team shared buffer */ - , 0 /* thread local buffer */ - ); - - #pragma omp parallel num_threads(pool_size) - { - Impl::HostThreadTeamData & self = *(instance->get_thread_data()); - - // Organizing threads into a team performs a barrier across the - // entire pool to insure proper initialization of the team - // rendezvous mechanism before a team rendezvous can be performed. - - if ( self.organize_team( team_size ) ) { - - Member single_exec( team_data_single ); - Member team_exec( self ); - - // Loop until all queues are empty and no tasks in flight - - task_root_type * task = 0 ; - - do { - // Each team lead attempts to acquire either a thread team task - // or a single thread task for the team. - - if ( 0 == team_exec.team_rank() ) { - - bool leader_loop = false ; - - do { - - if ( 0 != task && end != task ) { - // team member #0 completes the previously executed task, - // completion may delete the task - queue->complete( task ); - } - - // If 0 == m_ready_count then set task = 0 - - task = 0 < *((volatile int *) & queue->m_ready_count) ? end : 0 ; - - // Attempt to acquire a task - // Loop by priority and then type - for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { - for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); - } - } - - // If still tasks are still executing - // and no task could be acquired - // then continue this leader loop - leader_loop = end == task ; - - if ( ( ! leader_loop ) && - ( 0 != task ) && - ( task_root_type::TaskSingle == task->m_task_type ) ) { - - // if a single thread task then execute now - - (*task->m_apply)( task , & single_exec ); - - leader_loop = true ; - } - } while ( leader_loop ); - } - - // Team lead either found 0 == m_ready_count or a team task - // Team lead broadcast acquired task: - - team_exec.team_broadcast( task , 0); - - if ( 0 != task ) { // Thread Team Task - - (*task->m_apply)( task , & team_exec ); - - // The m_apply function performs a barrier - } - } while( 0 != task ); - } - self.disband_team(); - } + static HostThreadTeamDataSingleton s; + return s; } -void TaskQueueSpecialization< Kokkos::OpenMP >:: - iff_single_thread_recursive_execute - ( TaskQueue< Kokkos::OpenMP > * const queue ) +HostThreadTeamDataSingleton::HostThreadTeamDataSingleton() + : HostThreadTeamData() { - using task_root_type = TaskBase< void , void , void > ; - using Member = Impl::HostThreadTeamMember< execution_space > ; + Kokkos::OpenMP::memory_space space ; + const size_t num_pool_reduce_bytes = 32 ; + const size_t num_team_reduce_bytes = 32 ; + const size_t num_team_shared_bytes = 1024 ; + const size_t num_thread_local_bytes = 1024 ; + const size_t alloc_bytes = + HostThreadTeamData::scratch_size( num_pool_reduce_bytes + , num_team_reduce_bytes + , num_team_shared_bytes + , num_thread_local_bytes ); -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE - if ( 1 == OpenMP::thread_pool_size() ) -#else - if ( 1 == OpenMP::impl_thread_pool_size() ) -#endif - { + HostThreadTeamData::scratch_assign + ( space.allocate( alloc_bytes ) + , alloc_bytes + , num_pool_reduce_bytes + , num_team_reduce_bytes + , num_team_shared_bytes + , num_thread_local_bytes ); +} - task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - - HostThreadTeamData & team_data_single = - HostThreadTeamDataSingleton::singleton(); - - Member single_exec( team_data_single ); - - task_root_type * task = end ; - - do { - - task = end ; - - // Loop by priority and then type - for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { - for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); - } - } - - if ( end == task ) break ; - - (*task->m_apply)( task , & single_exec ); - - queue->complete( task ); - - } while(1); - } +HostThreadTeamDataSingleton::~HostThreadTeamDataSingleton() +{ + Kokkos::OpenMP::memory_space space ; + space.deallocate( + HostThreadTeamData::scratch_buffer(), + static_cast(HostThreadTeamData::scratch_bytes()) + ); } }} /* namespace Kokkos::Impl */ diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp index b99c149b06..4029c015b3 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp @@ -47,38 +47,388 @@ #include #if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) +#include + +#include +#include + +#include +#include + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { namespace Impl { -template<> -class TaskQueueSpecialization< Kokkos::OpenMP > +class HostThreadTeamDataSingleton : private HostThreadTeamData { +private: + + HostThreadTeamDataSingleton(); + ~HostThreadTeamDataSingleton(); + +public: + + static HostThreadTeamData & singleton(); + +}; + +// Hack this as a partial specialization for now +// TODO @tasking @cleanup DSH Make this the general class template and make the old code the partial specialization +template +class TaskQueueSpecialization< + SimpleTaskScheduler +> { public: - using execution_space = Kokkos::OpenMP ; - using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; - using task_base_type = Kokkos::Impl::TaskBase< void , void , void > ; - using member_type = Kokkos::Impl::HostThreadTeamMember< execution_space > ; + using execution_space = Kokkos::OpenMP; + using scheduler_type = SimpleTaskScheduler; + using member_type = TaskTeamMemberAdapter< + Kokkos::Impl::HostThreadTeamMember, + scheduler_type + >; + using memory_space = Kokkos::HostSpace; - // Must specify memory space - using memory_space = Kokkos::HostSpace ; - - static - void iff_single_thread_recursive_execute( queue_type * const ); + enum : int { max_league_size = HostThreadTeamData::max_pool_members }; // Must provide task queue execution function - static void execute( queue_type * const ); + static void execute(scheduler_type const& scheduler) + { + using task_base_type = typename scheduler_type::task_base_type; - template< typename TaskType > - static - typename TaskType::function_type - get_function_pointer() { return TaskType::apply ; } + // Unused; ChaseLev queue still needs worker ID even in single case (so we need to use + // the thread data from inside of the parallel region. Team size is fixed at 1 for now + // anyway + //HostThreadTeamData& team_data_single = HostThreadTeamDataSingleton::singleton(); + + // TODO @tasking @generalization DSH use scheduler.get_execution_space().impl() (or something like that) instead of the thread-local variable + Impl::OpenMPExec* instance = t_openmp_instance; + const int pool_size = get_max_team_count(scheduler.get_execution_space()); + + // TODO @tasking @new_feature DSH allow team sizes other than 1 + const int team_size = 1; // Threads per core + instance->resize_thread_data( + 0, /* global reduce buffer */ + 512 * team_size, /* team reduce buffer */ + 0, /* team shared buffer */ + 0 /* thread local buffer */ + ); + assert(pool_size % team_size == 0); + + auto& queue = scheduler.queue(); + + //queue.initialize_team_queues(pool_size / team_size); + + #pragma omp parallel num_threads(pool_size) + { + Impl::HostThreadTeamData & self = *(instance->get_thread_data()); + + // Organizing threads into a team performs a barrier across the + // entire pool to insure proper initialization of the team + // rendezvous mechanism before a team rendezvous can be performed. + + // organize_team() returns true if this is an active team member + if(self.organize_team(team_size)) { + + member_type single_exec(scheduler, self); + member_type team_exec(scheduler, self); + + auto& team_scheduler = team_exec.scheduler(); + + auto current_task = OptionalRef(nullptr); + + while(not queue.is_done()) { + + // Each team lead attempts to acquire either a thread team task + // or a single thread task for the team. + if(team_exec.team_rank() == 0) { + + // loop while both: + // - the queue is not done + // - the most recently popped task is a single task or empty + while(not queue.is_done()) { + + current_task = queue.pop_ready_task(team_scheduler.team_scheduler_info()); + + if(current_task) { + + if(current_task->is_team_runnable()) { + // break out of the team leader loop to run the team task + break; + } + else { + KOKKOS_ASSERT(current_task->is_single_runnable()); + current_task->as_runnable_task().run(single_exec); + // Respawns are handled in the complete function + queue.complete( + (*std::move(current_task)).as_runnable_task(), + team_scheduler.team_scheduler_info() + ); + } + + } // end if current_task is not null + + current_task = nullptr; + + } // end team leader loop + + } + + // Otherwise, make sure everyone in the team has the same task + team_exec.team_broadcast(current_task, 0); + + if(current_task) { + KOKKOS_ASSERT(current_task->is_team_runnable()); + current_task->as_runnable_task().run(team_exec); + + if(team_exec.team_rank() == 0) { + // Respawns are handled in the complete function + queue.complete( + (*std::move(current_task)).as_runnable_task(), + team_scheduler.team_scheduler_info() + ); + } + + } + + } + } + self.disband_team(); + } // end pragma omp parallel + } + + static uint32_t + get_max_team_count(execution_space const& espace) { +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + return static_cast(espace.thread_pool_size()); +#else + return static_cast(espace.impl_thread_pool_size()); +#endif + } + + // TODO @tasking @optimization DSH specialize this for trivially destructible types + template + static void + get_function_pointer( + typename TaskType::function_type& ptr, + typename TaskType::destroy_type& dtor + ) { + ptr = TaskType::apply; + dtor = TaskType::destroy; + } }; -extern template class TaskQueue< Kokkos::OpenMP > ; + +template +class TaskQueueSpecializationConstrained< + Scheduler, + typename std::enable_if< + std::is_same::value + >::type +> +{ +public: + + using execution_space = Kokkos::OpenMP; + using scheduler_type = Scheduler; + using member_type = TaskTeamMemberAdapter< + Kokkos::Impl::HostThreadTeamMember, + scheduler_type + >; + using memory_space = Kokkos::HostSpace ; + + enum : int { max_league_size = HostThreadTeamData::max_pool_members }; + + static + void iff_single_thread_recursive_execute( scheduler_type const& scheduler ) { + using task_base_type = typename scheduler_type::task_base; + using queue_type = typename scheduler_type::queue_type; + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + if ( 1 == OpenMP::thread_pool_size() ) +#else + if ( 1 == OpenMP::impl_thread_pool_size() ) +#endif + { + + task_base_type * const end = (task_base_type *) task_base_type::EndTag ; + + HostThreadTeamData & team_data_single = + HostThreadTeamDataSingleton::singleton(); + + member_type single_exec( scheduler, team_data_single ); + + task_base_type * task = end ; + + do { + + task = end ; + + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_ready_task( & scheduler.m_queue->m_ready[i][j] ); + } + } + + if ( end == task ) break ; + + (*task->m_apply)( task , & single_exec ); + + scheduler.m_queue->complete( task ); + + } while(1); + } + + } + + // Must provide task queue execution function + static void execute(scheduler_type const& scheduler) + { + using task_base_type = typename scheduler_type::task_base; + using queue_type = typename scheduler_type::queue_type; + + static task_base_type * const end = + (task_base_type *) task_base_type::EndTag ; + + constexpr task_base_type* no_more_tasks_sentinel = nullptr; + + + HostThreadTeamData & team_data_single = + HostThreadTeamDataSingleton::singleton(); + + Impl::OpenMPExec * instance = t_openmp_instance; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + const int pool_size = OpenMP::thread_pool_size(); +#else + const int pool_size = OpenMP::impl_thread_pool_size(); +#endif + + const int team_size = 1; // Threads per core + instance->resize_thread_data( 0 /* global reduce buffer */ + , 512 * team_size /* team reduce buffer */ + , 0 /* team shared buffer */ + , 0 /* thread local buffer */ + ); + assert(pool_size % team_size == 0); + auto& queue = scheduler.queue(); + queue.initialize_team_queues(pool_size / team_size); + +#pragma omp parallel num_threads(pool_size) + { + Impl::HostThreadTeamData & self = *(instance->get_thread_data()); + + // Organizing threads into a team performs a barrier across the + // entire pool to insure proper initialization of the team + // rendezvous mechanism before a team rendezvous can be performed. + + // organize_team() returns true if this is an active team member + if ( self.organize_team( team_size ) ) { + + member_type single_exec(scheduler, team_data_single); + member_type team_exec(scheduler, self); + + auto& team_queue = team_exec.scheduler().queue(); + + // Loop until all queues are empty and no tasks in flight + + task_base_type * task = no_more_tasks_sentinel; + + + do { + // Each team lead attempts to acquire either a thread team task + // or a single thread task for the team. + + if ( 0 == team_exec.team_rank() ) { + + bool leader_loop = false ; + + do { + + if ( task != no_more_tasks_sentinel && task != end ) { + // team member #0 completes the previously executed task, + // completion may delete the task + team_queue.complete( task ); + } + + // If 0 == m_ready_count then set task = 0 + + if( *((volatile int *) & team_queue.m_ready_count) > 0 ) { + task = end; + // Attempt to acquire a task + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_ready_task( & team_queue.m_ready[i][j] ); + } + } + } + else { + // returns nullptr if and only if all other queues have a ready + // count of 0 also. Otherwise, returns a task from another queue + // or `end` if one couldn't be popped + task = team_queue.attempt_to_steal_task(); + #if 0 + if(task != no_more_tasks_sentinel && task != end) { + std::printf("task stolen on rank %d\n", team_exec.league_rank()); + } + #endif + } + + // If still tasks are still executing + // and no task could be acquired + // then continue this leader loop + if(task == end) { + // this means that the ready task count was not zero, but we + // couldn't pop a task (because, for instance, someone else + // got there before us + leader_loop = true; + } + else if ( ( task != no_more_tasks_sentinel ) && + ( task_base_type::TaskSingle == task->m_task_type ) ) { + + // if a single thread task then execute now + + (*task->m_apply)(task, &single_exec); + + leader_loop = true; + } + else { + leader_loop = false; + } + } while ( leader_loop ); + } + + // Team lead either found 0 == m_ready_count or a team task + // Team lead broadcast acquired task: + + team_exec.team_broadcast( task , 0); + + if ( task != no_more_tasks_sentinel ) { // Thread Team Task + + (*task->m_apply)( task , & team_exec ); + + // The m_apply function performs a barrier + } + } while( task != no_more_tasks_sentinel ); + } + self.disband_team(); + } // end pragma omp parallel + } + + template< typename TaskType > + static void + get_function_pointer( + typename TaskType::function_type& ptr, + typename TaskType::destroy_type& dtor + ) { + ptr = TaskType::apply; + dtor = TaskType::destroy; + } +}; + +extern template class TaskQueue< Kokkos::OpenMP, typename Kokkos::OpenMP::memory_space > ; }} /* namespace Kokkos::Impl */ diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp index e8fbc467e0..38b062bdc0 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp @@ -74,6 +74,21 @@ public: return *this; } + template + friend class TeamPolicyInternal; + + template< class ... OtherProperties > + TeamPolicyInternal(const TeamPolicyInternal& p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_team_alloc = p.m_team_alloc; + m_team_iter = p.m_team_iter; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + } //---------------------------------------- #ifdef KOKKOS_ENABLE_DEPRECATED_CODE @@ -208,7 +223,7 @@ public: } /** \brief Specify league size, request team size */ - TeamPolicyInternal( typename traits::execution_space & + TeamPolicyInternal( const typename traits::execution_space & , int league_size_request , int team_size_request , int /* vector_length_request */ = 1 ) @@ -217,14 +232,18 @@ public: , m_chunk_size(0) { init( league_size_request , team_size_request ); } - TeamPolicyInternal( typename traits::execution_space & + TeamPolicyInternal( const typename traits::execution_space & , int league_size_request , const Kokkos::AUTO_t & /* team_size_request */ , int /* vector_length_request */ = 1) : m_team_scratch_size { 0 , 0 } , m_thread_scratch_size { 0 , 0 } , m_chunk_size(0) +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE { init( league_size_request , traits::execution_space::thread_pool_size(2) ); } +#else + { init( league_size_request , traits::execution_space::impl_thread_pool_size(2) ); } +#endif TeamPolicyInternal( int league_size_request , int team_size_request diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp index 879d5d2d24..0742575cb8 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp @@ -76,11 +76,10 @@ public: void execute() { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::thread_pool_size()) #else - const int pool_size = OpenMP::impl_thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) #endif - #pragma omp parallel num_threads(pool_size) { // Spin until COMPLETED_TOKEN. // END_TOKEN indicates no work is currently available. diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp index fc31a91b22..c93a88606d 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp @@ -697,13 +697,13 @@ namespace Impl { const iType increment; inline - TeamThreadRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, const iType& count): + TeamThreadRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, iType count): start( thread_.team_rank() ), end( count ), increment( thread_.team_size() ) {} inline - TeamThreadRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, const iType& begin_, const iType& end_): + TeamThreadRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, iType begin_, iType end_): start( begin_+thread_.team_rank() ), end( end_ ), increment( thread_.team_size() ) @@ -718,13 +718,13 @@ namespace Impl { const index_type increment; inline - ThreadVectorRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, const index_type& count): + ThreadVectorRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, index_type count): start( thread_.m_vector_lane ), end( count ), increment( thread_.m_vector_length ) {} inline - ThreadVectorRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, const index_type& begin_, const index_type& end_): + ThreadVectorRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, index_type begin_, index_type end_): start( begin_+thread_.m_vector_lane ), end( end_ ), increment( thread_.m_vector_length ) @@ -734,28 +734,28 @@ namespace Impl { template KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct - TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType& count) { + TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, iType count) { return Impl::TeamThreadRangeBoundariesStruct(thread,count); } template KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct - TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType& begin, const iType& end) { + TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, iType begin, iType end) { return Impl::TeamThreadRangeBoundariesStruct(thread,begin,end); } template KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct - ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType& count) { + ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, iType count) { return Impl::ThreadVectorRangeBoundariesStruct(thread,count); } template KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct - ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType& begin, const iType& end) { + ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, iType begin, iType end) { return Impl::ThreadVectorRangeBoundariesStruct(thread,begin,end); } diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp index 5ad90436af..7b1b63befe 100644 --- a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp @@ -51,7 +51,6 @@ #include -#include #include #include diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp index 205e6a2955..3e81883278 100644 --- a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp +++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp @@ -227,7 +227,7 @@ struct ROCmParallelLaunch< DriverType //#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) // ROCM_SAFE_CALL( rocmGetLastError() ); -// Kokkos::ROCm::fence(); +// Kokkos::ROCm().fence(); //#endif } } diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp index edd1c12e45..48654555b2 100644 --- a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp +++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp @@ -86,6 +86,21 @@ public: return *this; } + template + friend class TeamPolicyInternal; + + template< class ... OtherProperties > + TeamPolicyInternal(const TeamPolicyInternal& p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_vector_length = p.m_vector_length; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + } + TeamPolicyInternal() : m_league_size( 0 ) , m_team_size( 0 ) @@ -1099,7 +1114,7 @@ public: ROCmParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute - ROCM::fence(); + ROCM().fence(); if ( m_result_ptr ) { const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ); @@ -1494,14 +1509,14 @@ namespace Kokkos { template KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct - TeamThreadRange(const Impl::ROCmTeamMember& thread, const iType& count) { + TeamThreadRange(const Impl::ROCmTeamMember& thread, iType count) { return Impl::TeamThreadRangeBoundariesStruct(thread,count); } template KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct::type,Impl::ROCmTeamMember> - TeamThreadRange(const Impl::ROCmTeamMember& thread, const iType1& begin, const iType2& end) { + TeamThreadRange(const Impl::ROCmTeamMember& thread, iType1 begin, iType2 end) { typedef typename std::common_type< iType1, iType2 >::type iType; return Impl::TeamThreadRangeBoundariesStruct(thread,begin,end); } @@ -1509,14 +1524,14 @@ Impl::TeamThreadRangeBoundariesStruct KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct - ThreadVectorRange(const Impl::ROCmTeamMember& thread, const iType& count) { + ThreadVectorRange(const Impl::ROCmTeamMember& thread, iType count) { return Impl::ThreadVectorRangeBoundariesStruct(thread,count); } template KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct - ThreadVectorRange(const Impl::ROCmTeamMember& thread, const iType& arg_begin, const iType& arg_end) { + ThreadVectorRange(const Impl::ROCmTeamMember& thread, iType arg_begin, iType arg_end) { return Impl::ThreadVectorRangeBoundariesStruct(thread,arg_begin,arg_end); } diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp index 559d6f2fcb..347778f289 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp @@ -804,6 +804,10 @@ int Threads::concurrency() { return impl_thread_pool_size(0); #endif } +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE +void Threads::fence() const +{ Impl::ThreadsExec::fence() ; } +#endif #ifdef KOKKOS_ENABLE_DEPRECATED_CODE Threads & Threads::instance(int) diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp index 61d7667d58..7af9d9e065 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp @@ -649,8 +649,12 @@ inline bool Threads::wake() { return Impl::ThreadsExec::wake() ; } #endif +inline void Threads::impl_static_fence() +{ Impl::ThreadsExec::fence() ; } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE inline void Threads::fence() { Impl::ThreadsExec::fence() ; } +#endif } /* namespace Kokkos */ diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp index e88abdba50..9d6c0fa8cf 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp @@ -72,9 +72,12 @@ private: enum { TEAM_REDUCE_SIZE = 512 }; +public: typedef Kokkos::Threads execution_space ; - typedef execution_space::scratch_memory_space space ; + typedef execution_space::scratch_memory_space scratch_memory_space ; +private: + typedef execution_space::scratch_memory_space space ; ThreadsExec * const m_exec ; ThreadsExec * const * m_team_base ; ///< Base for team fan-in space m_team_shared ; @@ -228,14 +231,20 @@ public: } #endif + template< typename ReducerType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< is_reducer< ReducerType >::value >::type + team_reduce( ReducerType const & reducer ) const noexcept + { team_reduce(reducer,reducer.reference()); } + template< typename ReducerType > KOKKOS_INLINE_FUNCTION typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type #if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - team_reduce( const ReducerType & ) const + team_reduce( const ReducerType &, const typename ReducerType::value_type ) const {} #else - team_reduce( const ReducerType & reducer ) const + team_reduce( const ReducerType & reducer, const typename ReducerType::value_type contribution ) const { typedef typename ReducerType::value_type value_type; // Make sure there is enough scratch space: @@ -247,7 +256,7 @@ public: type * const local_value = ((type*) m_exec->scratch_memory()); // Set this thread's contribution - *local_value = reducer.reference() ; + *local_value = contribution ; // Fence to make sure the base team member has access: memory_fence(); @@ -277,58 +286,7 @@ public: } #endif - template< class ValueType, class JoinOp > - KOKKOS_INLINE_FUNCTION ValueType - team_reduce( const ValueType & value - , const JoinOp & op_in ) const - #if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return ValueType(); } - #else - { - typedef ValueType value_type; - const JoinLambdaAdapter op(op_in); - #endif -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - // Make sure there is enough scratch space: - typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE - , value_type , void >::type type ; - - if ( 0 == m_exec ) return value ; - - type * const local_value = ((type*) m_exec->scratch_memory()); - - // Set this thread's contribution - *local_value = value ; - - // Fence to make sure the base team member has access: - memory_fence(); - - if ( team_fan_in() ) { - // The last thread to synchronize returns true, all other threads wait for team_fan_out() - type * const team_value = ((type*) m_team_base[0]->scratch_memory()); - - // Join to the team value: - for ( int i = 1 ; i < m_team_size ; ++i ) { - op.join( *team_value , *((type*) m_team_base[i]->scratch_memory()) ); - } - - // Team base thread may "lap" member threads so copy out to their local value. - for ( int i = 1 ; i < m_team_size ; ++i ) { - *((type*) m_team_base[i]->scratch_memory()) = *team_value ; - } - - // Fence to make sure all team members have access - memory_fence(); - } - - team_fan_out(); - - // Value was changed by the team base - return *((type volatile const *) local_value); - } -#endif - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering + /** \brief Intra-team exclusive prefix sum with team_rank() ordering * with intra-team non-deterministic ordering accumulation. * * The global inter-team accumulation value will, at the end of the @@ -645,6 +603,22 @@ public: return *this; } + template + friend class TeamPolicyInternal; + + template< class ... OtherProperties > + TeamPolicyInternal(const TeamPolicyInternal& p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_team_alloc = p.m_team_alloc; + m_team_iter = p.m_team_iter; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + } + //---------------------------------------- #ifdef KOKKOS_ENABLE_DEPRECATED_CODE @@ -734,7 +708,7 @@ public: inline int team_iter() const { return m_team_iter ; } /** \brief Specify league size, request team size */ - TeamPolicyInternal( typename traits::execution_space & + TeamPolicyInternal( const typename traits::execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1 ) @@ -747,7 +721,7 @@ public: { init(league_size_request,team_size_request); (void) vector_length_request; } /** \brief Specify league size, request team size */ - TeamPolicyInternal( typename traits::execution_space & + TeamPolicyInternal( const typename traits::execution_space & , int league_size_request , const Kokkos::AUTO_t & /* team_size_request */ , int /* vector_length_request */ = 1 ) @@ -757,7 +731,11 @@ public: , m_team_scratch_size { 0 , 0 } , m_thread_scratch_size { 0 , 0 } , m_chunk_size(0) +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE { init(league_size_request,traits::execution_space::thread_pool_size(2)); } +#else + { init(league_size_request,traits::execution_space::impl_thread_pool_size(2)); } +#endif TeamPolicyInternal( int league_size_request , int team_size_request @@ -924,6 +902,23 @@ TeamThreadRange( const Impl::ThreadsExecTeamMember& thread, const iType1 & begin return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::ThreadsExecTeamMember >( thread, iType(begin), iType(end) ); } +template< typename iType > +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct< iType, Impl::ThreadsExecTeamMember > +TeamVectorRange( const Impl::ThreadsExecTeamMember& thread, const iType& count ) +{ + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::ThreadsExecTeamMember >( thread, count ); +} + +template< typename iType1, typename iType2 > +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, + Impl::ThreadsExecTeamMember> +TeamVectorRange( const Impl::ThreadsExecTeamMember& thread, const iType1 & begin, const iType2 & end ) +{ + typedef typename std::common_type< iType1, iType2 >::type iType; + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::ThreadsExecTeamMember >( thread, iType(begin), iType(end) ); +} template KOKKOS_INLINE_FUNCTION @@ -974,15 +969,18 @@ typename std::enable_if< !Kokkos::is_reducer< ValueType >::value >::type parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, const Lambda & lambda, ValueType& result) { - result = ValueType(); + ValueType intermediate; + Sum sum(intermediate); + sum.init(intermediate); for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { ValueType tmp = ValueType(); lambda(i,tmp); - result+=tmp; + intermediate+=tmp; } - result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd()); + loop_boundaries.thread.team_reduce(sum,intermediate); + result = sum.reference(); } template< typename iType, class Lambda, typename ReducerType > @@ -991,36 +989,14 @@ typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, const Lambda & lambda, const ReducerType& reducer) { - reducer.init(reducer.reference()); + typename ReducerType::value_type value; + reducer.init(value); for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i,reducer.reference()); + lambda(i,value); } - loop_boundaries.thread.team_reduce(reducer); -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of - * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. - * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore - * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or - * '1 for *'). This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, - const Lambda & lambda, const JoinType& join, ValueType& init_result) { - - ValueType result = init_result; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - join(result,tmp); - } - - init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter(join)); + loop_boundaries.thread.team_reduce(reducer,value); } } //namespace Kokkos @@ -1068,25 +1044,6 @@ parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct& - loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& result ) { - -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i,result); - } -} /** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final) * for each i=0..N-1. diff --git a/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp index 42269176ed..022a5fc188 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp @@ -52,7 +52,6 @@ #include -#include #include #include diff --git a/lib/kokkos/core/src/eti/CMakeLists.txt b/lib/kokkos/core/src/eti/CMakeLists.txt index a4db7a7eb6..a7e7717a6e 100644 --- a/lib/kokkos/core/src/eti/CMakeLists.txt +++ b/lib/kokkos/core/src/eti/CMakeLists.txt @@ -4,6 +4,9 @@ endif() if (KOKKOS_ENABLE_OPENMP) add_subdirectory(OpenMP) endif() +if (KOKKOS_ENABLE_HPX) + add_subdirectory(HPX) +endif() if (KOKKOS_ENABLE_ROCM) add_subdirectory(ROCm) endif() diff --git a/lib/kokkos/core/src/eti/HPX/CMakeLists.txt b/lib/kokkos/core/src/eti/HPX/CMakeLists.txt new file mode 100644 index 0000000000..131a2d2e6e --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/CMakeLists.txt @@ -0,0 +1,148 @@ +set(D "${CMAKE_CURRENT_SOURCE_DIR}") +set(ETI_SOURCES +${ETI_SOURCES} +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp +PARENT_SCOPE) diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..905c97c54e --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..a7632852ce --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double**,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..cff22240cf --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double***,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..2b667c674f --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..cd1a445d81 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..3d805d5134 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double********,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..3883d581b6 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..55f3e200a5 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double**,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..ed6d57260b --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double***,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..ed1954e683 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..fb8dadb8d0 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..16a0ed3e9c --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double********,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..f846f94a96 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..f4b51a1d78 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double**,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..622b3119bd --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double***,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..de871103dd --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..720e075aea --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..4c57c457c2 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double********,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..5a37da22c4 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..93a96ee554 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float**,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..dcfcc8a0e3 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float***,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..7082701282 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..cbbd7c9ef3 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..22d6fc5387 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float********,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..d44e95e67e --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..ae79919c42 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float**,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..0c671ad593 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float***,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..24dd1c8354 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..6e2de8a02e --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..38840ac9e6 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float********,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..bcb105628b --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..8730f92f20 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float**,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..785996558b --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float***,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..3ae193ca65 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..81f91019d6 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..d34a4870b9 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float********,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..0da5ed1770 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..444dad079b --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t**,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..3f36a1d714 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t***,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..51c964b92d --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..1a26522ff5 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..9bd9af3fe3 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t********,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..dd5a325535 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..30a44c0a80 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t**,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..0b73280c6e --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t***,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..3997d8ca58 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..6cbaa59223 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..351001c8d1 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t********,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..d37e34af30 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..7609d9478f --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t**,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..30f0c1d882 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t***,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..4c4109e298 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..189245d352 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..921a8e88c7 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t********,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..7e492aa25f --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..13b1a78d7c --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int**,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..03fa72c21c --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int***,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..10a46bcd9d --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..4c23c7e796 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..1bc7ab41f7 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int********,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..0206838af6 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..78b67a4a2a --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int**,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..564f530d9b --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int***,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..b5ae4ae52a --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..b2c91a1aa1 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..18e3f2b9b9 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int********,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..e3d08c6e38 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..5001fc2781 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int**,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..fd45308d15 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int***,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..d2fca73151 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..c7fafd4aec --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..046aafa6ad --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int********,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..60f78b7a57 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..304a5afc0d --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double**,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..8aeaf8a1f8 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double***,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..26ff7aefed --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..518d000eea --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..36b3b4fab8 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double********,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..df5c890a49 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..b120215692 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double**,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..9b5e4c2e5f --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double***,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..74ad489303 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..bc9dbc65c1 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..fbd98c8011 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double********,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..d52c5306d0 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..5cc29daaca --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double**,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..7e63d80236 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double***,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..11447c11b5 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..bafe266044 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..e4ef20c370 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double********,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..fb00c3bfd3 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..12718353e8 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float**,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..c9ab75062d --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float***,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..71380c21a2 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..9787086a80 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..81072d77cb --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float********,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..363b05bace --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..ce1bc89e01 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float**,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..4af590818c --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float***,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..ad399eff76 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..661edef668 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..48cb4a34b1 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float********,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..d2f88bb243 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..58ce6f1911 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float**,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..bc4efab1e4 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float***,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..6225cf9720 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..e50472d850 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..5ad427acc5 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float********,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..4ae2437fc8 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..02a2b8e1d9 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t**,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..ff693c9b4f --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t***,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..d96960d4a7 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..05c3ef68eb --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..d96f47ece0 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t********,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..208933899e --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..aa7d9b8f15 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t**,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..e43a1783fd --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t***,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..6706074819 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..cd7082dcb3 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..8735d58605 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t********,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..ec371dcba7 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..354da99794 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t**,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..bbc32aba03 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t***,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..addbbb291a --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..dbebda1594 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..f8a89b4226 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t********,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..7f0b9fc346 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..4a31e60a3a --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int**,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..e876da3a6c --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int***,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..a7ee2c554d --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..4769c235bc --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..3ac618b5dd --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int********,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..825bee722f --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..44e24e57f3 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int**,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..0b18c7e5c0 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int***,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..951d770305 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..a0e80d764d --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..d8cd0155af --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int********,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..c4bd8a043a --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..566eb71e4d --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int**,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..4b99a8fd0c --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int***,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..6cf55bb5b4 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..932a322bac --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..f46a156a93 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int********,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Makefile.eti_HPX b/lib/kokkos/core/src/eti/HPX/Makefile.eti_HPX new file mode 100644 index 0000000000..904f32fb82 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Makefile.eti_HPX @@ -0,0 +1,288 @@ +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp diff --git a/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp b/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp index d27c2e1306..50af5ec82e 100644 --- a/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp @@ -56,11 +56,12 @@ template < typename ExecutionSpace = void , typename IndexType = void , typename IterationPattern = void , typename LaunchBounds = void + , typename MyWorkItemProperty = Kokkos::Experimental::WorkItemProperty::None_t > struct PolicyTraitsBase { using type = PolicyTraitsBase< ExecutionSpace, Schedule, WorkTag, IndexType, - IterationPattern, LaunchBounds>; + IterationPattern, LaunchBounds, MyWorkItemProperty>; using execution_space = ExecutionSpace; using schedule_type = Schedule; @@ -68,8 +69,23 @@ struct PolicyTraitsBase using index_type = IndexType; using iteration_pattern = IterationPattern; using launch_bounds = LaunchBounds; + using work_item_property = MyWorkItemProperty; }; +template +struct SetWorkItemProperty +{ + static_assert( std::is_same::value + , "Kokkos Error: More than one work item property given" ); + using type = PolicyTraitsBase< typename PolicyBase::execution_space + , typename PolicyBase::schedule_type + , typename PolicyBase::work_tag + , typename PolicyBase::index_type + , typename PolicyBase::iteration_pattern + , typename PolicyBase::launch_bounds + , Property + >; +}; template struct SetExecutionSpace @@ -82,6 +98,7 @@ struct SetExecutionSpace , typename PolicyBase::index_type , typename PolicyBase::iteration_pattern , typename PolicyBase::launch_bounds + , typename PolicyBase::work_item_property >; }; @@ -96,6 +113,7 @@ struct SetSchedule , typename PolicyBase::index_type , typename PolicyBase::iteration_pattern , typename PolicyBase::launch_bounds + , typename PolicyBase::work_item_property >; }; @@ -110,6 +128,7 @@ struct SetWorkTag , typename PolicyBase::index_type , typename PolicyBase::iteration_pattern , typename PolicyBase::launch_bounds + , typename PolicyBase::work_item_property >; }; @@ -124,6 +143,7 @@ struct SetIndexType , IndexType , typename PolicyBase::iteration_pattern , typename PolicyBase::launch_bounds + , typename PolicyBase::work_item_property >; }; @@ -139,6 +159,7 @@ struct SetIterationPattern , typename PolicyBase::index_type , IterationPattern , typename PolicyBase::launch_bounds + , typename PolicyBase::work_item_property >; }; @@ -154,6 +175,7 @@ struct SetLaunchBounds , typename PolicyBase::index_type , typename PolicyBase::iteration_pattern , LaunchBounds + , typename PolicyBase::work_item_property >; }; @@ -170,8 +192,9 @@ struct AnalyzePolicy : public , typename std::conditional< std::is_integral::value , SetIndexType > , typename std::conditional< is_iteration_pattern::value, SetIterationPattern , typename std::conditional< is_launch_bounds::value , SetLaunchBounds + , typename std::conditional< Experimental::is_work_item_property::value, SetWorkItemProperty , SetWorkTag - >::type >::type >::type >::type >::type>::type::type + >::type >::type >::type >::type >::type>::type>::type::type , Traits... > {}; @@ -208,13 +231,15 @@ struct AnalyzePolicy , typename Base::launch_bounds >::type; + using work_item_property = typename Base::work_item_property; + using type = PolicyTraitsBase< execution_space , schedule_type , work_tag , index_type , iteration_pattern , launch_bounds - >; + , work_item_property>; }; template diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp index 3d99b07568..63067c137a 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp @@ -53,6 +53,13 @@ #include #endif +#include +#include + +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + namespace Kokkos { //---------------------------------------------------------------------------- @@ -326,7 +333,165 @@ bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, con } //---------------------------------------------------------------------------- -} // namespace Kokkos +namespace Impl { +// memory-ordered versions are in the Impl namespace + +template +KOKKOS_INLINE_FUNCTION +bool _atomic_compare_exchange_strong_fallback( + T* dest, T compare, T val, memory_order_seq_cst_t, MemoryOrderFailure +) +{ + Kokkos::memory_fence(); + auto rv = Kokkos::atomic_compare_exchange_strong( + dest, compare, val + ); + Kokkos::memory_fence(); + return rv; +} + +template +KOKKOS_INLINE_FUNCTION +bool _atomic_compare_exchange_strong_fallback( + T* dest, T compare, T val, memory_order_acquire_t, MemoryOrderFailure +) +{ + auto rv = Kokkos::atomic_compare_exchange_strong( + dest, compare, val + ); + Kokkos::memory_fence(); + return rv; +} + +template +KOKKOS_INLINE_FUNCTION +bool _atomic_compare_exchange_strong_fallback( + T* dest, T compare, T val, memory_order_release_t, MemoryOrderFailure +) +{ + Kokkos::memory_fence(); + return Kokkos::atomic_compare_exchange_strong( + dest, compare, val + ); +} + +template +KOKKOS_INLINE_FUNCTION +bool _atomic_compare_exchange_strong_fallback( + T* dest, T compare, T val, memory_order_relaxed_t, MemoryOrderFailure +) +{ + return Kokkos::atomic_compare_exchange_strong( + dest, compare, val + ); +} + +#if (defined(KOKKOS_ENABLE_GNU_ATOMICS) && !defined(__CUDA_ARCH__)) \ + || (defined(KOKKOS_ENABLE_INTEL_ATOMICS) && !defined(__CUDA_ARCH__)) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + +#if defined(__CUDA_ARCH__) + #define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH __inline__ __device__ +#else + #define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH inline +#endif + +template +KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH +bool _atomic_compare_exchange_strong( + T* dest, T compare, T val, + MemoryOrderSuccess, + MemoryOrderFailure, + typename std::enable_if< + ( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ) + && std::is_same< + typename MemoryOrderSuccess::memory_order, + typename std::remove_cv::type + >::value + && std::is_same< + typename MemoryOrderFailure::memory_order, + typename std::remove_cv::type + >::value, + void const** + >::type = nullptr +) { + return __atomic_compare_exchange_n( + dest, &compare, val, /* weak = */ false, + MemoryOrderSuccess::gnu_constant, + MemoryOrderFailure::gnu_constant + ); +} + +template +KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH +bool _atomic_compare_exchange_strong( + T* dest, T compare, T val, + MemoryOrderSuccess order_success, + MemoryOrderFailure order_failure, + typename std::enable_if< + !( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ) + && std::is_same< + typename MemoryOrderSuccess::memory_order, + typename std::remove_cv::type + >::value + && std::is_same< + typename MemoryOrderFailure::memory_order, + typename std::remove_cv::type + >::value, + void const** + >::type = nullptr +) { + return _atomic_compare_exchange_fallback( + dest, compare, val, + order_success, order_failure + ); +} + +#else + +template +KOKKOS_INLINE_FUNCTION +bool _atomic_compare_exchange_strong( + T* dest, T compare, T val, + MemoryOrderSuccess order_success, + MemoryOrderFailure order_failure +) { + return _atomic_compare_exchange_strong_fallback( + dest, compare, val, order_success, order_failure + ); +} + +#endif + +// TODO static asserts in overloads that don't make sense (as listed in https://gcc.gnu.org/onlinedocs/gcc-5.2.0/gcc/_005f_005fatomic-Builtins.html) +template +KOKKOS_FORCEINLINE_FUNCTION +bool atomic_compare_exchange_strong( + T* dest, T compare, T val, + MemoryOrderSuccess order_success, + MemoryOrderFailure order_failure +) { + return _atomic_compare_exchange_strong(dest, compare, val, order_success, order_failure); +} + + +} // end namespace Impl + +} // namespace Kokkos + +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp new file mode 100644 index 0000000000..3abc8ed4b7 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp @@ -0,0 +1,418 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) +#include +#endif + +#include +#include +#ifndef KOKKOS_ATOMIC_COMPARE_EXCHANGE_WEAK_HPP +#define KOKKOS_ATOMIC_COMPARE_EXCHANGE_WEAK_HPP + +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + +namespace Kokkos { + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +// Cuda sm_70 or greater supports C++-like semantics directly + +#if defined( KOKKOS_ENABLE_CUDA ) + +#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) + + +#if __CUDA_ARCH__ >= 700 +// See: https://github.com/ogiroux/freestanding +# define kokkos_cuda_internal_cas_release_32(ptr, old, expected, desired) \ + asm volatile("atom.cas.release.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +# define kokkos_cuda_internal_cas_acquire_32(ptr, old, expected, desired) \ + asm volatile("atom.cas.acquire.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +# define kokkos_cuda_internal_cas_acq_rel_32(ptr, old, expected, desired) \ + asm volatile("atom.cas.acq_rel.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +# define kokkos_cuda_internal_cas_relaxed_32(ptr, old, expected, desired) \ + asm volatile("atom.cas.relaxed.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +# define kokkos_cuda_internal_fence_seq_cst() asm volatile("fence.sc.sys;" : : : "memory") +# define kokkos_cuda_internal_fence_acq_rel() asm volatile("fence.acq_rel.sys;" : : : "memory") +#else +# define kokkos_cuda_internal_fence_acq_rel() asm volatile("membar.sys;" : : : "memory") +# define kokkos_cuda_internal_fence_seq_cst() asm volatile("membar.sys;" : : : "memory") +#endif + + +// 32-bit version +template ::type = 0 +> +__inline__ __device__ +bool +atomic_compare_exchange_weak( + T volatile* const dest, + T* const expected, + T const desired, + std::memory_order success_order = std::memory_order_seq_cst, + std::memory_order failure_order = std::memory_order_seq_cst +) { + // TODO assert that success_order >= failure_order + // See: https://github.com/ogiroux/freestanding + int32_t tmp = 0; + int32_t old = 0; + memcpy(&tmp, &desired, sizeof(T)); + memcpy(&old, expected, sizeof(T)); + int32_t old_tmp = old; +#if __CUDA_ARCH__ >= 700 + switch(success_order) { + case std::memory_order_seq_cst: + // sequentially consistent is just an acquire with a seq_cst fence + kokkos_cuda_internal_fence_seq_cst(); + kokkos_cuda_internal_cas_acquire_32((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_acquire: + kokkos_cuda_internal_cas_acquire_32((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_consume: + // same as acquire on PTX compatible platforms + kokkos_cuda_internal_cas_acquire_32((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_acq_rel: + kokkos_cuda_internal_cas_acq_rel_32((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_release: + kokkos_cuda_internal_cas_release_32((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_relaxed: + kokkos_cuda_internal_cas_relaxed_32((T*)dest, old, old_tmp, tmp); + break; + }; +#else + // All of the orders that require a fence before the relaxed atomic operation: + if( + success_order == std::memory_order_release + || success_order == std::memory_order_acq_rel + ) { + kokkos_cuda_internal_fence_acq_rel(); + } + else if(success_order == std::memory_order_seq_cst) { + kokkos_cuda_internal_fence_seq_cst(); + } + // This is relaxed: + // Cuda API requires casting away volatile + atomicCAS((T*)dest, old_tmp, tmp); +#endif + bool const rv = (old == old_tmp); +#if __CUDA_ARCH__ < 700 + if(rv) { + if( + success_order == std::memory_order_acquire + || success_order == std::memory_order_consume + || success_order == std::memory_order_acq_rel + ) { + kokkos_cuda_internal_fence_acq_rel(); + } + else if(success_order == std::memory_order_seq_cst) { + kokkos_cuda_internal_fence_seq_cst(); + } + } + else { + if( + failure_order == std::memory_order_acquire + || failure_order == std::memory_order_consume + || failure_order == std::memory_order_acq_rel + ) { + kokkos_cuda_internal_fence_acq_rel(); + } + else if(failure_order == std::memory_order_seq_cst) { + kokkos_cuda_internal_fence_seq_cst(); + } + } +#endif + memcpy(expected, &old, sizeof(T)); + return rv; +} + +// 64-bit version +template ::type = 0 +> +bool +atomic_compare_exchange_weak( + T volatile* const dest, + T* const expected, + T const desired, + std::memory_order success_order = std::memory_order_seq_cst, + std::memory_order failure_order = std::memory_order_seq_cst +) { + // TODO assert that success_order >= failure_order + // See: https://github.com/ogiroux/freestanding + int64_t tmp = 0; + int64_t old = 0; + memcpy(&tmp, &desired, sizeof(T)); + memcpy(&old, expected, sizeof(T)); + int64_t old_tmp = old; +#if __CUDA_ARCH__ >= 700 + switch(success_order) { + case std::memory_order_seq_cst: + // sequentially consistent is just an acquire with a seq_cst fence + kokkos_cuda_internal_fence_seq_cst(); + kokkos_cuda_internal_cas_acquire_64((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_acquire: + kokkos_cuda_internal_cas_acquire_64((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_consume: + // same as acquire on PTX compatible platforms + kokkos_cuda_internal_cas_acquire_64((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_acq_rel: + kokkos_cuda_internal_cas_acq_rel_64((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_release: + kokkos_cuda_internal_cas_release_64((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_relaxed: + kokkos_cuda_internal_cas_relaxed_64((T*)dest, old, old_tmp, tmp); + break; + }; +#else + // Cuda API requires casting away volatile + atomicCAS((T*)dest, old_tmp, tmp); +#endif + bool const rv = (old == old_tmp); + memcpy(expected, &old, sizeof(T)); + return rv; +} + +#endif // defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) + +#endif // defined( KOKKOS_ENABLE_CUDA ) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +// GCC native CAS supports int, long, unsigned int, unsigned long. +// Intel native CAS support int and long with the same interface as GCC. +#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS) +#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) +#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS) + +inline +int atomic_compare_exchange( volatile int * const dest, const int compare, const int val) +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_val_compare_and_swap(dest,compare,val); +} + +inline +long atomic_compare_exchange( volatile long * const dest, const long compare, const long val ) +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_val_compare_and_swap(dest,compare,val); +} + +#if defined( KOKKOS_ENABLE_GNU_ATOMICS ) + +// GCC supports unsigned + +inline +unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val ) +{ return __sync_val_compare_and_swap(dest,compare,val); } + +inline +unsigned long atomic_compare_exchange( volatile unsigned long * const dest , + const unsigned long compare , + const unsigned long val ) +{ return __sync_val_compare_and_swap(dest,compare,val); } + +#endif + +template < typename T > +inline +T atomic_compare_exchange( volatile T * const dest, const T & compare, + typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val ) +{ + union U { + int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {}; + } tmp ; + +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + + tmp.i = __sync_val_compare_and_swap( (int*) dest , *((int*)&compare) , *((int*)&val) ); + return tmp.t ; +} + +template < typename T > +inline +T atomic_compare_exchange( volatile T * const dest, const T & compare, + typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(long) , const T & >::type val ) +{ + union U { + long i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {}; + } tmp ; + +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + + tmp.i = __sync_val_compare_and_swap( (long*) dest , *((long*)&compare) , *((long*)&val) ); + return tmp.t ; +} + +#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 ) +template < typename T > +inline +T atomic_compare_exchange( volatile T * const dest, const T & compare, + typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + sizeof(T) != sizeof(long) && + sizeof(T) == sizeof(Impl::cas128_t), const T & >::type val ) +{ + union U { + Impl::cas128_t i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {}; + } tmp ; + +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + + tmp.i = Impl::cas128( (Impl::cas128_t*) dest , *((Impl::cas128_t*)&compare) , *((Impl::cas128_t*)&val) ); + return tmp.t ; +} +#endif + +template < typename T > +inline +T atomic_compare_exchange( volatile T * const dest , const T compare , + typename Kokkos::Impl::enable_if< + ( sizeof(T) != 4 ) + && ( sizeof(T) != 8 ) + #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 ) + && ( sizeof(T) != 16 ) + #endif + , const T >::type& val ) +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + + while( !Impl::lock_address_host_space( (void*) dest ) ); + T return_val = *dest; + if( return_val == compare ) { + // Don't use the following line of code here: + // + //const T tmp = *dest = val; + // + // Instead, put each assignment in its own statement. This is + // because the overload of T::operator= for volatile *this should + // return void, not volatile T&. See Kokkos #177: + // + // https://github.com/kokkos/kokkos/issues/177 + *dest = val; + const T tmp = *dest; + #ifndef KOKKOS_COMPILER_CLANG + (void) tmp; + #endif + } + Impl::unlock_address_host_space( (void*) dest ); + return return_val; +} +//---------------------------------------------------------------------------- + +#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS ) + +template< typename T > +KOKKOS_INLINE_FUNCTION +T atomic_compare_exchange( volatile T * const dest, const T compare, const T val ) +{ + T retval; +#pragma omp critical + { + retval = dest[0]; + if ( retval == compare ) + dest[0] = val; + } + return retval; +} + +#elif defined( KOKKOS_ENABLE_SERIAL_ATOMICS ) + +template< typename T > +KOKKOS_INLINE_FUNCTION +T atomic_compare_exchange( volatile T * const dest_v, const T compare, const T val ) +{ + T* dest = const_cast(dest_v); + T retval = *dest; + if (retval == compare) *dest = val; + return retval; +} + +#endif +#endif +#endif // !defined ROCM_ATOMICS + +template +KOKKOS_INLINE_FUNCTION +bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val) +{ + return compare == atomic_compare_exchange(dest, compare, val); +} +//---------------------------------------------------------------------------- + +} // namespace Kokkos + +#endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp index d6fab81133..495fd48477 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp @@ -90,10 +90,12 @@ __inline__ __device__ T atomic_fetch_add( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) { - union U { + // to work around a bug in the clang cuda compiler, the name here needs to be + // different from the one internal to the other overloads + union U1 { int i ; T t ; - KOKKOS_INLINE_FUNCTION U() {}; + KOKKOS_INLINE_FUNCTION U1() {}; } assume , oldval , newval ; oldval.t = *dest ; @@ -113,10 +115,12 @@ T atomic_fetch_add( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int) , const T >::type val ) { - union U { + // to work around a bug in the clang cuda compiler, the name here needs to be + // different from the one internal to the other overloads + union U2 { unsigned long long int i ; T t ; - KOKKOS_INLINE_FUNCTION U() {}; + KOKKOS_INLINE_FUNCTION U2() {}; } assume , oldval , newval ; oldval.t = *dest ; @@ -176,7 +180,7 @@ T atomic_fetch_add( volatile T * const dest , #if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) #if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS) -#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 ) +#if defined( KOKKOS_ENABLE_ASM ) && (defined(KOKKOS_ENABLE_ISA_X86_64) || defined(KOKKOS_KNL_USE_ASM_WORKAROUND)) inline int atomic_fetch_add( volatile int * dest , const int val ) { diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp index 48dc8731ef..7a4f95cd99 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp @@ -89,7 +89,11 @@ __inline__ __device__ T atomic_fetch_sub( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) { - union { int i ; T t ; } oldval , assume , newval ; + union U { + int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; oldval.t = *dest ; @@ -108,7 +112,11 @@ T atomic_fetch_sub( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int) , const T >::type val ) { - union { unsigned long long int i ; T t ; } oldval , assume , newval ; + union U { + unsigned long long int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; oldval.t = *dest ; @@ -211,7 +219,11 @@ inline T atomic_fetch_sub( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) { - union { int i ; T t ; } assume , oldval , newval ; + union U { + int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; #if defined( KOKKOS_ENABLE_RFO_PREFETCH ) _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); @@ -238,7 +250,11 @@ T atomic_fetch_sub( volatile T * const dest , _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); #endif - union { long i ; T t ; } assume , oldval , newval ; + union U { + long i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; oldval.t = *dest ; diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp index a3a18166af..c1a7d80364 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp @@ -156,13 +156,17 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int) , const T >::type val ) { - union { unsigned long long int i ; T t ; } oldval , assume , newval ; + union U { + unsigned long long int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; oldval.t = *dest ; do { assume.i = oldval.i ; - newval.t = Oper::apply(assume.t, val) ; + newval.t = op.apply(assume.t, val) ; oldval.i = Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i ); } while ( assume.i != oldval.i ); @@ -175,7 +179,11 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int) , const T >::type val ) { - union { unsigned long long int i ; T t ; } oldval , assume , newval ; + union U { + unsigned long long int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; oldval.t = *dest ; @@ -193,13 +201,17 @@ KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( const Oper& op, volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) { - union { int i ; T t ; } oldval , assume , newval ; + union U { + int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; oldval.t = *dest ; do { assume.i = oldval.i ; - newval.t = Oper::apply(assume.t, val) ; + newval.t = op.apply(assume.t, val) ; oldval.i = Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i ); } while ( assume.i != oldval.i ); @@ -211,7 +223,11 @@ KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( const Oper& op, volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T >::type val ) { - union { int i ; T t ; } oldval , assume , newval ; + union U { + int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; oldval.t = *dest ; diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Load.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Load.hpp new file mode 100644 index 0000000000..2db74b9f1e --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Load.hpp @@ -0,0 +1,266 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_KOKKOS_ATOMIC_LOAD_HPP +#define KOKKOS_IMPL_KOKKOS_ATOMIC_LOAD_HPP + +#include +#if defined(KOKKOS_ATOMIC_HPP) + +#include + +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + +namespace Kokkos { +namespace Impl { + +// Olivier's implementation helpfully binds to the same builtins as GNU, so +// we make this code common across multiple options +#if (defined(KOKKOS_ENABLE_GNU_ATOMICS) && !defined(__CUDA_ARCH__)) \ + || (defined(KOKKOS_ENABLE_INTEL_ATOMICS) && !defined(__CUDA_ARCH__)) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + +#if defined(__CUDA_ARCH__) && defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + #define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH __inline__ __device__ +#else + #define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH inline +#endif + +template +KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH +T _atomic_load( + T* ptr, MemoryOrder, + typename std::enable_if< + ( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ) + && std::is_same< + typename MemoryOrder::memory_order, + typename std::remove_cv::type + >::value, + void const** + >::type = nullptr +) { + return __atomic_load_n(ptr, MemoryOrder::gnu_constant); +} + +template +KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH +T _atomic_load( + T* ptr, MemoryOrder, + typename std::enable_if< + !( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ) + && std::is_default_constructible::value + && std::is_same< + typename MemoryOrder::memory_order, + typename std::remove_cv::type + >::value, + void const** + >::type = nullptr +) { + T rv{}; + __atomic_load(ptr, &rv, MemoryOrder::gnu_constant); + return rv; +} + +#undef KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH + +#elif defined(__CUDA_ARCH__) + +// Not compiling for Volta or later, or Cuda ASM atomics were manually disabled + +template +__device__ __inline__ +T _relaxed_atomic_load_impl( + T* ptr, + typename std::enable_if< + ( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ), + void const** + >::type = nullptr +) { + return *ptr; +} + +template +struct NoOpOper { + __device__ __inline__ + static constexpr T apply(T const&, T const&) noexcept { } +}; + +template +__device__ __inline__ +T _relaxed_atomic_load_impl( + T* ptr, + typename std::enable_if< + !( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ), + void const** + >::type = nullptr +) { + T rv{}; + // TODO remove a copy operation here? + Kokkos::atomic_oper_fetch(NoOpOper{}, &rv, rv); + return rv; +} + +template +__device__ __inline__ +T _atomic_load(T* ptr, memory_order_seq_cst_t) { + Kokkos::memory_fence(); + T rv = Impl::_relaxed_atomic_load_impl(ptr); + Kokkos::memory_fence(); + return rv; +} + +template +__device__ __inline__ +T _atomic_load(T* ptr, memory_order_acquire_t) { + T rv = Impl::_relaxed_atomic_load_impl(ptr); + Kokkos::memory_fence(); + return rv; +} + +template +__device__ __inline__ +T _atomic_load(T* ptr, memory_order_relaxed_t) { + return _relaxed_atomic_load_impl(ptr); +} + +#elif defined(KOKKOS_ENABLE_OPENMP_ATOMICS) + +template +inline +T _atomic_load(T* ptr, MemoryOrder) +{ + // AFAICT, all OpenMP atomics are sequentially consistent, so memory order doesn't matter + T retval{ }; +#pragma omp atomic read + { + retval = *ptr; + } + return retval; +} + +#elif defined(KOKKOS_ENABLE_SERIAL_ATOMICS) + +template +inline +T _atomic_load(T* ptr, MemoryOrder) +{ + return *ptr; +} + +#endif // end of all atomic implementations + + +template +KOKKOS_FORCEINLINE_FUNCTION +T atomic_load(T* ptr, Impl::memory_order_seq_cst_t) { + return _atomic_load(ptr, Impl::memory_order_seq_cst); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +T atomic_load(T* ptr, Impl::memory_order_acquire_t) { + return _atomic_load(ptr, Impl::memory_order_acquire); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +T atomic_load(T* ptr, Impl::memory_order_relaxed_t) { + return _atomic_load(ptr, Impl::memory_order_relaxed); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +T atomic_load(T* ptr, Impl::memory_order_release_t) { + static_assert( + sizeof(T) == 0, // just something that will always be false, but only on instantiation + "atomic_load with memory order release doesn't make any sense!" + ); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +T atomic_load(T* ptr, Impl::memory_order_acq_rel_t) { + static_assert( + sizeof(T) == 0, // just something that will always be false, but only on instantiation + "atomic_load with memory order acq_rel doesn't make any sense!" + ); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +T atomic_load(T* ptr) { + // relaxed by default! + return _atomic_load(ptr, Impl::memory_order_relaxed); +} + +} // end namespace Impl +} // end namespace Kokkos + +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + +#endif // defined(KOKKOS_ATOMIC_HPP) +#endif //KOKKOS_IMPL_KOKKOS_ATOMIC_LOAD_HPP diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Memory_Order.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Memory_Order.hpp new file mode 100644 index 0000000000..7b9c08551c --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Memory_Order.hpp @@ -0,0 +1,122 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP +#define KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP + +#include + +#include + +namespace Kokkos { +namespace Impl { + +/** @file + * Provides strongly-typed analogs of the standard memory order enumerators. + * In addition to (very slightly) reducing the constant propagation burden on + * the compiler, this allows us to give compile-time errors for things that + * don't make sense, like atomic_load with memory order release. + */ + +struct memory_order_seq_cst_t { + using memory_order = memory_order_seq_cst_t; +#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \ + || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + static constexpr auto gnu_constant = __ATOMIC_SEQ_CST; +#endif + static constexpr auto std_constant = std::memory_order_seq_cst; +}; +constexpr memory_order_seq_cst_t memory_order_seq_cst = { }; + +struct memory_order_relaxed_t { + using memory_order = memory_order_relaxed_t; +#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \ + || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + static constexpr auto gnu_constant = __ATOMIC_RELAXED; +#endif + static constexpr auto std_constant = std::memory_order_relaxed; +}; +constexpr memory_order_relaxed_t memory_order_relaxed = { }; + +struct memory_order_acquire_t { + using memory_order = memory_order_acquire_t; +#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \ + || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + static constexpr auto gnu_constant = __ATOMIC_ACQUIRE; +#endif + static constexpr auto std_constant = std::memory_order_acquire; +}; +constexpr memory_order_acquire_t memory_order_acquire = { }; + +struct memory_order_release_t { + using memory_order = memory_order_release_t; +#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \ + || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + static constexpr auto gnu_constant = __ATOMIC_RELEASE; +#endif + static constexpr auto std_constant = std::memory_order_release; +}; +constexpr memory_order_release_t memory_order_release = { }; + +struct memory_order_acq_rel_t { + using memory_order = memory_order_acq_rel_t; +#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \ + || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + static constexpr auto gnu_constant = __ATOMIC_ACQ_REL; +#endif + static constexpr auto std_constant = std::memory_order_acq_rel; +}; +constexpr memory_order_acq_rel_t memory_order_acq_rel = { }; + + +// Intentionally omit consume (for now) + +} // end namespace Impl +} // end namespace Kokkos + +#endif //KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Store.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Store.hpp new file mode 100644 index 0000000000..066f90480d --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Store.hpp @@ -0,0 +1,258 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_KOKKOS_ATOMIC_STORE_HPP +#define KOKKOS_IMPL_KOKKOS_ATOMIC_STORE_HPP + +#include +#if defined(KOKKOS_ATOMIC_HPP) + +#include + +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + +namespace Kokkos { +namespace Impl { + +// Olivier's implementation helpfully binds to the same builtins as GNU, so +// we make this code common across multiple options +#if (defined(KOKKOS_ENABLE_GNU_ATOMICS) && !defined(__CUDA_ARCH__)) \ + || (defined(KOKKOS_ENABLE_INTEL_ATOMICS) && !defined(__CUDA_ARCH__)) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + +#if defined(__CUDA_ARCH__) && defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + #define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH __inline__ __device__ +#else + #define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH inline +#endif + +template +KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH +void _atomic_store( + T* ptr, T val, MemoryOrder, + typename std::enable_if< + ( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ) + && std::is_same< + typename MemoryOrder::memory_order, + typename std::remove_cv::type + >::value, + void const** + >::type = nullptr +) { + __atomic_store_n(ptr, val, MemoryOrder::gnu_constant); +} + +template +KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH +void _atomic_store( + T* ptr, T val, MemoryOrder, + typename std::enable_if< + !( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ) + && std::is_default_constructible::value + && std::is_same< + typename MemoryOrder::memory_order, + typename std::remove_cv::type + >::value, + void const** + >::type = nullptr +) { + __atomic_store(ptr, &val, MemoryOrder::gnu_constant); +} + +#undef KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH + +#elif defined(__CUDA_ARCH__) + +// Not compiling for Volta or later, or Cuda ASM atomics were manually disabled + +template +__device__ __inline__ +void _relaxed_atomic_store_impl( + T* ptr, T val, + typename std::enable_if< + ( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ), + void const** + >::type = nullptr +) { + *ptr = val; +} + +template +struct StoreOper { + __device__ __inline__ + static constexpr T apply(T const&, T const& val) noexcept { return val; } +}; + +template +__device__ __inline__ +void _relaxed_atomic_store_impl( + T* ptr, T val, + typename std::enable_if< + !( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ), + void const** + >::type = nullptr +) { + Kokkos::atomic_oper_fetch(StoreOper{}, &rv, (T&&)val); +} + +template +__device__ __inline__ +void _atomic_store(T* ptr, T val, memory_order_seq_cst_t) { + Kokkos::memory_fence(); + Impl::_relaxed_atomic_store_impl(ptr, val); + Kokkos::memory_fence(); + return rv; +} + +template +__device__ __inline__ +void _atomic_store(T* ptr, T val, memory_order_release_t) { + Kokkos::memory_fence(); + _relaxed_atomic_store_impl(ptr, val); +} + +template +__device__ __inline__ +void _atomic_store(T* ptr, T val, memory_order_relaxed_t) { + _relaxed_atomic_store_impl(ptr, val); +} + +#elif defined(KOKKOS_ENABLE_OPENMP_ATOMICS) + +template +inline +void _atomic_store(T* ptr, T val, MemoryOrder) +{ + // AFAICT, all OpenMP atomics are sequentially consistent, so memory order doesn't matter +#pragma omp atomic write + { + *ptr = val; + } +} + +#elif defined(KOKKOS_ENABLE_SERIAL_ATOMICS) + +template +inline +void _atomic_store(T* ptr, T val, MemoryOrder) +{ + *ptr = val; +} + +#endif // end of all atomic implementations + + +template +KOKKOS_FORCEINLINE_FUNCTION +void atomic_store(T* ptr, T val, Impl::memory_order_seq_cst_t) { + _atomic_store(ptr, val, Impl::memory_order_seq_cst); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +void atomic_store(T* ptr, T val, Impl::memory_order_release_t) { + _atomic_store(ptr, val, Impl::memory_order_release); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +void atomic_store(T* ptr, T val, Impl::memory_order_relaxed_t) { + _atomic_store(ptr, val, Impl::memory_order_relaxed); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +void atomic_store(T* ptr, T val, Impl::memory_order_acquire_t) { + static_assert( + sizeof(T) == 0, // just something that will always be false, but only on instantiation + "atomic_store with memory order acquire doesn't make any sense!" + ); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +void atomic_store(T* ptr, T val, Impl::memory_order_acq_rel_t) { + static_assert( + sizeof(T) == 0, // just something that will always be false, but only on instantiation + "atomic_store with memory order acq_rel doesn't make any sense!" + ); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +void atomic_store(T* ptr, T val) { + // relaxed by default! + _atomic_store(ptr, Impl::memory_order_relaxed); +} + +} // end namespace Impl +} // end namespace Kokkos + +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + +#endif // defined(KOKKOS_ATOMIC_HPP) +#endif //KOKKOS_IMPL_KOKKOS_ATOMIC_STORE_HPP diff --git a/lib/kokkos/core/src/impl/Kokkos_ChaseLev.hpp b/lib/kokkos/core/src/impl/Kokkos_ChaseLev.hpp new file mode 100644 index 0000000000..f86e68cb1d --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_ChaseLev.hpp @@ -0,0 +1,314 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_LOCKFREEDEQUE_HPP +#define KOKKOS_IMPL_LOCKFREEDEQUE_HPP + +#include +#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using CUDA + +#include + +#include +#include +#include // KOKKOS_EXPECTS +#include // KOKKOS_EXPECTS + +#include // atomic_compare_exchange, atomic_fence +#include "Kokkos_LIFO.hpp" + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template +struct fixed_size_circular_buffer { +public: + + using node_type = NodeType; + using size_type = SizeType; + +private: + + node_type* m_buffer[CircularBufferSize] = { nullptr }; + +public: + + fixed_size_circular_buffer() = default; + fixed_size_circular_buffer(fixed_size_circular_buffer const&) = delete; + fixed_size_circular_buffer(fixed_size_circular_buffer&&) = default; + fixed_size_circular_buffer& operator=(fixed_size_circular_buffer const&) = delete; + fixed_size_circular_buffer& operator=(fixed_size_circular_buffer&&) = default; + ~fixed_size_circular_buffer() = default; + + KOKKOS_FORCEINLINE_FUNCTION + static constexpr size_type size() noexcept { + return size_type(CircularBufferSize); + } + + KOKKOS_FORCEINLINE_FUNCTION + node_type* operator[](size_type idx) const noexcept { + return m_buffer[idx % size()]; + } + + KOKKOS_FORCEINLINE_FUNCTION + node_type*& operator[](size_type idx) noexcept { + return m_buffer[idx % size()]; + } +}; + +template +struct non_owning_variable_size_circular_buffer { +public: + + using node_type = NodeType; + using size_type = SizeType; + +private: + + ObservingRawPtr m_buffer = nullptr; + size_type m_size = 0; + +public: + + KOKKOS_INLINE_FUNCTION + non_owning_variable_size_circular_buffer( + ObservingRawPtr buffer, + size_type arg_size + ) noexcept + : m_buffer(buffer), + m_size(arg_size) + { } + + non_owning_variable_size_circular_buffer() = default; + non_owning_variable_size_circular_buffer(non_owning_variable_size_circular_buffer const&) = delete; + non_owning_variable_size_circular_buffer(non_owning_variable_size_circular_buffer&&) = default; + non_owning_variable_size_circular_buffer& operator=(non_owning_variable_size_circular_buffer const&) = delete; + non_owning_variable_size_circular_buffer& operator=(non_owning_variable_size_circular_buffer&&) = default; + ~non_owning_variable_size_circular_buffer() = default; + + KOKKOS_FORCEINLINE_FUNCTION + constexpr size_type size() const noexcept { + return m_size; + } + + KOKKOS_FORCEINLINE_FUNCTION + node_type* operator[](size_type idx) const noexcept { + return m_buffer[idx % size()]; + } + + KOKKOS_FORCEINLINE_FUNCTION + node_type*& operator[](size_type idx) noexcept { + return m_buffer[idx % size()]; + } +}; + +/** Based on "Correct and Efficient Work-Stealing for Weak Memory Models," + * PPoPP '13, https://www.di.ens.fr/~zappa/readings/ppopp13.pdf + * + */ +template < + class T, + class CircularBufferT, + class SizeType = int32_t +> +struct ChaseLevDeque { +public: + + using size_type = SizeType; + using value_type = T; + // Still using intrusive linked list for waiting queue + using node_type = SimpleSinglyLinkedListNode<>; + +private: + + // TODO @tasking @new_feature DSH variable size circular buffer? + + CircularBufferT m_array; + size_type m_top = 0; + size_type m_bottom = 0; + + +public: + + template < + class _ignore=void, + class=typename std::enable_if< + std::is_default_constructible::value + >::type + > + ChaseLevDeque() : m_array() { } + + explicit + ChaseLevDeque(CircularBufferT buffer) + : m_array(std::move(buffer)) + { } + + KOKKOS_INLINE_FUNCTION + bool empty() const { + // TODO @tasking @memory_order DSH memory order + return m_top > m_bottom - 1; + } + + KOKKOS_INLINE_FUNCTION + OptionalRef + pop() { + auto b = m_bottom - 1; // atomic load relaxed + auto& a = m_array; // atomic load relaxed + m_bottom = b; // atomic store relaxed + Kokkos::memory_fence(); // memory order seq_cst + auto t = m_top; // atomic load relaxed + OptionalRef return_value; + if(t <= b) { + /* non-empty queue */ + return_value = *static_cast(a[b]); // relaxed load + if(t == b) { + /* single last element in the queue. */ + if(not Impl::atomic_compare_exchange_strong(&m_top, t, t+1, memory_order_seq_cst, memory_order_relaxed)) { + /* failed race, someone else stole it */ + return_value = nullptr; + } + m_bottom = b + 1; // memory order relaxed + } + } else { + /* empty queue */ + m_bottom = b + 1; // memory order relaxed + } + return return_value; + } + + KOKKOS_INLINE_FUNCTION + bool push(node_type&& node) + { + // Just forward to the lvalue version + return push(node); + } + + KOKKOS_INLINE_FUNCTION + bool push(node_type& node) + { + auto b = m_bottom; // memory order relaxed + auto t = Impl::atomic_load(&m_top, memory_order_acquire); + auto& a = m_array; + if(b - t > a.size() - 1) { + /* queue is full, resize */ + //m_array = a->grow(); + //a = m_array; + return false; + } + a[b] = &node; // relaxed + Impl::atomic_store(&m_bottom, b + 1, memory_order_release); + return true; + } + + KOKKOS_INLINE_FUNCTION + OptionalRef + steal() { + auto t = m_top; // TODO @tasking @memory_order DSH: atomic load acquire + Kokkos::memory_fence(); // seq_cst fence, so why does the above need to be acquire? + auto b = Impl::atomic_load(&m_bottom, memory_order_acquire); + OptionalRef return_value; + if(t < b) { + /* Non-empty queue */ + auto& a = m_array; // TODO @tasking @memory_order DSH: technically consume ordered, but acquire should be fine + Kokkos::load_fence(); // TODO @tasking @memory_order DSH memory order instead of fence + return_value = *static_cast(a[t]); // relaxed + if(not Impl::atomic_compare_exchange_strong(&m_top, t, t+1, memory_order_seq_cst, memory_order_relaxed)) { + return_value = nullptr; + } + } + return return_value; + } + +}; + +/* + // The atomicity of this load was more important in the paper's version + // because that version had a circular buffer that could grow. We're + // essentially using the memory order in this version as a fence, which + // may be unnecessary + auto buffer_ptr = (node_type***)&m_array.buffer; + auto a = Impl::atomic_load(buffer_ptr, memory_order_acquire); // technically consume ordered, but acquire should be fine + return_value = *static_cast(a[t % m_array->size]); // relaxed; we'd have to replace the m_array->size if we ever allow growth +*/ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template +struct TaskQueueTraitsChaseLev { + + template + using ready_queue_type = ChaseLevDeque< + Task, + fixed_size_circular_buffer, CircularBufferSize, int32_t>, + int32_t + >; + + template + using waiting_queue_type = SingleConsumeOperationLIFO; + + template + using intrusive_task_base_type = + typename ready_queue_type::node_type; + + static constexpr auto ready_queue_insertion_may_fail = true; + +}; + +} // end namespace Impl +} // end namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* defined KOKKOS_ENABLE_TASKDAG */ +#endif /* #ifndef KOKKOS_IMPL_LOCKFREEDEQUE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp index 82fdee4399..0d472e98bb 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp @@ -85,7 +85,8 @@ setenv("MEMKIND_HBW_NODES", "1", 0); } // Protect declarations, to prevent "unused variable" warnings. -#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS ) || defined( KOKKOS_ENABLE_OPENMPTARGET ) +#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS ) ||\ + defined( KOKKOS_ENABLE_OPENMPTARGET ) || defined ( KOKKOS_ENABLE_HPX ) const int num_threads = args.num_threads; #endif #if defined( KOKKOS_ENABLE_THREADS ) || defined( KOKKOS_ENABLE_OPENMPTARGET ) @@ -160,6 +161,21 @@ setenv("MEMKIND_HBW_NODES", "1", 0); } #endif +#if defined( KOKKOS_ENABLE_HPX ) + if( std::is_same< Kokkos::Experimental::HPX , Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Experimental::HPX , Kokkos::HostSpace::execution_space >::value ) { + if(num_threads>0) { + Kokkos::Experimental::HPX::impl_initialize(num_threads); + } else { + Kokkos::Experimental::HPX::impl_initialize(); + } + //std::cout << "Kokkos::initialize() fyi: HPX enabled and initialized" << std::endl ; + } + else { + //std::cout << "Kokkos::initialize() fyi: HPX enabled but not initialized" << std::endl ; + } +#endif + #if defined( KOKKOS_ENABLE_SERIAL ) // Prevent "unused variable" warning for 'args' input struct. If // Serial::initialize() ever needs to take arguments from the input @@ -268,6 +284,8 @@ void finalize_internal( const bool all_spaces = false ) Kokkos::Cuda::impl_finalize(); #endif } +#else + (void)all_spaces; #endif #if defined( KOKKOS_ENABLE_ROCM ) @@ -298,6 +316,15 @@ void finalize_internal( const bool all_spaces = false ) } #endif +#if defined( KOKKOS_ENABLE_HPX ) + if( std::is_same< Kokkos::Experimental::HPX , Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Experimental::HPX , Kokkos::HostSpace::execution_space >::value || + all_spaces ) { + if(Kokkos::Experimental::HPX::impl_is_initialized()) + Kokkos::Experimental::HPX::impl_finalize(); + } +#endif + #if defined( KOKKOS_ENABLE_THREADS ) if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value || std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value || @@ -331,34 +358,38 @@ void fence_internal() #if defined( KOKKOS_ENABLE_CUDA ) if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) { - Kokkos::Cuda::fence(); + Kokkos::Cuda::impl_static_fence(); } #endif #if defined( KOKKOS_ENABLE_ROCM ) if( std::is_same< Kokkos::Experimental::ROCm , Kokkos::DefaultExecutionSpace >::value ) { - Kokkos::Experimental::ROCm::fence(); + Kokkos::Experimental::ROCm().fence(); } #endif #if defined( KOKKOS_ENABLE_OPENMP ) if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value || std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) { - Kokkos::OpenMP::fence(); + Kokkos::OpenMP::impl_static_fence(); } #endif +#if defined( KOKKOS_ENABLE_HPX ) + Kokkos::Experimental::HPX::impl_static_fence(); +#endif + #if defined( KOKKOS_ENABLE_THREADS ) if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value || std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) { - Kokkos::Threads::fence(); + Kokkos::Threads::impl_static_fence(); } #endif #if defined( KOKKOS_ENABLE_SERIAL ) if( std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value || std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) { - Kokkos::Serial::fence(); + Kokkos::Serial::impl_static_fence(); } #endif @@ -708,6 +739,12 @@ void print_configuration( std::ostream & out , const bool detail ) msg << "yes" << std::endl; #else msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_HPX: "; +#ifdef KOKKOS_ENABLE_HPX + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; #endif msg << " KOKKOS_ENABLE_THREADS: "; #ifdef KOKKOS_ENABLE_THREADS @@ -957,6 +994,9 @@ void print_configuration( std::ostream & out , const bool detail ) #ifdef KOKKOS_ENABLE_OPENMP OpenMP::print_configuration(msg, detail); #endif +#ifdef KOKKOS_ENABLE_HPX + Experimental::HPX::print_configuration(msg, detail); +#endif #if defined( KOKKOS_ENABLE_THREADS ) Threads::print_configuration(msg, detail); #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_EBO.hpp b/lib/kokkos/core/src/impl/Kokkos_EBO.hpp new file mode 100644 index 0000000000..69bb74e2c5 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_EBO.hpp @@ -0,0 +1,343 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EBO_HPP +#define KOKKOS_EBO_HPP + +//---------------------------------------------------------------------------- + +#include + +#include +//---------------------------------------------------------------------------- + + +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template +struct NotOnDeviceCtorDisambiguator { }; + +template +struct NoCtorsNotOnDevice : std::false_type { }; + +template +struct DefaultCtorNotOnDevice : std::false_type { }; + +template <> +struct DefaultCtorNotOnDevice<> : std::true_type { }; + +template class CtorNotOnDevice = NoCtorsNotOnDevice> +struct EBOBaseImpl; + +template class CtorNotOnDevice> +struct EBOBaseImpl { + + /* + * Workaround for constexpr in C++11: we need to still call T(args...), but we + * can't do so in the body of a constexpr function (in C++11), and there's no + * data member to construct into. But we can construct into an argument + * of a delegating constructor... + */ + // TODO @minor DSH the destructor gets called too early with this workaround + struct _constexpr_14_workaround_tag { }; + struct _constexpr_14_workaround_no_device_tag { }; + KOKKOS_FORCEINLINE_FUNCTION + constexpr EBOBaseImpl(_constexpr_14_workaround_tag, T&&) noexcept { } + inline constexpr EBOBaseImpl(_constexpr_14_workaround_no_device_tag, T&&) noexcept { } + + template < + class... Args, + class _ignored = void, + typename std::enable_if< + std::is_void<_ignored>::value + && std::is_constructible::value + && !CtorNotOnDevice::value, + int + >::type = 0 + > + KOKKOS_FORCEINLINE_FUNCTION + constexpr explicit + EBOBaseImpl( + Args&&... args + ) noexcept(noexcept(T(std::forward(args)...))) + // still call the constructor + : EBOBaseImpl(_constexpr_14_workaround_tag{}, T(std::forward(args)...)) + { } + + template < + class... Args, + class _ignored=void, + typename std::enable_if< + std::is_void<_ignored>::value + && std::is_constructible::value + && CtorNotOnDevice::value, + long + >::type = 0 + > + inline constexpr explicit + EBOBaseImpl( + Args&&... args + ) noexcept(noexcept(T(std::forward(args)...))) + // still call the constructor + : EBOBaseImpl(_constexpr_14_workaround_no_device_tag{}, T(std::forward(args)...)) + { } + + KOKKOS_FORCEINLINE_FUNCTION + constexpr EBOBaseImpl(EBOBaseImpl const&) = default; + + KOKKOS_FORCEINLINE_FUNCTION + constexpr EBOBaseImpl(EBOBaseImpl&&) = default; + + KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + EBOBaseImpl& operator=(EBOBaseImpl const&) = default; + + KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + EBOBaseImpl& operator=(EBOBaseImpl&&) = default; + + KOKKOS_FORCEINLINE_FUNCTION + ~EBOBaseImpl() = default; + + KOKKOS_INLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + T& _ebo_data_member() & { + return *reinterpret_cast(this); + } + + KOKKOS_INLINE_FUNCTION + constexpr + T const& _ebo_data_member() const & { + return *reinterpret_cast(this); + } + + KOKKOS_INLINE_FUNCTION + T volatile& _ebo_data_member() volatile & { + return *reinterpret_cast(this); + } + + KOKKOS_INLINE_FUNCTION + T const volatile& _ebo_data_member() const volatile & { + return *reinterpret_cast(this); + } + + KOKKOS_INLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + T&& _ebo_data_member() && { + return std::move(*reinterpret_cast(this)); + } + +}; + +template class CTorsNotOnDevice> +struct EBOBaseImpl { + + T m_ebo_object; + + template < + class... Args, + class _ignored=void, + typename std::enable_if< + std::is_void<_ignored>::value + && !CTorsNotOnDevice::value + && std::is_constructible::value, + int + >::type = 0 + > + KOKKOS_FORCEINLINE_FUNCTION + constexpr explicit + EBOBaseImpl( + Args&&... args + ) noexcept(noexcept(T(std::forward(args)...))) + : m_ebo_object(std::forward(args)...) + { } + + template < + class... Args, + class _ignored=void, + typename std::enable_if< + std::is_void<_ignored>::value + && CTorsNotOnDevice::value + && std::is_constructible::value, + long + >::type = 0 + > + inline + constexpr explicit + EBOBaseImpl( + Args&&... args + ) noexcept(noexcept(T(std::forward(args)...))) + : m_ebo_object(std::forward(args)...) + { } + + + // TODO @tasking @minor DSH noexcept in the right places? + + KOKKOS_FORCEINLINE_FUNCTION + constexpr + EBOBaseImpl(EBOBaseImpl const&) = default; + + KOKKOS_FORCEINLINE_FUNCTION + constexpr + EBOBaseImpl(EBOBaseImpl&&) noexcept = default; + + KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + EBOBaseImpl& operator=(EBOBaseImpl const&) = default; + + KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + EBOBaseImpl& operator=(EBOBaseImpl&&) = default; + + KOKKOS_FORCEINLINE_FUNCTION + ~EBOBaseImpl() = default; + + KOKKOS_INLINE_FUNCTION + T& _ebo_data_member() & { + return m_ebo_object; + } + + KOKKOS_INLINE_FUNCTION + T const& _ebo_data_member() const & { + return m_ebo_object; + } + + KOKKOS_INLINE_FUNCTION + T volatile& _ebo_data_member() volatile & { + return m_ebo_object; + } + + KOKKOS_INLINE_FUNCTION + T const volatile& _ebo_data_member() const volatile & { + return m_ebo_object; + } + + KOKKOS_INLINE_FUNCTION + T&& _ebo_data_member() && { + return m_ebo_object; + } + +}; + +/** + * + * @tparam T + */ +template class CtorsNotOnDevice=NoCtorsNotOnDevice> +struct StandardLayoutNoUniqueAddressMemberEmulation + : EBOBaseImpl::value, CtorsNotOnDevice> +{ +private: + + using ebo_base_t = EBOBaseImpl::value, CtorsNotOnDevice>; + +public: + + using ebo_base_t::ebo_base_t; + + KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + T& no_unique_address_data_member() & { + return this->ebo_base_t::_ebo_data_member(); + } + + KOKKOS_FORCEINLINE_FUNCTION + constexpr + T const& no_unique_address_data_member() const & { + return this->ebo_base_t::_ebo_data_member(); + } + + KOKKOS_FORCEINLINE_FUNCTION + T volatile& no_unique_address_data_member() volatile & { + return this->ebo_base_t::_ebo_data_member(); + } + + KOKKOS_FORCEINLINE_FUNCTION + T const volatile& no_unique_address_data_member() const volatile & { + return this->ebo_base_t::_ebo_data_member(); + } + + KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + T&& no_unique_address_data_member() && { + return this->ebo_base_t::_ebo_data_member(); + } +}; + +/** + * + * @tparam T + */ +template class CtorsNotOnDevice=NoCtorsNotOnDevice> +class NoUniqueAddressMemberEmulation + : private StandardLayoutNoUniqueAddressMemberEmulation +{ +private: + + using base_t = StandardLayoutNoUniqueAddressMemberEmulation; + +public: + + using base_t::base_t; + using base_t::no_unique_address_data_member; + +}; + + +} // end namespace Impl +} // end namespace Kokkos + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + + +#endif /* #ifndef KOKKOS_EBO_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Error.hpp b/lib/kokkos/core/src/impl/Kokkos_Error.hpp index e7d5f9344c..3d634fe5d1 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Error.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Error.hpp @@ -51,6 +51,10 @@ #include #endif +#ifndef KOKKOS_ABORT_MESSAGE_BUFFER_SIZE +# define KOKKOS_ABORT_MESSAGE_BUFFER_SIZE 2048 +#endif // ifndef KOKKOS_ABORT_MESSAGE_BUFFER_SIZE + namespace Kokkos { namespace Impl { @@ -83,6 +87,50 @@ void abort( const char * const message ) { } + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + + +#if !defined(NDEBUG) || defined(KOKKOS_ENFORCE_CONTRACTS) || defined(KOKKOS_DEBUG) +# define KOKKOS_EXPECTS(...) \ + { \ + if(!bool(__VA_ARGS__)) { \ + ::Kokkos::abort( \ + "Kokkos contract violation:\n " \ + " Expected precondition `" #__VA_ARGS__ "` evaluated false." \ + ); \ + } \ + } +# define KOKKOS_ENSURES(...) \ + { \ + if(!bool(__VA_ARGS__)) { \ + ::Kokkos::abort( \ + "Kokkos contract violation:\n " \ + " Ensured postcondition `" #__VA_ARGS__ "` evaluated false." \ + ); \ + } \ + } +// some projects already define this for themselves, so don't mess them up +# ifndef KOKKOS_ASSERT +# define KOKKOS_ASSERT(...) \ + { \ + if(!bool(__VA_ARGS__)) { \ + ::Kokkos::abort( \ + "Kokkos contract violation:\n " \ + " Asserted condition `" #__VA_ARGS__ "` evaluated false." \ + ); \ + } \ + } +# endif // ifndef KOKKOS_ASSERT +#else // not debug mode +# define KOKKOS_EXPECTS(...) +# define KOKKOS_ENSURES(...) +# ifndef KOKKOS_ASSERT +# define KOKKOS_ASSERT(...) +# endif // ifndef KOKKOS_ASSERT +#endif // end debug mode ifdefs + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp b/lib/kokkos/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp new file mode 100644 index 0000000000..3053d8d9d0 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp @@ -0,0 +1,307 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_KOKKOS_FIXEDBUFFERMEMORYPOOL_HPP +#define KOKKOS_IMPL_KOKKOS_FIXEDBUFFERMEMORYPOOL_HPP + +#include +#include + +#include +#include + +namespace Kokkos { +namespace Impl { + +template < + class DeviceType, + size_t Size, + size_t Align=1, + class SizeType = typename DeviceType::execution_space::size_type +> +class FixedBlockSizeMemoryPool + : private MemorySpaceInstanceStorage +{ +public: + + using memory_space = typename DeviceType::memory_space; + using size_type = SizeType; + +private: + + using memory_space_storage_base = MemorySpaceInstanceStorage; + using tracker_type = Kokkos::Impl::SharedAllocationTracker; + using record_type = Kokkos::Impl::SharedAllocationRecord; + + struct alignas(Align) Block { union { char ignore; char data[Size]; }; }; + + static constexpr auto actual_size = sizeof(Block); + + // TODO shared allocation tracker + // TODO @optimization put the index values on different cache lines (CPU) or pages (GPU)? + + tracker_type m_tracker = { }; + size_type m_num_blocks = 0; + size_type m_first_free_idx = 0; + size_type m_last_free_idx = 0; + Kokkos::OwningRawPtr m_first_block = nullptr; + Kokkos::OwningRawPtr m_free_indices = nullptr; + + enum : size_type { IndexInUse = ~size_type(0) }; + +public: + + FixedBlockSizeMemoryPool( + memory_space const& mem_space, + size_type num_blocks + ) : memory_space_storage_base(mem_space), + m_tracker(), + m_num_blocks(num_blocks), + m_first_free_idx(0), + m_last_free_idx(num_blocks) + { + // TODO alignment? + auto block_record = record_type::allocate( + mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(Block) + ); + KOKKOS_ASSERT(intptr_t(block_record->data()) % Align == 0); + m_tracker.assign_allocated_record_to_uninitialized(block_record); + m_first_block = (Block*)block_record->data(); + + auto idx_record = record_type::allocate( + mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(size_type) + ); + KOKKOS_ASSERT(intptr_t(idx_record->data()) % alignof(size_type) == 0); + m_tracker.assign_allocated_record_to_uninitialized(idx_record); + m_free_indices = (size_type*)idx_record->data(); + + for(size_type i = 0; i < num_blocks; ++i) { + m_free_indices[i] = i; + } + + Kokkos::memory_fence(); + } + + // For compatibility with MemoryPool<> + FixedBlockSizeMemoryPool( + memory_space const& mem_space, + size_t mempool_capacity, + unsigned, unsigned, unsigned + ) : FixedBlockSizeMemoryPool(mem_space, mempool_capacity / actual_size) + { /* forwarding ctor, must be empty */ } + + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool() = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool&&) = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool const&) = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool&&) = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool const&) = default; + + + KOKKOS_INLINE_FUNCTION + void* allocate(size_type alloc_size) const noexcept + { + KOKKOS_EXPECTS(alloc_size <= Size); + auto free_idx_counter = Kokkos::atomic_fetch_add((volatile size_type*)&m_first_free_idx, size_type(1)); + auto free_idx_idx = free_idx_counter % m_num_blocks; + + // We don't have exclusive access to m_free_indices[free_idx_idx] because + // the allocate counter might have lapped us since we incremented it + auto current_free_idx = m_free_indices[free_idx_idx]; + size_type free_idx = IndexInUse; + free_idx = + Kokkos::atomic_compare_exchange(&m_free_indices[free_idx_idx], current_free_idx, free_idx); + Kokkos::memory_fence(); + + // TODO figure out how to decrement here? + + if(free_idx == IndexInUse) { + return nullptr; + } + else { + return (void*)&m_first_block[free_idx]; + } + } + + KOKKOS_INLINE_FUNCTION + void deallocate(void* ptr, size_type alloc_size) const noexcept + { + // figure out which block we are + auto offset = intptr_t(ptr) - intptr_t(m_first_block); + + KOKKOS_EXPECTS(offset % actual_size == 0 && offset/actual_size < m_num_blocks); + + Kokkos::memory_fence(); + auto last_idx_idx = Kokkos::atomic_fetch_add((volatile size_type*)&m_last_free_idx, size_type(1)); + last_idx_idx %= m_num_blocks; + m_free_indices[last_idx_idx] = offset / actual_size; + } + +}; + +#if 0 +template < + class DeviceType, + size_t Size, + size_t Align=1, + class SizeType = typename DeviceType::execution_space::size_type +> +class FixedBlockSizeChaseLevMemoryPool + : private MemorySpaceInstanceStorage +{ +public: + + using memory_space = typename DeviceType::memory_space; + using size_type = SizeType; + +private: + + using memory_space_storage_base = MemorySpaceInstanceStorage; + using tracker_type = Kokkos::Impl::SharedAllocationTracker; + using record_type = Kokkos::Impl::SharedAllocationRecord; + + struct alignas(Align) Block { union { char ignore; char data[Size]; }; }; + + static constexpr auto actual_size = sizeof(Block); + + tracker_type m_tracker = { }; + size_type m_num_blocks = 0; + size_type m_first_free_idx = 0; + size_type m_last_free_idx = 0; + + + enum : size_type { IndexInUse = ~size_type(0) }; + +public: + + FixedBlockSizeMemoryPool( + memory_space const& mem_space, + size_type num_blocks + ) : memory_space_storage_base(mem_space), + m_tracker(), + m_num_blocks(num_blocks), + m_first_free_idx(0), + m_last_free_idx(num_blocks) + { + // TODO alignment? + auto block_record = record_type::allocate( + mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(Block) + ); + KOKKOS_ASSERT(intptr_t(block_record->data()) % Align == 0); + m_tracker.assign_allocated_record_to_uninitialized(block_record); + m_first_block = (Block*)block_record->data(); + + auto idx_record = record_type::allocate( + mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(size_type) + ); + KOKKOS_ASSERT(intptr_t(idx_record->data()) % alignof(size_type) == 0); + m_tracker.assign_allocated_record_to_uninitialized(idx_record); + m_free_indices = (size_type*)idx_record->data(); + + for(size_type i = 0; i < num_blocks; ++i) { + m_free_indices[i] = i; + } + + Kokkos::memory_fence(); + } + + // For compatibility with MemoryPool<> + FixedBlockSizeMemoryPool( + memory_space const& mem_space, + size_t mempool_capacity, + unsigned, unsigned, unsigned + ) : FixedBlockSizeMemoryPool(mem_space, mempool_capacity / actual_size) + { /* forwarding ctor, must be empty */ } + + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool() = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool&&) = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool const&) = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool&&) = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool const&) = default; + + + KOKKOS_INLINE_FUNCTION + void* allocate(size_type alloc_size) const noexcept + { + KOKKOS_EXPECTS(alloc_size <= Size); + auto free_idx_counter = Kokkos::atomic_fetch_add((volatile size_type*)&m_first_free_idx, size_type(1)); + auto free_idx_idx = free_idx_counter % m_num_blocks; + + // We don't have exclusive access to m_free_indices[free_idx_idx] because + // the allocate counter might have lapped us since we incremented it + auto current_free_idx = m_free_indices[free_idx_idx]; + size_type free_idx = IndexInUse; + free_idx = + Kokkos::atomic_compare_exchange(&m_free_indices[free_idx_idx], current_free_idx, free_idx); + Kokkos::memory_fence(); + + // TODO figure out how to decrement here? + + if(free_idx == IndexInUse) { + return nullptr; + } + else { + return (void*)&m_first_block[free_idx]; + } + } + + KOKKOS_INLINE_FUNCTION + void deallocate(void* ptr, size_type alloc_size) const noexcept + { + // figure out which block we are + auto offset = intptr_t(ptr) - intptr_t(m_first_block); + + KOKKOS_EXPECTS(offset % actual_size == 0 && offset/actual_size < m_num_blocks); + + Kokkos::memory_fence(); + auto last_idx_idx = Kokkos::atomic_fetch_add((volatile size_type*)&m_last_free_idx, size_type(1)); + last_idx_idx %= m_num_blocks; + m_free_indices[last_idx_idx] = offset / actual_size; + } + +}; +#endif + +} // end namespace Impl +} // end namespace Kokkos + +#endif //KOKKOS_IMPL_KOKKOS_FIXEDBUFFERMEMORYPOOL_HPP diff --git a/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp b/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp index 7d4ffb85c1..ea3480b48b 100644 --- a/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp @@ -1432,7 +1432,10 @@ namespace Impl { template struct JoinLambdaAdapter::enable_if( & JoinOp::join ) )> { typedef ValueType value_type; - typedef StaticAssertSame assert_value_types_match; + static_assert( + std::is_same::value, + "JoinLambdaAdapter static_assert Fail: ValueType != JoinOp::value_type"); + const JoinOp& lambda; KOKKOS_INLINE_FUNCTION JoinLambdaAdapter(const JoinOp& lambda_):lambda(lambda_) {} diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp index d8cb7593bf..848746d265 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp @@ -420,15 +420,19 @@ SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr } // Iterate records to print orphaned memory ... +#ifdef KOKKOS_DEBUG void SharedAllocationRecord< Kokkos::HostSpace , void >:: print_records( std::ostream & s , const Kokkos::HostSpace & , bool detail ) { -#ifdef KOKKOS_DEBUG SharedAllocationRecord< void , void >::print_host_accessible_records( s , "HostSpace" , & s_root_record , detail ); -#else - throw_runtime_exception("SharedAllocationRecord::print_records only works with KOKKOS_DEBUG enabled"); -#endif } +#else +void SharedAllocationRecord< Kokkos::HostSpace , void >:: +print_records( std::ostream & , const Kokkos::HostSpace & , bool ) +{ + throw_runtime_exception("SharedAllocationRecord::print_records only works with KOKKOS_DEBUG enabled"); +} +#endif } // namespace Impl } // namespace Kokkos diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp new file mode 100644 index 0000000000..21b95f6985 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp @@ -0,0 +1,134 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include "Kokkos_Core.hpp" +#include "Kokkos_HostSpace_deepcopy.hpp" + +namespace Kokkos { + +namespace Impl { + +#ifndef KOKKOS_IMPL_HOST_DEEP_COPY_SERIAL_LIMIT +#define KOKKOS_IMPL_HOST_DEEP_COPY_SERIAL_LIMIT 10*8192 +#endif + +void hostspace_parallel_deepcopy(void * dst, const void * src, ptrdiff_t n) { + if((n policy_t; + + // Both src and dst are aligned the same way with respect to 8 byte words + if(reinterpret_cast(src)%8 == reinterpret_cast(dst)%8) { + char* dst_c = reinterpret_cast(dst); + const char* src_c = reinterpret_cast(src); + int count = 0; + // get initial bytes copied + while(reinterpret_cast(dst_c)%8!=0) { + *dst_c=*src_c; + dst_c++; src_c++; count++; + } + + // copy the bulk of the data + double* dst_p = reinterpret_cast(dst_c); + const double* src_p = reinterpret_cast(src_c); + Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_double",policy_t(0,(n-count)/8),[=](const ptrdiff_t i) { + dst_p[i] = src_p[i]; + }); + + // get final data copied + dst_c += ((n-count)/8) * 8; + src_c += ((n-count)/8) * 8; + char* dst_end = reinterpret_cast(dst)+n; + while(dst_c != dst_end) { + *dst_c = *src_c; + dst_c++; src_c++; + } + return; + } + + // Both src and dst are aligned the same way with respect to 4 byte words + if(reinterpret_cast(src)%4 == reinterpret_cast(dst)%4) { + char* dst_c = reinterpret_cast(dst); + const char* src_c = reinterpret_cast(src); + int count = 0; + // get initial bytes copied + while(reinterpret_cast(dst_c)%4!=0) { + *dst_c=*src_c; + dst_c++; src_c++; count++; + } + + // copy the bulk of the data + int32_t* dst_p = reinterpret_cast(dst_c); + const int32_t* src_p = reinterpret_cast(src_c); + Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_int",policy_t(0,(n-count)/4),[=](const ptrdiff_t i) { + dst_p[i] = src_p[i]; + }); + + // get final data copied + dst_c += ((n-count)/4) * 4; + src_c += ((n-count)/4) * 4; + char* dst_end = reinterpret_cast(dst)+n; + while(dst_c != dst_end) { + *dst_c = *src_c; + dst_c++; src_c++; + } + return; + } + + // Src and dst are not aligned the same way, we can only to byte wise copy. + { + char* dst_p = reinterpret_cast(dst); + const char* src_p = reinterpret_cast(src); + Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_char",policy_t(0,n),[=](const ptrdiff_t i) { + dst_p[i] = src_p[i]; + }); + } +} + +} // namespace Impl + +} // namespace Kokkos + diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp new file mode 100644 index 0000000000..b8aea95363 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include + +namespace Kokkos { + +namespace Impl { + +void hostspace_parallel_deepcopy(void * dst, const void * src, ptrdiff_t n); + +} // namespace Impl + +} // namespace Kokkos + diff --git a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp index fff48e87f6..f44a13c574 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp @@ -52,6 +52,8 @@ #include #include +#include // std::numeric_limits + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -477,6 +479,9 @@ class HostThreadTeamMember { public: using scratch_memory_space = typename HostExecSpace::scratch_memory_space ; + using execution_space = HostExecSpace; + using thread_team_member = HostThreadTeamMember; + using host_thread_team_member = HostThreadTeamMember; private: @@ -490,8 +495,8 @@ public: constexpr HostThreadTeamMember( HostThreadTeamData & arg_data ) noexcept : m_scratch( arg_data.team_shared() , arg_data.team_shared_bytes() ) , m_data( arg_data ) - , m_league_rank(0) - , m_league_size(1) + , m_league_rank(arg_data.m_league_rank) + , m_league_size(arg_data.m_league_size) {} constexpr HostThreadTeamMember( HostThreadTeamData & arg_data @@ -630,6 +635,12 @@ public: KOKKOS_INLINE_FUNCTION typename std::enable_if< is_reducer< ReducerType >::value >::type team_reduce( ReducerType const & reducer ) const noexcept + { team_reduce(reducer,reducer.reference()); } + + template< typename ReducerType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< is_reducer< ReducerType >::value >::type + team_reduce( ReducerType const & reducer, typename ReducerType::value_type contribution ) const noexcept #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) { if ( 1 < m_data.m_team_size ) { @@ -640,7 +651,7 @@ public: // Non-root copies to their local buffer: /*reducer.copy( (value_type*) m_data.team_reduce_local() , reducer.data() );*/ - *((value_type*) m_data.team_reduce_local()) = reducer.reference(); + *((value_type*) m_data.team_reduce_local()) = contribution; } // Root does not overwrite shared memory until all threads arrive @@ -656,12 +667,13 @@ public: value_type * const src = (value_type*) m_data.team_member(i)->team_reduce_local(); - reducer.join( reducer.reference(), *src); + reducer.join( contribution, *src); } // Copy result to root member's buffer: // reducer.copy( (value_type*) m_data.team_reduce() , reducer.data() ); - *((value_type*) m_data.team_reduce()) = reducer.reference(); + *((value_type*) m_data.team_reduce()) = contribution; + reducer.reference() = contribution; m_data.team_rendezvous_release(); // This thread released all other threads from 'team_rendezvous' // with a return value of 'false' @@ -670,6 +682,8 @@ public: // Copy from root member's buffer: reducer.reference() = *((value_type*) m_data.team_reduce()); } + } else { + reducer.reference() = contribution; } } #else @@ -795,50 +809,105 @@ public: namespace Kokkos { -template +template KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct > -TeamThreadRange( Impl::HostThreadTeamMember const & member - , iType const & count ) +Impl::TeamThreadRangeBoundariesStruct +TeamThreadRange( + Member const & member, + iType count, + typename std::enable_if< + Impl::is_thread_team_member::value + >::type const** = nullptr +) { return Impl::TeamThreadRangeBoundariesStruct - >(member,0,count); + (member,0,count); } -template +template KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct - < typename std::common_type< iType1, iType2 >::type - , Impl::HostThreadTeamMember > -TeamThreadRange( Impl::HostThreadTeamMember const & member - , iType1 const & begin , iType2 const & end ) +Impl::TeamThreadRangeBoundariesStruct< + typename std::common_type< iType1, iType2 >::type, Member +> +TeamThreadRange( + Member const & member, + iType1 begin, + iType2 end, + typename std::enable_if< + Impl::is_thread_team_member::value + >::type const** = nullptr +) { return Impl::TeamThreadRangeBoundariesStruct < typename std::common_type< iType1, iType2 >::type - , Impl::HostThreadTeamMember >( member , begin , end ); + , Member >( member , begin , end ); } -template +template KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct > -ThreadVectorRange - ( Impl::HostThreadTeamMember const & member - , const iType & count ) +Impl::TeamThreadRangeBoundariesStruct +TeamVectorRange( + Member const & member, + iType count, + typename std::enable_if< + Impl::is_thread_team_member::value + >::type const** = nullptr +) { - return Impl::ThreadVectorRangeBoundariesStruct >(member,count); + return + Impl::TeamThreadRangeBoundariesStruct + (member,0,count); } -template +template KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct > -ThreadVectorRange - ( Impl::HostThreadTeamMember const & member - , const iType & arg_begin - , const iType & arg_end ) +Impl::TeamThreadRangeBoundariesStruct< + typename std::common_type< iType1, iType2 >::type, Member +> +TeamVectorRange( + Member const & member, + iType1 begin, + iType2 end, + typename std::enable_if< + Impl::is_thread_team_member::value + >::type const** = nullptr +) { - return Impl::ThreadVectorRangeBoundariesStruct >(member,arg_begin,arg_end); + return + Impl::TeamThreadRangeBoundariesStruct + < typename std::common_type< iType1, iType2 >::type + , Member >( member , begin , end ); +} + +template +KOKKOS_INLINE_FUNCTION +Impl::ThreadVectorRangeBoundariesStruct +ThreadVectorRange( + Member const & member, + iType count, + typename std::enable_if< + Impl::is_thread_team_member::value + >::type const** = nullptr +) +{ + return Impl::ThreadVectorRangeBoundariesStruct(member,count); +} + +template +KOKKOS_INLINE_FUNCTION +Impl::ThreadVectorRangeBoundariesStruct +ThreadVectorRange( + Member const & member, + iType arg_begin, + iType arg_end, + typename std::enable_if< + Impl::is_thread_team_member::value + >::type const** = nullptr +) +{ + return Impl::ThreadVectorRangeBoundariesStruct(member,arg_begin,arg_end); } //---------------------------------------------------------------------------- @@ -848,11 +917,14 @@ ThreadVectorRange * * The range [0..N) is mapped to all threads of the the calling thread team. */ -template +template KOKKOS_INLINE_FUNCTION void parallel_for - ( Impl::TeamThreadRangeBoundariesStruct > const & loop_boundaries + ( Impl::TeamThreadRangeBoundariesStruct const & loop_boundaries , Closure const & closure + , typename std::enable_if< + Impl::is_host_thread_team_member::value + >::type const** = nullptr ) { for( iType i = loop_boundaries.start @@ -862,11 +934,14 @@ void parallel_for } } -template +template KOKKOS_INLINE_FUNCTION void parallel_for - ( Impl::ThreadVectorRangeBoundariesStruct > const & loop_boundaries + ( Impl::ThreadVectorRangeBoundariesStruct const & loop_boundaries , Closure const & closure + , typename std::enable_if< + Impl::is_host_thread_team_member::value + >::type const** = nullptr ) { #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP @@ -881,40 +956,47 @@ void parallel_for //---------------------------------------------------------------------------- -template< typename iType, class Space, class Closure, class Reducer > +template< typename iType, class Closure, class Reducer, class Member > KOKKOS_INLINE_FUNCTION -typename std::enable_if< Kokkos::is_reducer< Reducer >::value >::type +typename std::enable_if< + Kokkos::is_reducer< Reducer >::value + && Impl::is_host_thread_team_member::value +>::type parallel_reduce - ( Impl::TeamThreadRangeBoundariesStruct > + ( Impl::TeamThreadRangeBoundariesStruct const & loop_boundaries , Closure const & closure , Reducer const & reducer ) { - reducer.init( reducer.reference() ); + typename Reducer::value_type value; + reducer.init( value ); for( iType i = loop_boundaries.start ; i < loop_boundaries.end ; i += loop_boundaries.increment ) { - closure( i , reducer.reference() ); + closure( i , value ); } - - loop_boundaries.thread.team_reduce( reducer ); + + loop_boundaries.thread.team_reduce( reducer, value ); } -template< typename iType, class Space, typename Closure, typename ValueType > +template< typename iType, typename Closure, typename ValueType, typename Member > KOKKOS_INLINE_FUNCTION -typename std::enable_if< ! Kokkos::is_reducer::value >::type +typename std::enable_if< + ! Kokkos::is_reducer::value + && Impl::is_host_thread_team_member::value +>::type parallel_reduce - ( Impl::TeamThreadRangeBoundariesStruct > + ( Impl::TeamThreadRangeBoundariesStruct const & loop_boundaries , Closure const & closure , ValueType & result ) { - Sum reducer( result ); - - reducer.init( result ); + ValueType val; + Sum reducer( val ); + reducer.init( val ); for( iType i = loop_boundaries.start ; i < loop_boundaries.end @@ -923,6 +1005,7 @@ parallel_reduce } loop_boundaries.thread.team_reduce( reducer ); + result = reducer.reference(); } /*template< typename iType, class Space @@ -958,11 +1041,14 @@ void parallel_reduce * calling thread team and a summation of val is * performed and put into result. */ -template< typename iType, class Space , class Lambda, typename ValueType > +template< typename iType, class Lambda, typename ValueType, typename Member > KOKKOS_INLINE_FUNCTION -typename std::enable_if< ! Kokkos::is_reducer::value >::type +typename std::enable_if< + ! Kokkos::is_reducer::value + && Impl::is_host_thread_team_member::value +>::type parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, + (const Impl::ThreadVectorRangeBoundariesStruct& loop_boundaries, const Lambda & lambda, ValueType& result) { @@ -974,11 +1060,14 @@ parallel_reduce } } -template< typename iType, class Space , class Lambda, typename ReducerType > +template< typename iType, class Lambda, typename ReducerType, typename Member > KOKKOS_INLINE_FUNCTION -typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type +typename std::enable_if< + Kokkos::is_reducer< ReducerType >::value + && Impl::is_host_thread_team_member::value +>::type parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, + (const Impl::ThreadVectorRangeBoundariesStruct& loop_boundaries, const Lambda & lambda, const ReducerType& reducer) { @@ -990,41 +1079,15 @@ parallel_reduce } } -/** \brief Intra-thread vector parallel_reduce. - * - * Executes lambda(iType i, ValueType & val) for each i=[0..N) - * - * The range [0..N) is mapped to all vector lanes of the the - * calling thread and a reduction of val is performed using - * JoinType(ValueType& val, const ValueType& update) - * and put into init_result. - * The input value of init_result is used as initializer for - * temporary variables of ValueType. Therefore * the input - * value should be the neutral element with respect to the - * join operation (e.g. '0 for +-' or * '1 for *'). - */ -template< typename iType, class Space - , class Lambda, class JoinType , typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda, - const JoinType & join, - ValueType& result) -{ - for( iType i = loop_boundaries.start ; - i < loop_boundaries.end ; - i += loop_boundaries.increment ) { - lambda(i,result); - } -} - //---------------------------------------------------------------------------- -template< typename iType, class Space, class Closure > +template< typename iType, class Closure, class Member > KOKKOS_INLINE_FUNCTION -void parallel_scan - ( Impl::TeamThreadRangeBoundariesStruct > const & loop_boundaries +typename std::enable_if< + Impl::is_host_thread_team_member::value +>::type +parallel_scan + ( Impl::TeamThreadRangeBoundariesStruct const & loop_boundaries , Closure const & closure ) { @@ -1056,10 +1119,13 @@ void parallel_scan } -template< typename iType, class Space, class ClosureType > +template< typename iType, class ClosureType, class Member > KOKKOS_INLINE_FUNCTION -void parallel_scan - ( Impl::ThreadVectorRangeBoundariesStruct > const & loop_boundaries +typename std::enable_if< + Impl::is_host_thread_team_member::value +>::type +parallel_scan + ( Impl::ThreadVectorRangeBoundariesStruct const & loop_boundaries , ClosureType const & closure ) { @@ -1083,47 +1149,65 @@ void parallel_scan //---------------------------------------------------------------------------- -template< class Space > +template< class Member > KOKKOS_INLINE_FUNCTION -Impl::ThreadSingleStruct > -PerTeam(const Impl::HostThreadTeamMember & member ) +Impl::ThreadSingleStruct +PerTeam( + Member const& member, + typename std::enable_if::value>::type const** = nullptr +) { - return Impl::ThreadSingleStruct >(member); + return Impl::ThreadSingleStruct(member); } -template< class Space > +template< class Member > KOKKOS_INLINE_FUNCTION -Impl::VectorSingleStruct > -PerThread(const Impl::HostThreadTeamMember & member) +Impl::VectorSingleStruct +PerThread( + Member const& member, + typename std::enable_if::value>::type const** = nullptr +) { - return Impl::VectorSingleStruct >(member); + return Impl::VectorSingleStruct(member); } -template< class Space , class FunctorType > +template< class Member , class FunctorType > KOKKOS_INLINE_FUNCTION -void single( const Impl::ThreadSingleStruct< Impl::HostThreadTeamMember > & single , const FunctorType & functor ) +typename std::enable_if< + Impl::is_host_thread_team_member::value +>::type +single( const Impl::ThreadSingleStruct & single , const FunctorType & functor ) { // 'single' does not perform a barrier. if ( single.team_member.team_rank() == 0 ) functor(); } -template< class Space , class FunctorType , typename ValueType > +template< class Member, class FunctorType , typename ValueType > KOKKOS_INLINE_FUNCTION -void single( const Impl::ThreadSingleStruct< Impl::HostThreadTeamMember > & single , const FunctorType & functor , ValueType & val ) +typename std::enable_if< + Impl::is_host_thread_team_member::value +>::type +single( const Impl::ThreadSingleStruct & single , const FunctorType & functor , ValueType & val ) { single.team_member.team_broadcast( functor , val , 0 ); } -template< class Space , class FunctorType > +template< class Member, class FunctorType > KOKKOS_INLINE_FUNCTION -void single( const Impl::VectorSingleStruct< Impl::HostThreadTeamMember > & , const FunctorType & functor ) +typename std::enable_if< + Impl::is_host_thread_team_member::value +>::type +single( const Impl::VectorSingleStruct & , const FunctorType & functor ) { functor(); } -template< class Space , class FunctorType , typename ValueType > +template< class Member, class FunctorType , typename ValueType > KOKKOS_INLINE_FUNCTION -void single( const Impl::VectorSingleStruct< Impl::HostThreadTeamMember > & , const FunctorType & functor , ValueType & val ) +typename std::enable_if< + Impl::is_host_thread_team_member::value +>::type +single( const Impl::VectorSingleStruct & , const FunctorType & functor , ValueType & val ) { functor(val); } diff --git a/lib/kokkos/core/src/impl/Kokkos_LIFO.hpp b/lib/kokkos/core/src/impl/Kokkos_LIFO.hpp new file mode 100644 index 0000000000..43e9783beb --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_LIFO.hpp @@ -0,0 +1,431 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_LIFO_HPP +#define KOKKOS_IMPL_LIFO_HPP + +#include +#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using CUDA + +#include + +#include +#include +#include // KOKKOS_EXPECTS +#include + +#include // atomic_compare_exchange, atomic_fence + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template +struct LockBasedLIFOCommon +{ + + using value_type = T; + + using node_type = SimpleSinglyLinkedListNode<>; + + static constexpr uintptr_t LockTag = ~uintptr_t(0); + static constexpr uintptr_t EndTag = ~uintptr_t(1); + + OwningRawPtr m_head = (node_type*)EndTag; + + KOKKOS_INLINE_FUNCTION + bool _try_push_node(node_type& node) { + + KOKKOS_EXPECTS(!node.is_enqueued()); + + auto* volatile & next = LinkedListNodeAccess::next_ptr(node); + + // store the head of the queue in a local variable + auto* old_head = m_head; + + // retry until someone locks the queue or we successfully compare exchange + while (old_head != (node_type*)LockTag) { + + // TODO @tasking @memory_order DSH this should have a memory order and not a memory fence + + // set task->next to the head of the queue + next = old_head; + + // fence to emulate acquire semantics on next and release semantics on + // the store of m_head + // Do not proceed until 'next' has been stored. + Kokkos::memory_fence(); + + // store the old head + auto* const old_head_tmp = old_head; + + // attempt to swap task with the old head of the queue + // as if this were done atomically: + // if(m_head == old_head) { + // m_head = &node; + // } + // old_head = m_head; + old_head = ::Kokkos::atomic_compare_exchange(&m_head, old_head, &node); + + if(old_head_tmp == old_head) return true; + } + + // Failed, replace 'task->m_next' value since 'task' remains + // not a member of a queue. + + // TODO @tasking @memory_order DSH this should have a memory order and not a memory fence + LinkedListNodeAccess::mark_as_not_enqueued(node); + + // fence to emulate acquire semantics on next + // Do not proceed until 'next' has been stored. + ::Kokkos::memory_fence(); + + return false; + } + + bool _is_empty() const noexcept { + // TODO @tasking @memory_order DSH make this an atomic load with memory order + return (volatile node_type*)this->m_head == (node_type*)EndTag; + } + +}; + +//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ + +template +class LockBasedLIFO + : private LockBasedLIFOCommon +{ + +private: + + using base_t = LockBasedLIFOCommon; + using node_type = typename base_t::node_type; + +public: + + using value_type = typename base_t::value_type; // = T + using intrusive_node_base_type = SimpleSinglyLinkedListNode<>; + +public: + + + LockBasedLIFO() = default; + LockBasedLIFO(LockBasedLIFO const&) = delete; + LockBasedLIFO(LockBasedLIFO&&) = delete; + LockBasedLIFO& operator=(LockBasedLIFO const&) = delete; + LockBasedLIFO& operator=(LockBasedLIFO&&) = delete; + + ~LockBasedLIFO() = default; + + + bool empty() const noexcept { + // TODO @tasking @memory_order DSH memory order + return this->_is_empty(); + } + + KOKKOS_INLINE_FUNCTION + OptionalRef pop(bool abort_on_locked = false) + { + // Put this in here to avoid requiring value_type to be complete until now. + static_assert( + std::is_base_of::value, + "Intrusive linked-list value_type must be derived from intrusive_node_base_type" + ); + + // We can't use the static constexpr LockTag directly because + // atomic_compare_exchange needs to bind a reference to that, and you + // can't do that with static constexpr variables. + auto* const lock_tag = (node_type*)base_t::LockTag; + + // TODO @tasking @memory_order DSH shouldn't this be a relaxed atomic load? + // start with the return value equal to the head + auto* rv = this->m_head; + + // Retry until the lock is acquired or the queue is empty. + while(rv != (node_type*)base_t::EndTag) { + + // The only possible values for the queue are + // (1) lock, (2) end, or (3) a valid task. + // Thus zero will never appear in the queue. + // + // If queue is locked then just read by guaranteeing the CAS will fail. + KOKKOS_ASSERT(rv != nullptr); + + if(rv == lock_tag) { + // TODO @tasking @memory_order DSH this should just be an atomic load followed by a continue + // just set rv to nullptr for now, effectively turning the + // atomic_compare_exchange below into a load + rv = nullptr; + if(abort_on_locked) { + break; + } + } + + auto* const old_rv = rv; + + // TODO @tasking @memory_order DSH this should be a weak compare exchange in a loop + rv = Kokkos::atomic_compare_exchange(&(this->m_head), old_rv, lock_tag); + + if(rv == old_rv) { + // CAS succeeded and queue is locked + // + // This thread has locked the queue and removed 'rv' from the queue. + // Extract the next entry of the queue from 'rv->m_next' + // and mark 'rv' as popped from a queue by setting + // 'rv->m_next = nullptr'. + // + // Place the next entry in the head of the queue, + // which also unlocks the queue. + // + // This thread has exclusive access to + // the queue and the popped task's m_next. + + // TODO @tasking @memory_order DSH check whether the volatile is needed here + auto* volatile& next = LinkedListNodeAccess::next_ptr(*rv); //->m_next; + + // This algorithm is not lockfree because a adversarial scheduler could + // context switch this thread at this point and the rest of the threads + // calling this method would never make forward progress + + // TODO @tasking @memory_order DSH I think this needs to be a atomic store release (and the memory fence needs to be removed) + // TODO @tasking DSH prove that this doesn't need to be a volatile store + // Lock is released here + this->m_head = next; + + // Mark rv as popped by assigning nullptr to the next + LinkedListNodeAccess::mark_as_not_enqueued(*rv); + + Kokkos::memory_fence(); + + return OptionalRef{ *static_cast(rv) }; + } + + // Otherwise, the CAS got a value that didn't match (either because + // another thread locked the queue and we observed the lock tag or because + // another thread replaced the head and now we want to try to lock the + // queue with that as the popped item. Either way, try again. + } + + // Return an empty OptionalRef by calling the default constructor + return { }; + } + + KOKKOS_INLINE_FUNCTION + OptionalRef + steal() + { + // TODO @tasking @optimization DSH do this with fewer retries + return pop(/* abort_on_locked = */ true); + } + + KOKKOS_INLINE_FUNCTION + bool push(node_type& node) + { + while(!this->_try_push_node(node)) { /* retry until success */ } + // for consistency with push interface on other queue types: + return true; + } + + KOKKOS_INLINE_FUNCTION + bool push(node_type&& node) + { + // Just forward to the lvalue version + return push(node); + } + +}; + + +/** @brief A Multiple Producer, Single Consumer Queue with some special semantics + * + * This multi-producer, single consumer queue has the following semantics: + * + * - Any number of threads may call `try_emplace`/`try_push` + * + These operations are lock-free. + * - Exactly one thread calls `consume()`, and the call occurs exactly once + * in the lifetime of the queue. + * + This operation is lock-free (and wait-free w.r.t. producers) + * - Any calls to `try_push` that happen-before the call to + * `consume()` will succeed and return an true, such that the `consume()` + * call will visit that node. + * - Any calls to `try_push` for which the single call to `consume()` + * happens-before those calls will return false and the node given as + * an argument to `try_push` will not be visited by consume() + * + * + * @tparam T The type of items in the queue + * + */ +template +class SingleConsumeOperationLIFO + : private LockBasedLIFOCommon +{ +private: + + using base_t = LockBasedLIFOCommon; + using node_type = typename base_t::node_type; + + // Allows us to reuse the existing infrastructure for + static constexpr auto ConsumedTag = base_t::LockTag; + +public: + + using value_type = typename base_t::value_type; // = T + + KOKKOS_INLINE_FUNCTION + SingleConsumeOperationLIFO() noexcept = default; + + SingleConsumeOperationLIFO(SingleConsumeOperationLIFO const&) = delete; + SingleConsumeOperationLIFO(SingleConsumeOperationLIFO&&) = delete; + SingleConsumeOperationLIFO& operator=(SingleConsumeOperationLIFO const&) = delete; + SingleConsumeOperationLIFO& operator=(SingleConsumeOperationLIFO&&) = delete; + + KOKKOS_INLINE_FUNCTION + ~SingleConsumeOperationLIFO() = default; + + KOKKOS_INLINE_FUNCTION + bool empty() const noexcept { + // TODO @tasking @memory_order DSH memory order + return this->_is_empty(); + } + + KOKKOS_INLINE_FUNCTION + bool is_consumed() const noexcept { + // TODO @tasking @memory_order DSH memory order? + return this->m_head == (node_type*)ConsumedTag; + } + + KOKKOS_INLINE_FUNCTION + bool try_push(node_type& node) + { + return this->_try_push_node(node); + // Ensures: (return value is true) || (node.is_enqueued() == false); + } + + template + KOKKOS_INLINE_FUNCTION + void consume(Function&& f) { + auto* const consumed_tag = (node_type*)ConsumedTag; + + // Swap the Consumed tag into the head of the queue: + + // (local variable used for assertion only) + // TODO @tasking @memory_order DSH this should have memory order release, I think + Kokkos::memory_fence(); + auto old_head = Kokkos::atomic_exchange(&(this->m_head), consumed_tag); + + // Assert that the queue wasn't consumed before this + // This can't be an expects clause because the acquire fence on the read + // would be a side-effect + KOKKOS_ASSERT(old_head != consumed_tag); + + // We now have exclusive access to the queue; loop over it and call + // the user function + while(old_head != (node_type*)base_t::EndTag) { + + // get the Node to make the call with + auto* call_arg = old_head; + + // advance the head + old_head = LinkedListNodeAccess::next_ptr(*old_head); + + // Mark as popped before proceeding + LinkedListNodeAccess::mark_as_not_enqueued(*call_arg); + + // Call the user function + auto& arg = *static_cast(call_arg); + f(std::move(arg)); + + } + + } + +}; + +} // end namespace Impl +} // end namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +struct TaskQueueTraitsLockBased +{ + + // TODO @tasking @documentation DSH document what concepts these match + + template + using ready_queue_type = LockBasedLIFO; + + template + using waiting_queue_type = SingleConsumeOperationLIFO; + + template + using intrusive_task_base_type = + typename ready_queue_type::intrusive_node_base_type; + + static constexpr auto ready_queue_insertion_may_fail = false; + +}; + + +} // end namespace Impl +} // end namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* defined KOKKOS_ENABLE_TASKDAG */ +#endif /* #ifndef KOKKOS_IMPL_LIFO_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_LinkedListNode.hpp b/lib/kokkos/core/src/impl/Kokkos_LinkedListNode.hpp new file mode 100644 index 0000000000..78a6faca90 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_LinkedListNode.hpp @@ -0,0 +1,206 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_LINKEDLISTNODE_HPP +#define KOKKOS_IMPL_LINKEDLISTNODE_HPP + +#include +#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using CUDA + +#include + +#include +#include +#include // KOKKOS_EXPECTS + +#include // atomic_compare_exchange, atomic_fence + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +struct LinkedListNodeAccess; + +template < + uintptr_t NotEnqueuedValue = 0, + template class PointerTemplate = std::add_pointer +> +struct SimpleSinglyLinkedListNode +{ + +private: + + using pointer_type = typename PointerTemplate::type; + + pointer_type m_next = reinterpret_cast(NotEnqueuedValue); + + // These are private because they are an implementation detail of the queue + // and should not get added to the value type's interface via the intrusive + // wrapper. + + KOKKOS_INLINE_FUNCTION + void mark_as_not_enqueued() noexcept { + // TODO @tasking @memory_order DSH make this an atomic store with memory order + m_next = (pointer_type)NotEnqueuedValue; + } + + KOKKOS_INLINE_FUNCTION + void mark_as_not_enqueued() volatile noexcept { + // TODO @tasking @memory_order DSH make this an atomic store with memory order + m_next = (pointer_type)NotEnqueuedValue; + } + + KOKKOS_INLINE_FUNCTION + pointer_type& _next_ptr() noexcept { + return m_next; + } + + KOKKOS_INLINE_FUNCTION + pointer_type volatile& _next_ptr() volatile noexcept { + return m_next; + } + + KOKKOS_INLINE_FUNCTION + pointer_type const& _next_ptr() const noexcept { + return m_next; + } + + KOKKOS_INLINE_FUNCTION + pointer_type const volatile& _next_ptr() const volatile noexcept { + return m_next; + } + + friend struct LinkedListNodeAccess; + +public: + + // KOKKOS_CONSTEXPR_14 + KOKKOS_INLINE_FUNCTION + bool is_enqueued() const noexcept { + // TODO @tasking @memory_order DSH make this an atomic load with memory order + return m_next != reinterpret_cast(NotEnqueuedValue); + } + + // KOKKOS_CONSTEXPR_14 + KOKKOS_INLINE_FUNCTION + bool is_enqueued() const volatile noexcept { + // TODO @tasking @memory_order DSH make this an atomic load with memory order + return m_next != reinterpret_cast(NotEnqueuedValue); + } + +}; + +/// Attorney for LinkedListNode, since user types inherit from it +struct LinkedListNodeAccess +{ + + template + KOKKOS_INLINE_FUNCTION + static void mark_as_not_enqueued(Node& node) noexcept { + node.mark_as_not_enqueued(); + } + + template + KOKKOS_INLINE_FUNCTION + static void mark_as_not_enqueued(Node volatile& node) noexcept { + node.mark_as_not_enqueued(); + } + + template + KOKKOS_INLINE_FUNCTION + static + typename Node::pointer_type& + next_ptr(Node& node) noexcept { + return node._next_ptr(); + } + + template + KOKKOS_INLINE_FUNCTION + static + typename Node::pointer_type& + next_ptr(Node volatile& node) noexcept { + return node._next_ptr(); + } + + template + KOKKOS_INLINE_FUNCTION + static + typename Node::pointer_type& + next_ptr(Node const& node) noexcept { + return node._next_ptr(); + } + + template + KOKKOS_INLINE_FUNCTION + static + typename Node::pointer_type& + prev_ptr(Node& node) noexcept { + return node._prev_ptr(); + } + + template + KOKKOS_INLINE_FUNCTION + static + typename Node::pointer_type& + prev_ptr(Node const& node) noexcept { + return node._prev_ptr(); + } + +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +} // end namespace Impl +} // end namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* defined KOKKOS_ENABLE_TASKDAG */ +#endif /* #ifndef KOKKOS_IMPL_LINKEDLISTNODE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_MemoryPoolAllocator.hpp b/lib/kokkos/core/src/impl/Kokkos_MemoryPoolAllocator.hpp new file mode 100644 index 0000000000..b4629df5b0 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_MemoryPoolAllocator.hpp @@ -0,0 +1,140 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_MEMORYPOOLALLOCATOR_HPP +#define KOKKOS_IMPL_MEMORYPOOLALLOCATOR_HPP + +#include + +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +namespace Kokkos { +namespace Impl { + +template +class MemoryPoolAllocator { +public: + + using memory_pool = MemoryPool; + +private: + + memory_pool m_pool; + +public: + + KOKKOS_INLINE_FUNCTION + MemoryPoolAllocator() = default; + KOKKOS_INLINE_FUNCTION + MemoryPoolAllocator(MemoryPoolAllocator const&) = default; + KOKKOS_INLINE_FUNCTION + MemoryPoolAllocator(MemoryPoolAllocator&&) = default; + KOKKOS_INLINE_FUNCTION + MemoryPoolAllocator& operator=(MemoryPoolAllocator const&) = default; + KOKKOS_INLINE_FUNCTION + MemoryPoolAllocator& operator=(MemoryPoolAllocator&&) = default; + KOKKOS_INLINE_FUNCTION + ~MemoryPoolAllocator() = default; + + KOKKOS_INLINE_FUNCTION + explicit MemoryPoolAllocator(memory_pool const& arg_pool) : m_pool(arg_pool) { } + KOKKOS_INLINE_FUNCTION + explicit MemoryPoolAllocator(memory_pool&& arg_pool) : m_pool(std::move(arg_pool)) { } + +public: + + using value_type = T; + using pointer = T*; + using size_type = typename MemoryPool::memory_space::size_type; + using difference_type = typename std::make_signed::type; + + template + struct rebind { + using other = MemoryPoolAllocator; + }; + + KOKKOS_INLINE_FUNCTION + pointer allocate(size_t n) { + void* rv = m_pool.allocate(n * sizeof(T)); + if(rv == nullptr) { + Kokkos::abort("Kokkos MemoryPool allocator failed to allocate memory"); + } + return reinterpret_cast(rv); + } + + KOKKOS_INLINE_FUNCTION + void deallocate(T* ptr, size_t n) { + m_pool.deallocate(ptr, n * sizeof(T)); + } + + KOKKOS_INLINE_FUNCTION + size_type max_size() const { + return m_pool.max_block_size(); + } + + KOKKOS_INLINE_FUNCTION + bool operator==(MemoryPoolAllocator const& other) const { + return m_pool == other.m_pool; + } + + KOKKOS_INLINE_FUNCTION + bool operator!=(MemoryPoolAllocator const& other) const { + return !(*this == other); + } + +}; + +} // end namespace Impl +} // end namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + + + +#endif /* #ifndef KOKKOS_IMPL_MEMORYPOOLALLOCATOR_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_MultipleTaskQueue.hpp b/lib/kokkos/core/src/impl/Kokkos_MultipleTaskQueue.hpp new file mode 100644 index 0000000000..ed8d2be5ae --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_MultipleTaskQueue.hpp @@ -0,0 +1,616 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_MULTIPLETASKQUEUE_HPP +#define KOKKOS_IMPL_MULTIPLETASKQUEUE_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +// A *non*-concurrent linked list of tasks that failed to be enqueued +// (We can't reuse the wait queue for this because of the semantics of that +// queue that require it to be popped exactly once, and if a task has failed +// to be enqueued, it has already been marked ready) +template +struct FailedQueueInsertionLinkedListSchedulingInfo { + using task_base_type = TaskNode; + task_base_type* next = nullptr; +}; + +struct EmptyTaskSchedulingInfo { }; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template < + class ExecSpace, + class MemorySpace, + class TaskQueueTraits, + class MemoryPool +> +class MultipleTaskQueue; + +template +struct MultipleTaskQueueTeamEntry { +public: + + using task_base_type = TaskNode; + using runnable_task_base_type = RunnableTaskBase; + using ready_queue_type = typename TaskQueueTraits::template ready_queue_type; + using task_queue_traits = TaskQueueTraits; + using task_scheduling_info_type = typename std::conditional< + TaskQueueTraits::ready_queue_insertion_may_fail, + FailedQueueInsertionLinkedListSchedulingInfo, + EmptyTaskSchedulingInfo + >::type; + +private: + + // Number of allowed priorities + static constexpr int NumPriorities = 3; + + ready_queue_type m_ready_queues[NumPriorities][2]; + + task_base_type* m_failed_heads[NumPriorities][2]; + + KOKKOS_INLINE_FUNCTION + task_base_type*& + failed_head_for(runnable_task_base_type const& task) + { + return m_failed_heads[int(task.get_priority())][int(task.get_task_type())]; + } + + template + KOKKOS_INLINE_FUNCTION + OptionalRef + _pop_failed_insertion( + int priority, TaskType type, + typename std::enable_if< + task_queue_traits::ready_queue_insertion_may_fail + and std::is_void<_always_void>::value, + void* + >::type = nullptr + ) { + auto* rv_ptr = m_failed_heads[priority][(int)type]; + if(rv_ptr) { + m_failed_heads[priority][(int)type] = + rv_ptr->as_runnable_task() + .template scheduling_info_as() + .next; + return OptionalRef{ *rv_ptr }; + } + else { + return OptionalRef{ nullptr }; + } + } + + template + KOKKOS_INLINE_FUNCTION + OptionalRef + _pop_failed_insertion( + int priority, TaskType type, + typename std::enable_if< + not task_queue_traits::ready_queue_insertion_may_fail + and std::is_void<_always_void>::value, + void* + >::type = nullptr + ) { + return OptionalRef{ nullptr }; + } + +public: + + KOKKOS_INLINE_FUNCTION + MultipleTaskQueueTeamEntry() { + for(int iPriority = 0; iPriority < NumPriorities; ++iPriority) { + for(int iType = 0; iType < 2; ++iType) { + m_failed_heads[iPriority][iType] = nullptr; + } + } + } + + + KOKKOS_INLINE_FUNCTION + OptionalRef + try_to_steal_ready_task() + { + auto return_value = OptionalRef{}; + // prefer lower priority tasks when stealing + for(int i_priority = NumPriorities-1; i_priority >= 0; --i_priority) { + // Check for a single task with this priority + return_value = m_ready_queues[i_priority][TaskSingle].steal(); + if(return_value) return return_value; + + // Check for a team task with this priority + return_value = m_ready_queues[i_priority][TaskTeam].steal(); + if(return_value) return return_value; + + } + return return_value; + } + + KOKKOS_INLINE_FUNCTION + OptionalRef + pop_ready_task() + { + auto return_value = OptionalRef{}; + for(int i_priority = 0; i_priority < NumPriorities; ++i_priority) { + return_value = _pop_failed_insertion(i_priority, TaskTeam); + if(not return_value) return_value = m_ready_queues[i_priority][TaskTeam].pop(); + if(return_value) return return_value; + + // Check for a single task with this priority + return_value = _pop_failed_insertion(i_priority, TaskSingle); + if(not return_value) return_value = m_ready_queues[i_priority][TaskSingle].pop(); + if(return_value) return return_value; + } + return return_value; + } + + KOKKOS_INLINE_FUNCTION + ready_queue_type& + team_queue_for(runnable_task_base_type const& task) + { + return m_ready_queues[int(task.get_priority())][int(task.get_task_type())]; + } + + + template + KOKKOS_INLINE_FUNCTION + void do_handle_failed_insertion( + runnable_task_base_type&& task, + typename std::enable_if< + task_queue_traits::ready_queue_insertion_may_fail + and std::is_void<_always_void>::value, + void* + >::type = nullptr + ) + { + // failed insertions, if they happen, must be from the only thread that + // is allowed to push to m_ready_queues, so this linked-list insertion is not + // concurrent + auto& node = task.template scheduling_info_as(); + auto*& head = failed_head_for(task); + node.next = head; + head = &task; + } + + template + KOKKOS_INLINE_FUNCTION + void do_handle_failed_insertion( + runnable_task_base_type&& task, + typename std::enable_if< + not task_queue_traits::ready_queue_insertion_may_fail + and std::is_void<_always_void>::value, + void* + >::type = nullptr + ) + { + Kokkos::abort("should be unreachable!"); + } + + + template + KOKKOS_INLINE_FUNCTION + void + flush_failed_insertions( + int priority, + int task_type, + typename std::enable_if< + task_queue_traits::ready_queue_insertion_may_fail + and std::is_void<_always_void>::value, // just to make this dependent on template parameter + int + >::type = 0 + ) { + // TODO @tasking @minor DSH this somethimes gets some things out of LIFO order, which may be undesirable (but not a bug) + + + auto*& failed_head = m_failed_heads[priority][task_type]; + auto& team_queue = m_ready_queues[priority][task_type]; + + while(failed_head != nullptr) { + bool success = team_queue.push(*failed_head); + if(success) { + // Step to the next linked list element + failed_head = failed_head->as_runnable_task() + .template scheduling_info_as().next; + } + else { + // no more room, stop traversing and leave the head where it is + break; + } + } + } + + + template + KOKKOS_INLINE_FUNCTION + void + flush_failed_insertions( + int, int, + typename std::enable_if< + not task_queue_traits::ready_queue_insertion_may_fail + and std::is_void<_always_void>::value, // just to make this dependent on template parameter + int + >::type = 0 + ) { } + + + KOKKOS_INLINE_FUNCTION + void + flush_all_failed_insertions() { + for(int iPriority = 0; iPriority < NumPriorities; ++iPriority) { + flush_failed_insertions(iPriority, (int)TaskType::TaskTeam); + flush_failed_insertions(iPriority, (int)TaskType::TaskSingle); + } + } + + + template + KOKKOS_INLINE_FUNCTION + void + do_schedule_runnable( + MultipleTaskQueue& queue, + RunnableTaskBase&& task, + TeamSchedulerInfo const& info + + ) { + // Push on any nodes that failed to enqueue + auto& team_queue = team_queue_for(task); + auto priority = task.get_priority(); + auto task_type = task.get_task_type(); + + // First schedule the task + queue.schedule_runnable_to_queue( + std::move(task), + team_queue, + info + ); + + // Task may be enqueued and may be run at any point; don't touch it (hence + // the use of move semantics) + flush_failed_insertions((int)priority, (int)task_type); + } + + + +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template < + class ExecSpace, + class MemorySpace, + class TaskQueueTraits, + class MemoryPool +> +class MultipleTaskQueue final + : public TaskQueueMemoryManager, + public TaskQueueCommonMixin>, + private ObjectWithVLAEmulation< + MultipleTaskQueue, + MultipleTaskQueueTeamEntry + > +{ +public: + + using task_queue_type = MultipleTaskQueue; // mark as task_queue concept + using task_queue_traits = TaskQueueTraits; + using task_base_type = TaskNode; + using ready_queue_type = typename TaskQueueTraits::template ready_queue_type; + +private: + + using base_t = TaskQueueMemoryManager; + using common_mixin_t = TaskQueueCommonMixin; + using vla_emulation_base_t = ObjectWithVLAEmulation< + MultipleTaskQueue, + MultipleTaskQueueTeamEntry + >; + + // Allow private inheritance from ObjectWithVLAEmulation + friend struct VLAEmulationAccess; + +public: + + struct SchedulerInfo { + using team_queue_id_t = int32_t; + static constexpr team_queue_id_t NoAssociatedTeam = -1; + team_queue_id_t team_association = NoAssociatedTeam; + + using scheduler_info_type = SchedulerInfo; + + KOKKOS_INLINE_FUNCTION + constexpr explicit SchedulerInfo(team_queue_id_t association) noexcept + : team_association(association) + { } + + KOKKOS_INLINE_FUNCTION + SchedulerInfo() = default; + + KOKKOS_INLINE_FUNCTION + SchedulerInfo(SchedulerInfo const&) = default; + + KOKKOS_INLINE_FUNCTION + SchedulerInfo(SchedulerInfo&&) = default; + + KOKKOS_INLINE_FUNCTION + SchedulerInfo& operator=(SchedulerInfo const&) = default; + + KOKKOS_INLINE_FUNCTION + SchedulerInfo& operator=(SchedulerInfo&&) = default; + + KOKKOS_INLINE_FUNCTION + ~SchedulerInfo() = default; + + }; + + using task_scheduling_info_type = typename std::conditional< + TaskQueueTraits::ready_queue_insertion_may_fail, + FailedQueueInsertionLinkedListSchedulingInfo, + EmptyTaskSchedulingInfo + >::type; + using team_scheduler_info_type = SchedulerInfo; + + using runnable_task_base_type = RunnableTaskBase; + + template + // requires TaskScheduler && TaskFunctor + using runnable_task_type = RunnableTask< + task_queue_traits, Scheduler, typename Functor::value_type, Functor + >; + + using aggregate_task_type = AggregateTask; + + // Number of allowed priorities + static constexpr int NumPriorities = 3; + + KOKKOS_INLINE_FUNCTION + constexpr typename vla_emulation_base_t::vla_entry_count_type + n_queues() const noexcept { return this->n_vla_entries(); } + +public: + + //---------------------------------------------------------------------------- + // {{{2 + + MultipleTaskQueue() = delete; + MultipleTaskQueue(MultipleTaskQueue const&) = delete; + MultipleTaskQueue(MultipleTaskQueue&&) = delete; + MultipleTaskQueue& operator=(MultipleTaskQueue const&) = delete; + MultipleTaskQueue& operator=(MultipleTaskQueue&&) = delete; + + MultipleTaskQueue( + typename base_t::execution_space const& arg_execution_space, + typename base_t::memory_space const&, + typename base_t::memory_pool const& arg_memory_pool + ) : base_t(arg_memory_pool), + vla_emulation_base_t( + Impl::TaskQueueSpecialization< + // TODO @tasking @generalization DSH avoid referencing SimpleTaskScheduler directly? + SimpleTaskScheduler + >::get_max_team_count(arg_execution_space) + ) + { } + + // end Constructors, destructors, and assignment }}}2 + //---------------------------------------------------------------------------- + + KOKKOS_FUNCTION + void + schedule_runnable( + runnable_task_base_type&& task, + team_scheduler_info_type const& info + ) { + auto team_association = info.team_association; + // Should only not be assigned if this is a host spawn... + if(team_association == team_scheduler_info_type::NoAssociatedTeam) { + team_association = 0; + } + this->vla_value_at(team_association).do_schedule_runnable(*this, std::move(task), info); + // Task may be enqueued and may be run at any point; don't touch it (hence + // the use of move semantics) + } + + KOKKOS_FUNCTION + OptionalRef + pop_ready_task( + team_scheduler_info_type const& info + ) + { + KOKKOS_EXPECTS(info.team_association != team_scheduler_info_type::NoAssociatedTeam); + + auto return_value = OptionalRef{}; + auto team_association = info.team_association; + + // always loop in order of priority first, then prefer team tasks over single tasks + auto& team_queue_info = this->vla_value_at(team_association); + + if(task_queue_traits::ready_queue_insertion_may_fail) { + team_queue_info.flush_all_failed_insertions(); + } + + return_value = team_queue_info.pop_ready_task(); + + if(not return_value) { + + // loop through the rest of the teams and try to steal + for( + auto isteal = (team_association + 1) % this->n_queues(); + isteal != team_association; + isteal = (isteal + 1) % this->n_queues() + ) { + return_value = this->vla_value_at(isteal).try_to_steal_ready_task(); + if(return_value) { break; } + } + + // Note that this is where we'd update the task's scheduling info + } + // if nothing was found, return a default-constructed (empty) OptionalRef + return return_value; + } + + + // TODO @tasking @generalization DSH make this a property-based customization point + KOKKOS_INLINE_FUNCTION + team_scheduler_info_type + initial_team_scheduler_info(int rank_in_league) const noexcept { + return team_scheduler_info_type{ + typename team_scheduler_info_type::team_queue_id_t(rank_in_league % n_queues()) + }; + } + + // TODO @tasking @generalization DSH make this a property-based customization point + static /* KOKKOS_CONSTEXPR_14 */ size_t + task_queue_allocation_size( + typename base_t::execution_space const& exec_space, + typename base_t::memory_space const&, + typename base_t::memory_pool const& + ) + { + using specialization = + Impl::TaskQueueSpecialization< + // TODO @tasking @generalization DSH avoid referencing SimpleTaskScheduler directly? + SimpleTaskScheduler + >; + + return vla_emulation_base_t::required_allocation_size( + /* num_vla_entries = */ specialization::get_max_team_count(exec_space) + ); + } + + // Provide a sensible default that can be overridden + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + runnable_task_base_type& ready_task, + runnable_task_base_type const& predecessor + ) const + { + // Do nothing; we're using the extra storage for the failure linked list + } + + // Provide a sensible default that can be overridden + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + aggregate_task_type& aggregate, + runnable_task_base_type const& predecessor + ) const + { + // Do nothing; we're using the extra storage for the failure linked list + } + + // Provide a sensible default that can be overridden + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + aggregate_task_type& aggregate, + aggregate_task_type const& predecessor + ) const + { + // Do nothing; we're using the extra storage for the failure linked list + } + + // Provide a sensible default that can be overridden + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + runnable_task_base_type& ready_task, + aggregate_task_type const& predecessor + ) const + { + // Do nothing; we're using the extra storage for the failure linked list + } + + KOKKOS_INLINE_FUNCTION + void + handle_failed_ready_queue_insertion( + runnable_task_base_type&& task, + ready_queue_type&, + team_scheduler_info_type const& info + ) { + KOKKOS_EXPECTS(info.team_association != team_scheduler_info_type::NoAssociatedTeam); + + this->vla_value_at(info.team_association).do_handle_failed_insertion( + std::move(task) + ); + } +}; + + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_MULTIPLETASKQUEUE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_OptionalRef.hpp b/lib/kokkos/core/src/impl/Kokkos_OptionalRef.hpp new file mode 100644 index 0000000000..bf83d1831c --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_OptionalRef.hpp @@ -0,0 +1,242 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_OPTIONALREF_HPP +#define KOKKOS_IMPL_OPTIONALREF_HPP + +#include + +#include + +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +namespace Kokkos { +namespace Impl { + +struct InPlaceTag { }; + +template +struct OptionalRef { +private: + + ObservingRawPtr m_value = nullptr; + +public: + + using value_type = T; + + KOKKOS_INLINE_FUNCTION + OptionalRef() = default; + + KOKKOS_INLINE_FUNCTION + OptionalRef(OptionalRef const&) = default; + + KOKKOS_INLINE_FUNCTION + OptionalRef(OptionalRef&&) = default; + + KOKKOS_INLINE_FUNCTION + OptionalRef& operator=(OptionalRef const&) = default; + + KOKKOS_INLINE_FUNCTION + // Can't return a reference to volatile OptionalRef, since GCC issues a warning about + // reference to volatile not accessing the underlying value + void + operator=(OptionalRef const volatile& other) volatile noexcept + { + m_value = other.m_value; + } + + KOKKOS_INLINE_FUNCTION + OptionalRef& operator=(OptionalRef&&) = default; + + KOKKOS_INLINE_FUNCTION + ~OptionalRef() = default; + + KOKKOS_INLINE_FUNCTION + explicit OptionalRef(T& arg_value) : m_value(&arg_value) { } + + KOKKOS_INLINE_FUNCTION + explicit OptionalRef(std::nullptr_t) : m_value(nullptr) { } + + KOKKOS_INLINE_FUNCTION + OptionalRef& operator=(T& arg_value) { m_value = &arg_value; return *this; } + + KOKKOS_INLINE_FUNCTION + OptionalRef& operator=(std::nullptr_t) { m_value = nullptr; return *this; } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + OptionalRef::type> + as_volatile() volatile noexcept { + return + OptionalRef::type>(*(*this)); + } + + KOKKOS_INLINE_FUNCTION + OptionalRef::type>::type> + as_volatile() const volatile noexcept { + return + OptionalRef::type>::type>(*(*this)); + } + + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + T& operator*() & { + KOKKOS_EXPECTS(this->has_value()); + return *m_value; + } + + KOKKOS_INLINE_FUNCTION + T const& operator*() const & { + KOKKOS_EXPECTS(this->has_value()); + return *m_value; + } + + KOKKOS_INLINE_FUNCTION + T volatile& operator*() volatile & { + KOKKOS_EXPECTS(this->has_value()); + return *m_value; + } + + KOKKOS_INLINE_FUNCTION + T const volatile& operator*() const volatile & { + KOKKOS_EXPECTS(this->has_value()); + return *m_value; + } + + KOKKOS_INLINE_FUNCTION + T&& operator*() && { + KOKKOS_EXPECTS(this->has_value()); + return std::move(*m_value); + } + + KOKKOS_INLINE_FUNCTION + T* operator->() { + KOKKOS_EXPECTS(this->has_value()); + return m_value; + } + + KOKKOS_INLINE_FUNCTION + T const* operator->() const { + KOKKOS_EXPECTS(this->has_value()); + return m_value; + } + + KOKKOS_INLINE_FUNCTION + T volatile* operator->() volatile { + KOKKOS_EXPECTS(this->has_value()); + return m_value; + } + + KOKKOS_INLINE_FUNCTION + T const volatile* operator->() const volatile { + KOKKOS_EXPECTS(this->has_value()); + return m_value; + } + + KOKKOS_INLINE_FUNCTION + T* get() { + return m_value; + } + + KOKKOS_INLINE_FUNCTION + T const* get() const { + return m_value; + } + + KOKKOS_INLINE_FUNCTION + T volatile* get() volatile { + return m_value; + } + + KOKKOS_INLINE_FUNCTION + T const volatile* get() const volatile { + return m_value; + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + operator bool() { return m_value != nullptr; } + + KOKKOS_INLINE_FUNCTION + operator bool() const { return m_value != nullptr; } + + KOKKOS_INLINE_FUNCTION + operator bool() volatile { return m_value != nullptr; } + + KOKKOS_INLINE_FUNCTION + operator bool() const volatile { return m_value != nullptr; } + + KOKKOS_INLINE_FUNCTION + bool has_value() { return m_value != nullptr; } + + KOKKOS_INLINE_FUNCTION + bool has_value() const { return m_value != nullptr; } + + KOKKOS_INLINE_FUNCTION + bool has_value() volatile { return m_value != nullptr; } + + KOKKOS_INLINE_FUNCTION + bool has_value() const volatile { return m_value != nullptr; } + +}; + +} // end namespace Impl +} // end namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + + + +#endif /* #ifndef KOKKOS_IMPL_OPTIONALREF_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp index d84a854622..687a0e9c37 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp @@ -55,104 +55,7 @@ namespace Kokkos { namespace Impl { -template class TaskQueue< Kokkos::Serial > ; - -void TaskQueueSpecialization< Kokkos::Serial >::execute - ( TaskQueue< Kokkos::Serial > * const queue ) -{ - using exec_space = Kokkos::Serial ; - using tqs_queue_type = TaskQueue< exec_space > ; - using task_root_type = TaskBase< void , void , void > ; - using Member = Impl::HostThreadTeamMember< exec_space > ; - - task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - - // Set default buffers - serial_resize_thread_team_data( 0 /* global reduce buffer */ - , 512 /* team reduce buffer */ - , 0 /* team shared buffer */ - , 0 /* thread local buffer */ - ); - - Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data(); - - Member exec( *data ); - - // Loop until all queues are empty - while ( 0 < queue->m_ready_count ) { - - task_root_type * task = end ; - - for ( int i = 0 ; i < tqs_queue_type::NumQueue && end == task ; ++i ) { - for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = tqs_queue_type::pop_ready_task( & queue->m_ready[i][j] ); - } - } - - if ( end != task ) { - - // pop_ready_task resulted in lock == task->m_next - // In the executing state - - (*task->m_apply)( task , & exec ); - -#if 0 - printf( "TaskQueue::executed: 0x%lx { 0x%lx 0x%lx %d %d %d }\n" - , uintptr_t(task) - , uintptr_t(task->m_wait) - , uintptr_t(task->m_next) - , task->m_task_type - , task->m_priority - , task->m_ref_count ); -#endif - - // If a respawn then re-enqueue otherwise the task is complete - // and all tasks waiting on this task are updated. - queue->complete( task ); - } - else if ( 0 != queue->m_ready_count ) { - Kokkos::abort("TaskQueue::execute ERROR: ready_count"); - } - } -} - -void TaskQueueSpecialization< Kokkos::Serial > :: - iff_single_thread_recursive_execute( - TaskQueue< Kokkos::Serial > * const queue ) -{ - using exec_space = Kokkos::Serial ; - using tqs_queue_type = TaskQueue< exec_space > ; - using task_root_type = TaskBase< void , void , void > ; - using Member = Impl::HostThreadTeamMember< exec_space > ; - - task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - - Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data(); - - Member exec( *data ); - - // Loop until no runnable task - - task_root_type * task = end ; - - do { - - task = end ; - - for ( int i = 0 ; i < tqs_queue_type::NumQueue && end == task ; ++i ) { - for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = tqs_queue_type::pop_ready_task( & queue->m_ready[i][j] ); - } - } - - if ( end == task ) break ; - - (*task->m_apply)( task , & exec ); - - queue->complete( task ); - - } while(1); -} +template class TaskQueue; }} /* namespace Kokkos::Impl */ diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp index 2fec5dfb89..c379a12fb1 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp @@ -47,7 +47,11 @@ #include #if defined( KOKKOS_ENABLE_TASKDAG ) +#include + #include +#include +#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -55,32 +59,217 @@ namespace Kokkos { namespace Impl { -//---------------------------------------------------------------------------- - -template<> -class TaskQueueSpecialization< Kokkos::Serial > +template +class TaskQueueSpecialization< + SimpleTaskScheduler +> { public: - using execution_space = Kokkos::Serial ; - using memory_space = Kokkos::HostSpace ; - using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; - using task_base_type = Kokkos::Impl::TaskBase< void , void , void > ; - using member_type = Kokkos::Impl::HostThreadTeamMember< execution_space > ; + // Note: Scheduler may be an incomplete type at class scope (but not inside + // of the methods, obviously) + + using execution_space = Kokkos::Serial; + using memory_space = Kokkos::HostSpace; + using scheduler_type = SimpleTaskScheduler; + using member_type = TaskTeamMemberAdapter< + HostThreadTeamMember, scheduler_type + >; static - void iff_single_thread_recursive_execute( queue_type * const ); + void execute(scheduler_type const& scheduler) + { + using task_base_type = typename scheduler_type::task_base_type; - static - void execute( queue_type * const ); + // Set default buffers + serial_resize_thread_team_data( + 0, /* global reduce buffer */ + 512, /* team reduce buffer */ + 0, /* team shared buffer */ + 0 /* thread local buffer */ + ); - template< typename TaskType > - static - typename TaskType::function_type - get_function_pointer() { return TaskType::apply ; } + Impl::HostThreadTeamData& self = *Impl::serial_get_thread_team_data(); + + auto& queue = scheduler.queue(); + auto team_scheduler = scheduler.get_team_scheduler(0); + + member_type member(scheduler, self); + + auto current_task = OptionalRef(nullptr); + + while(not queue.is_done()) { + + // Each team lead attempts to acquire either a thread team task + // or a single thread task for the team. + + // pop a task off + current_task = queue.pop_ready_task(team_scheduler.team_scheduler_info()); + + // run the task + if(current_task) { + current_task->as_runnable_task().run(member); + // Respawns are handled in the complete function + queue.complete( + (*std::move(current_task)).as_runnable_task(), + team_scheduler.team_scheduler_info() + ); + } + + } + + } + + static constexpr uint32_t + get_max_team_count(execution_space const&) noexcept + { + return 1; + } + + template + static void + get_function_pointer( + typename TaskType::function_type& ptr, + typename TaskType::destroy_type& dtor + ) + { + ptr = TaskType::apply; + dtor = TaskType::destroy; + } }; -extern template class TaskQueue< Kokkos::Serial > ; +//---------------------------------------------------------------------------- + +template +class TaskQueueSpecializationConstrained< + Scheduler, + typename std::enable_if< + std::is_same::value + >::type +> +{ +public: + + // Note: Scheduler may be an incomplete type at class scope (but not inside + // of the methods, obviously) + + using execution_space = Kokkos::Serial; + using memory_space = Kokkos::HostSpace; + using scheduler_type = Scheduler; + using member_type = TaskTeamMemberAdapter< + HostThreadTeamMember, scheduler_type + >; + + static + void iff_single_thread_recursive_execute(scheduler_type const& scheduler) { + using task_base_type = TaskBase; + using queue_type = typename scheduler_type::queue_type; + + task_base_type * const end = (task_base_type *) task_base_type::EndTag ; + + Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data(); + + member_type exec( scheduler, *data ); + + // Loop until no runnable task + + task_base_type * task = end ; + + auto* const queue = scheduler.m_queue; + + do { + + task = end ; + + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); + } + } + + if ( end == task ) break ; + + (*task->m_apply)( task , & exec ); + + queue->complete( task ); + + } while(1); + + } + + static + void execute(scheduler_type const& scheduler) + { + using task_base_type = TaskBase; + using queue_type = typename scheduler_type::queue_type; + + task_base_type * const end = (task_base_type *) task_base_type::EndTag ; + + // Set default buffers + serial_resize_thread_team_data( + 0, /* global reduce buffer */ + 512, /* team reduce buffer */ + 0, /* team shared buffer */ + 0 /* thread local buffer */ + ); + + auto* const queue = scheduler.m_queue; + + Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data(); + + member_type exec( scheduler, *data ); + + // Loop until all queues are empty + while ( 0 < queue->m_ready_count ) { + + task_base_type * task = end ; + + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); + } + } + + if ( end != task ) { + + // pop_ready_task resulted in lock == task->m_next + // In the executing state + + (*task->m_apply)( task , & exec ); + +#if 0 + printf( "TaskQueue::executed: 0x%lx { 0x%lx 0x%lx %d %d %d }\n" + , uintptr_t(task) + , uintptr_t(task->m_wait) + , uintptr_t(task->m_next) + , task->m_task_type + , task->m_priority + , task->m_ref_count ); +#endif + + // If a respawn then re-enqueue otherwise the task is complete + // and all tasks waiting on this task are updated. + queue->complete( task ); + } + else if ( 0 != queue->m_ready_count ) { + Kokkos::abort("TaskQueue::execute ERROR: ready_count"); + } + } + } + + template + static void + get_function_pointer( + typename TaskType::function_type& ptr, + typename TaskType::destroy_type& dtor + ) + { + ptr = TaskType::apply; + dtor = TaskType::destroy; + } +}; + +extern template class TaskQueue< Kokkos::Serial, typename Kokkos::Serial::memory_space > ; }} /* namespace Kokkos::Impl */ diff --git a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp index 658f1db06b..77eb69d081 100644 --- a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp @@ -48,11 +48,11 @@ namespace Impl { __thread int SharedAllocationRecord::t_tracking_enabled = 1; +#ifdef KOKKOS_DEBUG bool SharedAllocationRecord< void , void >:: is_sane( SharedAllocationRecord< void , void > * arg_record ) { -#ifdef KOKKOS_DEBUG SharedAllocationRecord * const root = arg_record ? arg_record->m_root : 0 ; bool ok = root != 0 && root->use_count() == 0 ; @@ -102,16 +102,23 @@ is_sane( SharedAllocationRecord< void , void > * arg_record ) } } return ok ; -#else - Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord::is_sane only works with KOKKOS_DEBUG enabled"); - return false ; -#endif } +#else + +bool +SharedAllocationRecord< void , void >:: +is_sane( SharedAllocationRecord< void , void > * ) +{ + Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord::is_sane only works with KOKKOS_DEBUG enabled"); + return false ; +} +#endif //#ifdef KOKKOS_DEBUG + +#ifdef KOKKOS_DEBUG SharedAllocationRecord * SharedAllocationRecord::find( SharedAllocationRecord * const arg_root , void * const arg_data_ptr ) { -#ifdef KOKKOS_DEBUG SharedAllocationRecord * root_next = 0 ; static constexpr SharedAllocationRecord * zero = nullptr; @@ -130,11 +137,15 @@ SharedAllocationRecord::find( SharedAllocationRecord * con Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed locking/unlocking"); } return r ; +} #else +SharedAllocationRecord * +SharedAllocationRecord::find( SharedAllocationRecord * const , void * const ) +{ Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord::find only works with KOKKOS_DEBUG enabled"); return nullptr; -#endif } +#endif /**\brief Construct and insert into 'arg_root' tracking set. @@ -271,6 +282,7 @@ decrement( SharedAllocationRecord< void , void > * arg_record ) return arg_record ; } +#ifdef KOKKOS_DEBUG void SharedAllocationRecord< void , void >:: print_host_accessible_records( std::ostream & s @@ -278,7 +290,6 @@ print_host_accessible_records( std::ostream & s , const SharedAllocationRecord * const root , const bool detail ) { -#ifdef KOKKOS_DEBUG const SharedAllocationRecord< void , void > * r = root ; char buffer[256] ; @@ -339,12 +350,20 @@ print_host_accessible_records( std::ostream & s r = r->m_next ; } while ( r != root ); } +} #else +void +SharedAllocationRecord< void , void >:: +print_host_accessible_records( std::ostream & + , const char * const + , const SharedAllocationRecord * const + , const bool ) +{ Kokkos::Impl::throw_runtime_exception( "Kokkos::Impl::SharedAllocationRecord::print_host_accessible_records" " only works with KOKKOS_DEBUG enabled"); -#endif } +#endif } /* namespace Impl */ } /* namespace Kokkos */ diff --git a/lib/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp b/lib/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp new file mode 100644 index 0000000000..c2dbc96814 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp @@ -0,0 +1,646 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_SIMPLETASKSCHEDULER_HPP +#define KOKKOS_SIMPLETASKSCHEDULER_HPP + +//---------------------------------------------------------------------------- + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include +//---------------------------------------------------------------------------- + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +namespace Impl { + +// TODO @tasking @cleanup move this +template +struct DefaultDestroy { + T* managed_object; + KOKKOS_FUNCTION + void destroy_shared_allocation() { + managed_object->~T(); + } +}; + + +template +class ExecutionSpaceInstanceStorage + : private NoUniqueAddressMemberEmulation +{ +private: + + using base_t = NoUniqueAddressMemberEmulation; + +protected: + + constexpr explicit + ExecutionSpaceInstanceStorage() + : base_t() + { } + + KOKKOS_INLINE_FUNCTION + constexpr explicit + ExecutionSpaceInstanceStorage(ExecutionSpace const& arg_execution_space) + : base_t(arg_execution_space) + { } + + KOKKOS_INLINE_FUNCTION + constexpr explicit + ExecutionSpaceInstanceStorage(ExecutionSpace&& arg_execution_space) + : base_t(std::move(arg_execution_space)) + { } + + KOKKOS_INLINE_FUNCTION + ExecutionSpace& execution_space_instance() & + { + return this->no_unique_address_data_member(); + } + + KOKKOS_INLINE_FUNCTION + ExecutionSpace const& execution_space_instance() const & + { + return this->no_unique_address_data_member(); + } + + KOKKOS_INLINE_FUNCTION + ExecutionSpace&& execution_space_instance() && + { + return std::move(*this).no_unique_address_data_member(); + } +}; + + +template +class MemorySpaceInstanceStorage + : private NoUniqueAddressMemberEmulation +{ +private: + + using base_t = NoUniqueAddressMemberEmulation; + +protected: + + MemorySpaceInstanceStorage() + : base_t() + { } + + KOKKOS_INLINE_FUNCTION + MemorySpaceInstanceStorage(MemorySpace const& arg_memory_space) + : base_t(arg_memory_space) + { } + + KOKKOS_INLINE_FUNCTION + constexpr explicit + MemorySpaceInstanceStorage(MemorySpace&& arg_memory_space) + : base_t(arg_memory_space) + { } + + KOKKOS_INLINE_FUNCTION + MemorySpace& memory_space_instance() & + { + return this->no_unique_address_data_member(); + } + + KOKKOS_INLINE_FUNCTION + MemorySpace const& memory_space_instance() const & + { + return this->no_unique_address_data_member(); + } + + KOKKOS_INLINE_FUNCTION + MemorySpace&& memory_space_instance() && + { + return std::move(*this).no_unique_address_data_member(); + } +}; + +} // end namespace Impl + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template + // requires ExecutionSpace && TaskQueue +class SimpleTaskScheduler + : public Impl::TaskSchedulerBase, + private Impl::ExecutionSpaceInstanceStorage, + private Impl::MemorySpaceInstanceStorage, + private Impl::NoUniqueAddressMemberEmulation +{ +public: + // TODO @tasking @generalization (maybe?) don't force QueueType to be complete here + + using scheduler_type = SimpleTaskScheduler; // tag as scheduler concept + using execution_space = ExecSpace; + using task_queue_type = QueueType; + using memory_space = typename task_queue_type::memory_space; + using memory_pool = typename task_queue_type::memory_pool; + + using team_scheduler_info_type = typename task_queue_type::team_scheduler_info_type; + using task_scheduling_info_type = typename task_queue_type::task_scheduling_info_type; + using specialization = Impl::TaskQueueSpecialization; + using member_type = typename specialization::member_type; + + template + using runnable_task_type = typename QueueType::template runnable_task_type; + + using task_base_type = typename task_queue_type::task_base_type; + using runnable_task_base_type = typename task_queue_type::runnable_task_base_type; + + using task_queue_traits = typename QueueType::task_queue_traits; + + template + using future_type = Kokkos::BasicFuture; + template + using future_type_for_functor = future_type; + +private: + + template + friend class BasicFuture; + + using track_type = Kokkos::Impl::SharedAllocationTracker; + using execution_space_storage = Impl::ExecutionSpaceInstanceStorage; + using memory_space_storage = Impl::MemorySpaceInstanceStorage; + using team_scheduler_info_storage = Impl::NoUniqueAddressMemberEmulation; + + track_type m_track; + task_queue_type* m_queue = nullptr; + + KOKKOS_INLINE_FUNCTION + static constexpr task_base_type* _get_task_ptr(std::nullptr_t) { return nullptr; } + + template + KOKKOS_INLINE_FUNCTION + static constexpr task_base_type* _get_task_ptr(future_type&& f) + { + return f.m_task; + } + + template < + int TaskEnum, + class DepTaskType, + class FunctorType + > + KOKKOS_FUNCTION + future_type_for_functor::type> + _spawn_impl( + DepTaskType arg_predecessor_task, + TaskPriority arg_priority, + typename runnable_task_base_type::function_type apply_function_ptr, + typename runnable_task_base_type::destroy_type destroy_function_ptr, + FunctorType&& functor + ) + { + KOKKOS_EXPECTS(m_queue != nullptr); + + using functor_future_type = future_type_for_functor::type>; + using task_type = typename task_queue_type::template runnable_task_type< + FunctorType, scheduler_type + >; + + // Reference count starts at two: + // +1 for the matching decrement when task is complete + // +1 for the future + auto& runnable_task = *m_queue->template allocate_and_construct( + /* functor = */ std::forward(functor), + /* apply_function_ptr = */ apply_function_ptr, + /* task_type = */ static_cast(TaskEnum), + /* priority = */ arg_priority, + /* queue_base = */ m_queue, + /* initial_reference_count = */ 2 + ); + + if(arg_predecessor_task != nullptr) { + m_queue->initialize_scheduling_info_from_predecessor( + runnable_task, *arg_predecessor_task + ); + runnable_task.set_predecessor(*arg_predecessor_task); + arg_predecessor_task->decrement_and_check_reference_count(); + } + else { + m_queue->initialize_scheduling_info_from_team_scheduler_info( + runnable_task, team_scheduler_info() + ); + } + + auto rv = functor_future_type(&runnable_task); + + Kokkos::memory_fence(); // fence to ensure dependent stores are visible + + m_queue->schedule_runnable( + std::move(runnable_task), + team_scheduler_info() + ); + // note that task may be already completed even here, so don't touch it again + + return rv; + } + + +public: + + //---------------------------------------------------------------------------- + // {{{2 + + SimpleTaskScheduler() = default; + + explicit + SimpleTaskScheduler( + execution_space const& arg_execution_space, + memory_space const& arg_memory_space, + memory_pool const& arg_memory_pool + ) : execution_space_storage(arg_execution_space), + memory_space_storage(arg_memory_space) + { + // Ask the task queue how much space it needs (usually will just be + // sizeof(task_queue_type), but some queues may need additional storage + // dependent on runtime conditions or properties of the execution space) + auto const allocation_size = task_queue_type::task_queue_allocation_size( + arg_execution_space, + arg_memory_space, + arg_memory_pool + ); + + // TODO @tasking @generalization DSH better encapsulation of the SharedAllocationRecord pattern + using record_type = Impl::SharedAllocationRecord< + memory_space, Impl::DefaultDestroy + >; + + // Allocate space for the task queue + auto* record = record_type::allocate( + memory_space(), "TaskQueue", allocation_size + ); + m_queue = new (record->data()) task_queue_type( + arg_execution_space, + arg_memory_space, + arg_memory_pool + ); + record->m_destroy.managed_object = m_queue; + m_track.assign_allocated_record_to_uninitialized(record); + } + + explicit + SimpleTaskScheduler( + execution_space const& arg_execution_space, + memory_pool const& pool + ) : SimpleTaskScheduler(arg_execution_space, memory_space{}, pool) + { /* forwarding ctor, must be empty */ } + + explicit + SimpleTaskScheduler(memory_pool const& pool) + : SimpleTaskScheduler(execution_space{}, memory_space{}, pool) + { /* forwarding ctor, must be empty */ } + + SimpleTaskScheduler( + memory_space const & arg_memory_space, + size_t const mempool_capacity, + unsigned const mempool_min_block_size, // = 1u << 6 + unsigned const mempool_max_block_size, // = 1u << 10 + unsigned const mempool_superblock_size // = 1u << 12 + ) : SimpleTaskScheduler( + execution_space{}, + arg_memory_space, + memory_pool( + arg_memory_space, mempool_capacity, mempool_min_block_size, + mempool_max_block_size, mempool_superblock_size + ) + ) + { /* forwarding ctor, must be empty */ } + + // end Constructors, destructor, and assignment }}}2 + //---------------------------------------------------------------------------- + + // Note that this is an expression of shallow constness + KOKKOS_INLINE_FUNCTION + task_queue_type& queue() const + { + KOKKOS_EXPECTS(m_queue != nullptr); + return *m_queue; + } + + KOKKOS_INLINE_FUNCTION + SimpleTaskScheduler + get_team_scheduler(int rank_in_league) const noexcept + { + KOKKOS_EXPECTS(m_queue != nullptr); + auto rv = SimpleTaskScheduler{ *this }; + rv.team_scheduler_info() = m_queue->initial_team_scheduler_info(rank_in_league); + return rv; + } + + KOKKOS_INLINE_FUNCTION + execution_space const& get_execution_space() const { return this->execution_space_instance(); } + + KOKKOS_INLINE_FUNCTION + team_scheduler_info_type& team_scheduler_info() & + { + return this->team_scheduler_info_storage::no_unique_address_data_member(); + } + + KOKKOS_INLINE_FUNCTION + team_scheduler_info_type const& team_scheduler_info() const & + { + return this->team_scheduler_info_storage::no_unique_address_data_member(); + } + + //---------------------------------------------------------------------------- + + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE + // For backwards compatibility purposes only + KOKKOS_DEPRECATED + KOKKOS_INLINE_FUNCTION + memory_pool* + memory() const noexcept KOKKOS_DEPRECATED_TRAILING_ATTRIBUTE + { + if(m_queue != nullptr) return &(m_queue->get_memory_pool()); + else return nullptr; + } + #endif + + //---------------------------------------------------------------------------- + + template + KOKKOS_FUNCTION + static + Kokkos::BasicFuture + spawn( + Impl::TaskPolicyWithScheduler&& arg_policy, + typename runnable_task_base_type::function_type arg_function, + typename runnable_task_base_type::destroy_type arg_destroy, + FunctorType&& arg_functor + ) + { + return std::move(arg_policy.scheduler()).template _spawn_impl( + _get_task_ptr(std::move(arg_policy.predecessor())), + arg_policy.priority(), + arg_function, + arg_destroy, + std::forward(arg_functor) + ); + } + + template + KOKKOS_FUNCTION + Kokkos::BasicFuture + spawn( + Impl::TaskPolicyWithPredecessor&& arg_policy, + FunctorType&& arg_functor + ) + { + static_assert( + std::is_same::value, + "Can't create a task policy from a scheduler and a future from a different scheduler" + ); + + using task_type = runnable_task_type; + typename task_type::function_type const ptr = task_type::apply; + typename task_type::destroy_type const dtor = task_type::destroy; + + return _spawn_impl( + std::move(arg_policy).predecessor().m_task, + arg_policy.priority(), + ptr, dtor, + std::forward(arg_functor) + ); + } + + template + KOKKOS_FUNCTION + static void + respawn( + FunctorType* functor, + BasicFuture const& predecessor, + TaskPriority priority = TaskPriority::Regular + ) { + using task_type = typename task_queue_type::template runnable_task_type< + FunctorType, scheduler_type + >; + + auto& task = *static_cast(functor); + + KOKKOS_EXPECTS(!task.get_respawn_flag()); + + task.set_priority(priority); + task.set_predecessor(*predecessor.m_task); + task.set_respawn_flag(true); + } + + template + KOKKOS_FUNCTION + static void + respawn( + FunctorType* functor, + scheduler_type const&, + TaskPriority priority = TaskPriority::Regular + ) { + using task_type = typename task_queue_type::template runnable_task_type< + FunctorType, scheduler_type + >; + + auto& task = *static_cast(functor); + + KOKKOS_EXPECTS(!task.get_respawn_flag()); + + task.set_priority(priority); + KOKKOS_ASSERT(not task.has_predecessor()); + task.set_respawn_flag(true); + } + + + template + KOKKOS_FUNCTION + future_type + when_all(BasicFuture const predecessors[], int n_predecessors) { + + // TODO @tasking @generalization DSH propagate scheduling info + + using task_type = typename task_queue_type::aggregate_task_type; + + future_type rv; + + if(n_predecessors > 0) { + task_queue_type* queue_ptr = nullptr; + + // Loop over the predecessors to find the queue and increment the reference + // counts + for(int i_pred = 0; i_pred < n_predecessors; ++i_pred) { + + auto* predecessor_task_ptr = predecessors[i_pred].m_task; + + if(predecessor_task_ptr != nullptr) { + // TODO @tasking @cleanup DSH figure out when this is allowed to be nullptr (if at all anymore) + + // Increment reference count to track subsequent assignment. + // TODO @tasking @optimization DSH figure out if this reference count increment is necessary + predecessor_task_ptr->increment_reference_count(); + + // TODO @tasking @cleanup DSH we should just set a boolean here instead to make this more readable + queue_ptr = m_queue; + } + + } // end loop over predecessors + + // This only represents a non-ready future if at least one of the predecessors + // has a task (and thus, a queue) + if(queue_ptr != nullptr) { + auto& q = *queue_ptr; + + auto* aggregate_task_ptr = q.template allocate_and_construct_with_vla_emulation< + task_type, task_base_type* + >( + /* n_vla_entries = */ n_predecessors, + /* aggregate_predecessor_count = */ n_predecessors, + /* queue_base = */ &q, + /* initial_reference_count = */ 2 + ); + + rv = future_type(aggregate_task_ptr); + + for(int i_pred = 0; i_pred < n_predecessors; ++i_pred) { + aggregate_task_ptr->vla_value_at(i_pred) = predecessors[i_pred].m_task; + } + + Kokkos::memory_fence(); // we're touching very questionable memory, so be sure to fence + + q.schedule_aggregate(std::move(*aggregate_task_ptr), team_scheduler_info()); + // the aggregate may be processed at any time, so don't touch it after this + } + } + + return rv; + } + + template + KOKKOS_FUNCTION + future_type + when_all(int n_calls, F&& func) + { + // TODO @tasking @generalization DSH propagate scheduling info? + + // later this should be std::invoke_result_t + using generated_type = decltype(func(0)); + using task_type = typename task_queue_type::aggregate_task_type; + + static_assert( + is_future::value, + "when_all function must return a Kokkos future (an instance of Kokkos::BasicFuture)" + ); + static_assert( + std::is_base_of::value, + "when_all function must return a Kokkos::BasicFuture of a compatible scheduler type" + ); + + auto* aggregate_task = m_queue->template allocate_and_construct_with_vla_emulation< + task_type, task_base_type* + >( + /* n_vla_entries = */ n_calls, + /* aggregate_predecessor_count = */ n_calls, + /* queue_base = */ m_queue, + /* initial_reference_count = */ 2 + ); + + auto rv = future_type(aggregate_task); + + for(int i_call = 0; i_call < n_calls; ++i_call) { + + auto generated_future = func(i_call); + + if(generated_future.m_task != nullptr) { + generated_future.m_task->increment_reference_count(); + aggregate_task->vla_value_at(i_call) = generated_future.m_task; + + KOKKOS_ASSERT(m_queue == generated_future.m_task->ready_queue_base_ptr() + && "Queue mismatch in when_all" + ); + } + + } + + Kokkos::memory_fence(); + + m_queue->schedule_aggregate(std::move(*aggregate_task), team_scheduler_info()); + // This could complete at any moment, so don't touch anything after this + + return rv; + } + +}; + + +template +inline +void wait(SimpleTaskScheduler const& scheduler) +{ + using scheduler_type = SimpleTaskScheduler; + scheduler_type::specialization::execute(scheduler); +} + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_SIMPLETASKSCHEDULER_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_SingleTaskQueue.hpp b/lib/kokkos/core/src/impl/Kokkos_SingleTaskQueue.hpp new file mode 100644 index 0000000000..d73028eb5b --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_SingleTaskQueue.hpp @@ -0,0 +1,207 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_SINGLETASKQUEUE_HPP +#define KOKKOS_IMPL_SINGLETASKQUEUE_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template < + class ExecSpace, + class MemorySpace, + class TaskQueueTraits, + class MemoryPool +> +class SingleTaskQueue + : public TaskQueueMemoryManager, + public TaskQueueCommonMixin> +{ +private: + + using base_t = TaskQueueMemoryManager; + using common_mixin_t = TaskQueueCommonMixin; + + struct EmptyTeamSchedulerInfo { }; + struct EmptyTaskSchedulingInfo { }; + +public: + + using task_queue_type = SingleTaskQueue; // mark as task_queue concept + using task_queue_traits = TaskQueueTraits; + using task_base_type = TaskNode; + using ready_queue_type = typename TaskQueueTraits::template ready_queue_type; + + using team_scheduler_info_type = EmptyTeamSchedulerInfo; + using task_scheduling_info_type = EmptyTaskSchedulingInfo; + + using runnable_task_base_type = RunnableTaskBase; + + template + // requires TaskScheduler && TaskFunctor + using runnable_task_type = RunnableTask< + task_queue_traits, Scheduler, typename Functor::value_type, Functor + >; + + using aggregate_task_type = AggregateTask; + + // Number of allowed priorities + static constexpr int NumQueue = 3; + +private: + + ready_queue_type m_ready_queues[NumQueue][2]; + +public: + + //---------------------------------------------------------------------------- + // {{{2 + + SingleTaskQueue() = delete; + SingleTaskQueue(SingleTaskQueue const&) = delete; + SingleTaskQueue(SingleTaskQueue&&) = delete; + SingleTaskQueue& operator=(SingleTaskQueue const&) = delete; + SingleTaskQueue& operator=(SingleTaskQueue&&) = delete; + + explicit + SingleTaskQueue( + typename base_t::execution_space const&, + typename base_t::memory_space const&, + typename base_t::memory_pool const& arg_memory_pool + ) + : base_t(arg_memory_pool) + { } + + ~SingleTaskQueue() { + for(int i_priority = 0; i_priority < NumQueue; ++i_priority) { + KOKKOS_EXPECTS(m_ready_queues[i_priority][TaskTeam].empty()); + KOKKOS_EXPECTS(m_ready_queues[i_priority][TaskSingle].empty()); + } + } + + // end Constructors, destructors, and assignment }}}2 + //---------------------------------------------------------------------------- + + KOKKOS_FUNCTION + void + schedule_runnable( + runnable_task_base_type&& task, + team_scheduler_info_type const& info + ) { + this->schedule_runnable_to_queue( + std::move(task), + m_ready_queues[int(task.get_priority())][int(task.get_task_type())], + info + ); + // Task may be enqueued and may be run at any point; don't touch it (hence + // the use of move semantics) + } + + KOKKOS_FUNCTION + OptionalRef + pop_ready_task( + team_scheduler_info_type const& info + ) + { + OptionalRef return_value; + // always loop in order of priority first, then prefer team tasks over single tasks + for(int i_priority = 0; i_priority < NumQueue; ++i_priority) { + + // Check for a team task with this priority + return_value = m_ready_queues[i_priority][TaskTeam].pop(); + if(return_value) return return_value; + + // Check for a single task with this priority + return_value = m_ready_queues[i_priority][TaskSingle].pop(); + if(return_value) return return_value; + + } + // if nothing was found, return a default-constructed (empty) OptionalRef + return return_value; + } + + KOKKOS_INLINE_FUNCTION + constexpr team_scheduler_info_type + initial_team_scheduler_info(int) const noexcept { return { }; } + +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_SINGLETASKQUEUE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskBase.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskBase.hpp new file mode 100644 index 0000000000..b0c06fb26e --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskBase.hpp @@ -0,0 +1,329 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_TASKBASE_HPP +#define KOKKOS_IMPL_TASKBASE_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include + +#include + +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +/** \brief Base class for task management, access, and execution. + * + * Inheritance structure to allow static_cast from the task root type + * and a task's FunctorType. + * + * // Enable a functor to access the base class + * // and provide memory for result value. + * TaskBase< Space , ResultType , FunctorType > + * : TaskBase< void , void , void > + * , FunctorType + * { ... }; + * Followed by memory allocated for result value. + * + * + * States of a task: + * + * Constructing State, NOT IN a linked list + * m_wait == 0 + * m_next == 0 + * + * Scheduling transition : Constructing -> Waiting + * before: + * m_wait == 0 + * m_next == this task's initial dependence, 0 if none + * after: + * m_wait == EndTag + * m_next == EndTag + * + * Waiting State, IN a linked list + * m_apply != 0 + * m_queue != 0 + * m_ref_count > 0 + * m_wait == head of linked list of tasks waiting on this task + * m_next == next of linked list of tasks + * + * transition : Waiting -> Executing + * before: + * m_next == EndTag + * after:: + * m_next == LockTag + * + * Executing State, NOT IN a linked list + * m_apply != 0 + * m_queue != 0 + * m_ref_count > 0 + * m_wait == head of linked list of tasks waiting on this task + * m_next == LockTag + * + * Respawn transition : Executing -> Executing-Respawn + * before: + * m_next == LockTag + * after: + * m_next == this task's updated dependence, 0 if none + * + * Executing-Respawn State, NOT IN a linked list + * m_apply != 0 + * m_queue != 0 + * m_ref_count > 0 + * m_wait == head of linked list of tasks waiting on this task + * m_next == this task's updated dependence, 0 if none + * + * transition : Executing -> Complete + * before: + * m_wait == head of linked list + * after: + * m_wait == LockTag + * + * Complete State, NOT IN a linked list + * m_wait == LockTag: cannot add dependence (<=> complete) + * m_next == LockTag: not a member of a wait queue + * + */ +class TaskBase +{ +public: + + enum : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2 }; + enum : uintptr_t { LockTag = ~uintptr_t(0) , EndTag = ~uintptr_t(1) }; + + template friend class Kokkos::BasicTaskScheduler ; + + using queue_type = TaskQueueBase; + + using function_type = void(*)( TaskBase * , void * ); + typedef void (* destroy_type) ( TaskBase * ); + + // sizeof(TaskBase) == 48 + + function_type m_apply = nullptr; ///< Apply function pointer + queue_type* m_queue = nullptr; ///< Pointer to the scheduler + TaskBase* m_next = nullptr; ///< next in linked list of ready tasks + TaskBase* m_wait = nullptr; ///< Queue of tasks waiting on this + int32_t m_ref_count = 0; + int32_t m_alloc_size = 0; + int32_t m_dep_count ; ///< Aggregate's number of dependences + int16_t m_task_type ; ///< Type of task + int16_t m_priority ; ///< Priority of runnable task + + TaskBase( TaskBase && ) = delete ; + TaskBase( const TaskBase & ) = delete ; + TaskBase & operator = ( TaskBase && ) = delete ; + TaskBase & operator = ( const TaskBase & ) = delete ; + +#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND + KOKKOS_INLINE_FUNCTION ~TaskBase() {}; +#else + KOKKOS_INLINE_FUNCTION ~TaskBase() = default; +#endif + + KOKKOS_INLINE_FUNCTION constexpr + TaskBase() + : m_apply( nullptr ) + , m_queue( nullptr ) + , m_next( nullptr ) + , m_wait( nullptr ) + , m_ref_count( 0 ) + , m_alloc_size( 0 ) + , m_dep_count( 0 ) + , m_task_type( 0 ) + , m_priority( 0 ) + {} + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + TaskBase * volatile * aggregate_dependences() volatile + { return reinterpret_cast( this + 1 ); } + + KOKKOS_INLINE_FUNCTION + bool requested_respawn() + { + // This should only be called when a task has finished executing and is + // in the transition to either the complete or executing-respawn state. + TaskBase * const lock = reinterpret_cast< TaskBase * >( LockTag ); + return lock != m_next; + } + + KOKKOS_INLINE_FUNCTION + void add_dependence( TaskBase* dep ) + { + // Precondition: lock == m_next + + TaskBase * const lock = (TaskBase *) LockTag ; + + // Assign dependence to m_next. It will be processed in the subsequent + // call to schedule. Error if the dependence is reset. + if ( lock != Kokkos::atomic_exchange( & m_next, dep ) ) { + Kokkos::abort("TaskScheduler ERROR: resetting task dependence"); + } + + if ( 0 != dep ) { + // The future may be destroyed upon returning from this call + // so increment reference count to track this assignment. + Kokkos::atomic_increment( &(dep->m_ref_count) ); + } + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + int32_t reference_count() const + { return *((int32_t volatile *)( & m_ref_count )); } + +}; + +static_assert( sizeof(TaskBase) == 48 + , "Verifying expected sizeof(TaskBase)" ); + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class Scheduler, typename ResultType , class FunctorType > +class Task + : public TaskBase, + public FunctorType +{ +public: + + Task() = delete ; + Task( Task && ) = delete ; + Task( const Task & ) = delete ; + Task & operator = ( Task && ) = delete ; + Task & operator = ( const Task & ) = delete ; + + + using root_type = TaskBase; + using functor_type = FunctorType ; + using result_type = ResultType ; + + using specialization = TaskQueueSpecialization ; + using member_type = typename specialization::member_type ; + + KOKKOS_INLINE_FUNCTION + void apply_functor( member_type * const member , void * ) + { this->functor_type::operator()( *member ); } + + template< typename T > + KOKKOS_INLINE_FUNCTION + void apply_functor( member_type * const member + , T * const result ) + { this->functor_type::operator()( *member , *result ); } + + KOKKOS_FUNCTION static + void destroy( root_type * root ) + { + TaskResult::destroy(root); + } + + KOKKOS_FUNCTION static + void apply( root_type * root , void * exec ) + { + Task* const task = static_cast< Task * >( root ); + member_type * const member = reinterpret_cast< member_type * >( exec ); + result_type * const result = TaskResult< result_type >::ptr( task ); + + // Task may be serial or team. + // If team then must synchronize before querying if respawn was requested. + // If team then only one thread calls destructor. + + const bool only_one_thread = +#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA) + 0 == threadIdx.x && 0 == threadIdx.y ; +#else + 0 == member->team_rank(); +#endif + + task->apply_functor( member , result ); + + member->team_barrier(); + + if ( only_one_thread && !(task->requested_respawn()) ) { + // Did not respawn, destroy the functor to free memory. + task->functor_type::~functor_type(); + // Cannot destroy and deallocate the task until its dependences + // have been processed. + } + } + + // Constructor for runnable task + KOKKOS_INLINE_FUNCTION constexpr + Task( FunctorType && arg_functor ) + : root_type() , functor_type( std::move(arg_functor) ) + { } + + KOKKOS_INLINE_FUNCTION + ~Task() = delete; +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKBASE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskNode.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskNode.hpp new file mode 100644 index 0000000000..35f8853f1f --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskNode.hpp @@ -0,0 +1,758 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_TASKNODE_HPP +#define KOKKOS_IMPL_TASKNODE_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +enum TaskType : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2, TaskSpecial = -1 }; + +//============================================================================== + +/** Intrusive base class for things allocated with a Kokkos::MemoryPool + * + * @warning Memory pools assume that the address of this class is the same + * as the address of the most derived type that was allocated to + * have the given size. As a consequence, when interacting with + * multiple inheritance, this must always be the first base class + * of any derived class that uses it! + * @todo Consider inverting inheritance structure to avoid this problem? + * + * @tparam CountType type of integer used to store the allocation size + */ +template +class alignas(void*) PoolAllocatedObjectBase { +public: + + using pool_allocation_size_type = CountType; + +private: + + pool_allocation_size_type m_alloc_size; + +public: + + + KOKKOS_INLINE_FUNCTION + constexpr explicit PoolAllocatedObjectBase(pool_allocation_size_type allocation_size) + : m_alloc_size(allocation_size) + { } + + KOKKOS_INLINE_FUNCTION + CountType get_allocation_size() const noexcept { return m_alloc_size; } + +}; + +//============================================================================== + + +// TODO @tasking @cleanup DSH move this? +template +class ReferenceCountedBase { +public: + + using reference_count_size_type = CountType; + +private: + + reference_count_size_type m_ref_count = 0; + +public: + + KOKKOS_INLINE_FUNCTION + constexpr explicit + ReferenceCountedBase(reference_count_size_type initial_reference_count) + : m_ref_count(initial_reference_count) + { + // This can't be here because it breaks constexpr + // KOKKOS_EXPECTS(initial_reference_count > 0); + } + + /** Decrement the reference count, + * and return true iff this decrement caused + * the reference count to become zero + */ + KOKKOS_INLINE_FUNCTION + bool decrement_and_check_reference_count() + { + // TODO @tasking @memory_order DSH memory order + auto old_count = Kokkos::atomic_fetch_add(&m_ref_count, -1); + + KOKKOS_ASSERT(old_count > 0 && "reference count greater less than zero!"); + + return (old_count == 1); + } + + KOKKOS_INLINE_FUNCTION + void increment_reference_count() + { + Kokkos::atomic_increment(&m_ref_count); + } + +}; + +template +class AggregateTask; + +template +class RunnableTaskBase; + +//============================================================================== + +template +class TaskNode + : public PoolAllocatedObjectBase, // size 4, must be first! + public ReferenceCountedBase, // size 4 + public TaskQueueTraits::template intrusive_task_base_type> // size 8+ +{ +public: + + using priority_type = int16_t; + +private: + + using task_base_type = TaskNode; + using pool_allocated_base_type = PoolAllocatedObjectBase; + using reference_counted_base_type = ReferenceCountedBase; + using task_queue_traits = TaskQueueTraits; + using waiting_queue_type = + typename task_queue_traits::template waiting_queue_type; + + waiting_queue_type m_wait_queue; // size 8+ + + // TODO @tasking @cleanup DSH eliminate this, or make its purpose a bit more clear. It's only used in BasicFuture, and only for deallocation purposes + TaskQueueBase* m_ready_queue_base; + + TaskType m_task_type; // size 2 + priority_type m_priority; // size 2 + bool m_is_respawning = false; + +public: + + KOKKOS_INLINE_FUNCTION + constexpr + TaskNode( + TaskType task_type, + TaskPriority priority, + TaskQueueBase* queue_base, + reference_count_size_type initial_reference_count, + pool_allocation_size_type allocation_size + ) : pool_allocated_base_type( + /* allocation_size = */ allocation_size + ), + reference_counted_base_type( + /* initial_reference_count = */ initial_reference_count + ), + m_wait_queue(), + m_ready_queue_base(queue_base), + m_task_type(task_type), + m_priority(static_cast(priority)), + m_is_respawning(false) + { } + + TaskNode() = delete; + TaskNode(TaskNode const&) = delete; + TaskNode(TaskNode&&) = delete; + TaskNode& operator=(TaskNode const&) = delete; + TaskNode& operator=(TaskNode&&) = delete; + + KOKKOS_INLINE_FUNCTION + bool is_aggregate() const noexcept { return m_task_type == TaskType::Aggregate; } + + KOKKOS_INLINE_FUNCTION + bool is_runnable() const noexcept { return m_task_type != TaskType::Aggregate; } + + KOKKOS_INLINE_FUNCTION + bool is_runnable() const volatile noexcept { return m_task_type != TaskType::Aggregate; } + + KOKKOS_INLINE_FUNCTION + bool is_single_runnable() const noexcept { return m_task_type == TaskType::TaskSingle; } + + KOKKOS_INLINE_FUNCTION + bool is_team_runnable() const noexcept { return m_task_type == TaskType::TaskTeam; } + + KOKKOS_INLINE_FUNCTION + TaskType get_task_type() const noexcept { return m_task_type; } + + KOKKOS_INLINE_FUNCTION + RunnableTaskBase& + as_runnable_task() & { + KOKKOS_EXPECTS(this->is_runnable()); + return static_cast&>(*this); + } + + KOKKOS_INLINE_FUNCTION + RunnableTaskBase const& + as_runnable_task() const & { + KOKKOS_EXPECTS(this->is_runnable()); + return static_cast const&>(*this); + } + + KOKKOS_INLINE_FUNCTION + RunnableTaskBase volatile& + as_runnable_task() volatile & { + KOKKOS_EXPECTS(this->is_runnable()); + return static_cast volatile&>(*this); + } + + KOKKOS_INLINE_FUNCTION + RunnableTaskBase const volatile& + as_runnable_task() const volatile & { + KOKKOS_EXPECTS(this->is_runnable()); + return static_cast const volatile&>(*this); + } + + KOKKOS_INLINE_FUNCTION + RunnableTaskBase&& + as_runnable_task() && { + KOKKOS_EXPECTS(this->is_runnable()); + return static_cast&&>(*this); + } + + template + KOKKOS_INLINE_FUNCTION + AggregateTask& + as_aggregate() & { + KOKKOS_EXPECTS(this->is_aggregate()); + return static_cast&>(*this); + } + + template + KOKKOS_INLINE_FUNCTION + AggregateTask const& + as_aggregate() const & { + KOKKOS_EXPECTS(this->is_aggregate()); + return static_cast const&>(*this); + } + + template + KOKKOS_INLINE_FUNCTION + AggregateTask&& + as_aggregate() && { + KOKKOS_EXPECTS(this->is_aggregate()); + return static_cast&&>(*this); + } + + KOKKOS_INLINE_FUNCTION + bool try_add_waiting(task_base_type& depends_on_this) { + return m_wait_queue.try_push(depends_on_this); + } + + template + KOKKOS_INLINE_FUNCTION + void consume_wait_queue(Function&& f) { + KOKKOS_EXPECTS(not m_wait_queue.is_consumed()); + m_wait_queue.consume(std::forward(f)); + } + + KOKKOS_INLINE_FUNCTION + bool wait_queue_is_consumed() const noexcept { + // TODO @tasking @memory_order DSH memory order + return m_wait_queue.is_consumed(); + } + + KOKKOS_INLINE_FUNCTION + TaskQueueBase* + ready_queue_base_ptr() const noexcept { + return m_ready_queue_base; + } + + KOKKOS_INLINE_FUNCTION + void set_priority(TaskPriority priority) noexcept { + KOKKOS_EXPECTS(!this->is_enqueued()); + m_priority = (priority_type)priority; + } + + KOKKOS_INLINE_FUNCTION + void set_priority(TaskPriority priority) volatile noexcept { + KOKKOS_EXPECTS(!this->is_enqueued()); + m_priority = (priority_type)priority; + } + + KOKKOS_INLINE_FUNCTION + TaskPriority get_priority() const noexcept { + return (TaskPriority)m_priority; + } + + KOKKOS_INLINE_FUNCTION + bool get_respawn_flag() const { return m_is_respawning; } + + KOKKOS_INLINE_FUNCTION + void set_respawn_flag(bool value = true) { + m_is_respawning = value; + } + + KOKKOS_INLINE_FUNCTION + void set_respawn_flag(bool value = true) volatile { + m_is_respawning = value; + } + +}; + +//============================================================================== + +template +class SchedulingInfoStorage; + +//============================================================================== + +template +class SchedulingInfoStorage + : public BaseType, // must be first base class for allocation reasons!!! + private NoUniqueAddressMemberEmulation +{ + +private: + + using base_t = BaseType; + using task_scheduling_info_type = SchedulingInfo; + +public: + + using base_t::base_t; + + KOKKOS_INLINE_FUNCTION + task_scheduling_info_type& scheduling_info() & + { + return this->no_unique_address_data_member(); + } + + KOKKOS_INLINE_FUNCTION + task_scheduling_info_type const& scheduling_info() const & + { + return this->no_unique_address_data_member(); + } + + KOKKOS_INLINE_FUNCTION + task_scheduling_info_type&& scheduling_info() && + { + return std::move(*this).no_unique_address_data_member(); + } + +}; + + +//============================================================================== + +template +class alignas(16) AggregateTask final + : public SchedulingInfoStorage< + TaskNode, + SchedulingInfo + >, // must be first base class for allocation reasons!!! + public ObjectWithVLAEmulation< + AggregateTask, + OwningRawPtr> + > +{ +private: + + using base_t = SchedulingInfoStorage< + TaskNode, + SchedulingInfo + >; + using vla_base_t = ObjectWithVLAEmulation< + AggregateTask, + OwningRawPtr> + >; + + using task_base_type = TaskNode; + +public: + + using aggregate_task_type = AggregateTask; // concept marker + + template + // requires std::is_constructible_v + KOKKOS_INLINE_FUNCTION + constexpr explicit + AggregateTask( + int32_t aggregate_predecessor_count, + Args&&... args + ) : base_t( + TaskType::Aggregate, + TaskPriority::Regular, // all aggregates are regular priority + std::forward(args)... + ), + vla_base_t(aggregate_predecessor_count) + { } + + KOKKOS_INLINE_FUNCTION + int32_t dependence_count() const { return this->n_vla_entries(); } + +}; + +//KOKKOS_IMPL_IS_CONCEPT(aggregate_task); + +//============================================================================== + + +template +class RunnableTaskBase + : public TaskNode // must be first base class for allocation reasons!!! +{ +private: + + using base_t = TaskNode; + +public: + + using task_base_type = TaskNode; + using function_type = void(*)( task_base_type * , void * ); + using destroy_type = void(*)( task_base_type * ); + using runnable_task_type = RunnableTaskBase; + +private: + + function_type m_apply; + task_base_type* m_predecessor = nullptr; + +public: + + template + // requires std::is_constructible_v + KOKKOS_INLINE_FUNCTION + constexpr explicit + RunnableTaskBase( + function_type apply_function_ptr, + Args&&... args + ) : base_t(std::forward(args)...), + m_apply(apply_function_ptr) + { } + + KOKKOS_INLINE_FUNCTION + bool has_predecessor() const { return m_predecessor != nullptr; } + + KOKKOS_INLINE_FUNCTION + void clear_predecessor() { m_predecessor = nullptr; } + + KOKKOS_INLINE_FUNCTION + void clear_predecessor() volatile { m_predecessor = nullptr; } + + template + KOKKOS_INLINE_FUNCTION + SchedulingInfo& + scheduling_info_as() + { + using info_storage_type = SchedulingInfoStorage; + + return static_cast(this)->scheduling_info(); + } + + template + KOKKOS_INLINE_FUNCTION + SchedulingInfo const& + scheduling_info_as() const + { + using info_storage_type = SchedulingInfoStorage; + + return static_cast(this)->scheduling_info(); + } + + + KOKKOS_INLINE_FUNCTION + task_base_type& get_predecessor() const { + KOKKOS_EXPECTS(m_predecessor != nullptr); + return *m_predecessor; + } + + KOKKOS_INLINE_FUNCTION + void set_predecessor(task_base_type& predecessor) + { + KOKKOS_EXPECTS(m_predecessor == nullptr); + // Increment the reference count so that predecessor doesn't go away + // before this task is enqueued. + // (should be memory order acquire) + predecessor.increment_reference_count(); + m_predecessor = &predecessor; + } + + KOKKOS_INLINE_FUNCTION + void acquire_predecessor_from(runnable_task_type& other) + { + KOKKOS_EXPECTS(m_predecessor == nullptr || other.m_predecessor == m_predecessor); + // since we're transfering, no need to modify the reference count + m_predecessor = other.m_predecessor; + other.m_predecessor = nullptr; + } + + KOKKOS_INLINE_FUNCTION + void acquire_predecessor_from(runnable_task_type& other) volatile + { + KOKKOS_EXPECTS(m_predecessor == nullptr || other.m_predecessor == m_predecessor); + // since we're transfering, no need to modify the reference count + m_predecessor = other.m_predecessor; + other.m_predecessor = nullptr; + } + + template + KOKKOS_INLINE_FUNCTION + void run(TeamMember& member) { + (*m_apply)(this, &member); + } +}; + +//KOKKOS_IMPL_IS_CONCEPT(runnable_task); + +//============================================================================== + +template +class TaskResultStorage : public Base +{ +private: + + using base_t = Base; + + alignas(Base) ResultType m_value = ResultType{}; + + +public: + + using base_t::base_t; + + KOKKOS_INLINE_FUNCTION + ResultType* value_pointer() { + // Over-alignment makes this a non-standard-layout class, + // so alignas() doesn't work + //static_assert( + // offsetof(TaskResultStorage, m_value) == sizeof(Base), + // "TaskResultStorage must be POD for layout purposes" + //); + return &m_value; + } + + KOKKOS_INLINE_FUNCTION + ResultType& value_reference() { return m_value; } + +}; + + +// TODO @tasking @optimization DSH optimization for empty types (in addition to void) +template +class TaskResultStorage : public Base +{ +private: + + using base_t = Base; + +public: + + using base_t::base_t; + + KOKKOS_INLINE_FUNCTION + void* value_pointer() noexcept { return nullptr; } + + KOKKOS_INLINE_FUNCTION + void value_reference() noexcept { } + +}; + +//============================================================================== + +template < + class TaskQueueTraits, + class Scheduler, + class ResultType, + class FunctorType +> +class alignas(16) RunnableTask + : // using nesting of base classes to control layout; multiple empty base classes + // may not be ABI compatible with CUDA on Windows + public TaskResultStorage< + ResultType, + SchedulingInfoStorage< + RunnableTaskBase, + typename Scheduler::task_queue_type::task_scheduling_info_type + > + >, // must be first base class + public FunctorType +{ +private: + using base_t = TaskResultStorage< + ResultType, + SchedulingInfoStorage< + RunnableTaskBase, + typename Scheduler::task_queue_type::task_scheduling_info_type + > + >; + + using runnable_task_base_type = RunnableTaskBase; + using scheduler_type = Scheduler; + using scheduling_info_type = + typename scheduler_type::task_scheduling_info_type; + using scheduling_info_storage_base = base_t; + + using task_base_type = TaskNode; + using specialization = TaskQueueSpecialization; + using member_type = typename specialization::member_type; + using result_type = ResultType; + using functor_type = FunctorType; + +public: + + template + // requires std::is_constructible_v + KOKKOS_INLINE_FUNCTION + constexpr explicit + RunnableTask( + FunctorType&& functor, + Args&&... args + ) : base_t( + std::forward(args)... + ), + functor_type(std::move(functor)) + { } + + KOKKOS_INLINE_FUNCTION + ~RunnableTask() = delete; + + KOKKOS_INLINE_FUNCTION + void update_scheduling_info( + member_type& member + ) { + // TODO @tasking @generalization DSH call a queue-specific hook here; for now, this info is already updated elsewhere + // this->scheduling_info() = member.scheduler().scheduling_info(); + } + + KOKKOS_INLINE_FUNCTION + void apply_functor(member_type* member, void*) + { + update_scheduling_info(*member); + this->functor_type::operator()(*member); + } + + template + KOKKOS_INLINE_FUNCTION + void apply_functor(member_type* member, T* val) + { + update_scheduling_info(*member); + //this->functor_type::operator()(*member, *val); + this->functor_type::operator()(*member, *val); + } + + KOKKOS_FUNCTION static + void destroy( task_base_type * root ) + { + //TaskResult::destroy(root); + } + + KOKKOS_FUNCTION static + void apply(task_base_type* self, void* member_as_void) + { + using task_type = Impl::RunnableTask*; + auto* const task = static_cast(self); + auto* const member = reinterpret_cast(member_as_void); + + // Now that we're over-aligning the result storage, this isn't a problem any more + //static_assert(std::is_standard_layout::value, + // "Tasks must be standard layout" + //); + //static_assert(std::is_pod::value, + // "Tasks must be PODs" + //); + + // Task may be serial or team. + // If team then must synchronize before querying if respawn was requested. + // If team then only one thread calls destructor. + + const bool only_one_thread = +#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA) + 0 == threadIdx.x && 0 == threadIdx.y ; +#else + 0 == member->team_rank(); +#endif + + // Ensure that the respawn flag is set to zero + self->set_respawn_flag(false); + + //task->apply_functor(member, TaskResult::ptr(task)); + task->apply_functor(member, task->value_pointer()); + + member->team_barrier(); + + if ( only_one_thread && !(task->get_respawn_flag()) ) { + // Did not respawn, destroy the functor to free memory. + task->functor_type::~functor_type(); + // Cannot destroy and deallocate the task until its dependences + // have been processed. + } + } + +}; + +} /* namespace Impl */ + + +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKNODE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskPolicyData.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskPolicyData.hpp new file mode 100644 index 0000000000..85e665fffc --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskPolicyData.hpp @@ -0,0 +1,195 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_TASKPOLICYDATA_HPP +#define KOKKOS_IMPL_TASKPOLICYDATA_HPP + +//---------------------------------------------------------------------------- + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- + +template +struct TaskPolicyWithPredecessor +{ +private: + + DepFutureType m_predecessor; + Kokkos::TaskPriority m_priority; + +public: + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithPredecessor( + DepFutureType arg_predecessor, + Kokkos::TaskPriority arg_priority + ) : m_predecessor(std::move(arg_predecessor)), + m_priority(arg_priority) + { } + + TaskPolicyWithPredecessor() = delete; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithPredecessor(TaskPolicyWithPredecessor const&) = default; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithPredecessor(TaskPolicyWithPredecessor&&) = default; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithPredecessor& operator=(TaskPolicyWithPredecessor const&) = default; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithPredecessor& operator=(TaskPolicyWithPredecessor&&) = default; + + KOKKOS_INLINE_FUNCTION + ~TaskPolicyWithPredecessor() = default; + + KOKKOS_INLINE_FUNCTION + DepFutureType&& predecessor() && { + return std::move(m_predecessor); + } + + KOKKOS_INLINE_FUNCTION + constexpr TaskPriority priority() const { return m_priority; } + + KOKKOS_INLINE_FUNCTION + static constexpr int task_type() noexcept { return TaskEnum; } + +}; + +// TODO @tasking @cleanup DSH clean this up. Using nullptr_t here is too clever +template +struct TaskPolicyWithScheduler +{ +public: + + using predecessor_future_type = PredecessorFuture; + +private: + + Scheduler m_scheduler; + Kokkos::TaskPriority m_priority; + predecessor_future_type m_predecessor; + +public: + + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithScheduler( + Scheduler arg_scheduler, + Kokkos::TaskPriority arg_priority + ) : m_scheduler(std::move(arg_scheduler)), + m_priority(arg_priority) + { } + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithScheduler( + Scheduler arg_scheduler, + predecessor_future_type arg_predecessor, + Kokkos::TaskPriority arg_priority + ) : m_scheduler(std::move(arg_scheduler)), + m_priority(arg_priority), + m_predecessor(std::move(arg_predecessor)) + { } + + TaskPolicyWithScheduler() = delete; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithScheduler(TaskPolicyWithScheduler const&) = default; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithScheduler(TaskPolicyWithScheduler&&) = default; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithScheduler& operator=(TaskPolicyWithScheduler const&) = default; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithScheduler& operator=(TaskPolicyWithScheduler&&) = default; + + KOKKOS_INLINE_FUNCTION + ~TaskPolicyWithScheduler() = default; + + KOKKOS_INLINE_FUNCTION + Scheduler& scheduler() & { + return m_scheduler; + } + + KOKKOS_INLINE_FUNCTION + constexpr TaskPriority priority() const { return m_priority; } + + KOKKOS_INLINE_FUNCTION + predecessor_future_type& predecessor() & { + return m_predecessor; + } + + KOKKOS_INLINE_FUNCTION + static constexpr bool has_predecessor() noexcept + { + return not std::is_same::value; + } + + KOKKOS_INLINE_FUNCTION + static constexpr int task_type() noexcept { return TaskEnum; } + +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKPOLICYDATA_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp index eacf0837fa..1adcfe4cc4 100644 --- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp @@ -49,27 +49,24 @@ #include #if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + #include #include #include -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class Space , typename ResultType , class FunctorType > -class TaskBase ; - -template< typename Space > -class TaskQueue ; - -template< typename Space > -class TaskQueueSpecialization ; - -} /* namespace Impl */ -} /* namespace Kokkos */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -77,240 +74,29 @@ class TaskQueueSpecialization ; namespace Kokkos { namespace Impl { -/** \brief Base class for task management, access, and execution. - * - * Inheritance structure to allow static_cast from the task root type - * and a task's FunctorType. - * - * // Enable a functor to access the base class - * // and provide memory for result value. - * TaskBase< Space , ResultType , FunctorType > - * : TaskBase< void , void , void > - * , FunctorType - * { ... }; - * Followed by memory allocated for result value. - * - * - * States of a task: - * - * Constructing State, NOT IN a linked list - * m_wait == 0 - * m_next == 0 - * - * Scheduling transition : Constructing -> Waiting - * before: - * m_wait == 0 - * m_next == this task's initial dependence, 0 if none - * after: - * m_wait == EndTag - * m_next == EndTag - * - * Waiting State, IN a linked list - * m_apply != 0 - * m_queue != 0 - * m_ref_count > 0 - * m_wait == head of linked list of tasks waiting on this task - * m_next == next of linked list of tasks - * - * transition : Waiting -> Executing - * before: - * m_next == EndTag - * after:: - * m_next == LockTag - * - * Executing State, NOT IN a linked list - * m_apply != 0 - * m_queue != 0 - * m_ref_count > 0 - * m_wait == head of linked list of tasks waiting on this task - * m_next == LockTag - * - * Respawn transition : Executing -> Executing-Respawn - * before: - * m_next == LockTag - * after: - * m_next == this task's updated dependence, 0 if none - * - * Executing-Respawn State, NOT IN a linked list - * m_apply != 0 - * m_queue != 0 - * m_ref_count > 0 - * m_wait == head of linked list of tasks waiting on this task - * m_next == this task's updated dependence, 0 if none - * - * transition : Executing -> Complete - * before: - * m_wait == head of linked list - * after: - * m_wait == LockTag - * - * Complete State, NOT IN a linked list - * m_wait == LockTag: cannot add dependence (<=> complete) - * m_next == LockTag: not a member of a wait queue - * - */ -template<> -class TaskBase< void , void , void > -{ -public: - - enum : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2 }; - enum : uintptr_t { LockTag = ~uintptr_t(0) , EndTag = ~uintptr_t(1) }; - - template< typename > friend class Kokkos::TaskScheduler ; - - typedef TaskQueue< void > queue_type ; - - typedef void (* function_type) ( TaskBase * , void * ); - - // sizeof(TaskBase) == 48 - - function_type m_apply ; ///< Apply function pointer - queue_type * m_queue ; ///< Pointer to queue - TaskBase * m_wait ; ///< Linked list of tasks waiting on this - TaskBase * m_next ; ///< Waiting linked-list next - int32_t m_ref_count ; ///< Reference count - int32_t m_alloc_size ; ///< Allocation size - int32_t m_dep_count ; ///< Aggregate's number of dependences - int16_t m_task_type ; ///< Type of task - int16_t m_priority ; ///< Priority of runnable task - - TaskBase( TaskBase && ) = delete ; - TaskBase( const TaskBase & ) = delete ; - TaskBase & operator = ( TaskBase && ) = delete ; - TaskBase & operator = ( const TaskBase & ) = delete ; - -#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND - KOKKOS_INLINE_FUNCTION ~TaskBase() {}; -#else - KOKKOS_INLINE_FUNCTION ~TaskBase() = default; -#endif - - KOKKOS_INLINE_FUNCTION constexpr - TaskBase() - : m_apply( 0 ) - , m_queue( 0 ) - , m_wait( 0 ) - , m_next( 0 ) - , m_ref_count( 0 ) - , m_alloc_size( 0 ) - , m_dep_count( 0 ) - , m_task_type( 0 ) - , m_priority( 0 ) - {} - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - TaskBase * volatile * aggregate_dependences() volatile - { return reinterpret_cast( this + 1 ); } - - KOKKOS_INLINE_FUNCTION - bool requested_respawn() - { - // This should only be called when a task has finished executing and is - // in the transition to either the complete or executing-respawn state. - TaskBase * const lock = reinterpret_cast< TaskBase * >( LockTag ); - return lock != m_next; - } - - KOKKOS_INLINE_FUNCTION - void add_dependence( TaskBase* dep ) - { - // Precondition: lock == m_next - - TaskBase * const lock = (TaskBase *) LockTag ; - - // Assign dependence to m_next. It will be processed in the subsequent - // call to schedule. Error if the dependence is reset. - if ( lock != Kokkos::atomic_exchange( & m_next, dep ) ) { - Kokkos::abort("TaskScheduler ERROR: resetting task dependence"); - } - - if ( 0 != dep ) { - // The future may be destroyed upon returning from this call - // so increment reference count to track this assignment. - Kokkos::atomic_increment( &(dep->m_ref_count) ); - } - } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - int32_t reference_count() const - { return *((int32_t volatile *)( & m_ref_count )); } - -}; - -static_assert( sizeof(TaskBase) == 48 - , "Verifying expected sizeof(TaskBase)" ); - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -template< typename ResultType > -struct TaskResult { - - enum : int32_t { size = sizeof(ResultType) }; - - using reference_type = ResultType & ; - - KOKKOS_INLINE_FUNCTION static - ResultType * ptr( TaskBase * task ) - { - return reinterpret_cast< ResultType * > - ( reinterpret_cast< char * >(task) + task->m_alloc_size - sizeof(ResultType) ); - } - - KOKKOS_INLINE_FUNCTION static - reference_type get( TaskBase * task ) - { return *ptr( task ); } -}; - -template<> -struct TaskResult< void > { - - enum : int32_t { size = 0 }; - - using reference_type = void ; - - KOKKOS_INLINE_FUNCTION static - void * ptr( TaskBase * ) { return (void*) 0 ; } - - KOKKOS_INLINE_FUNCTION static - reference_type get( TaskBase * ) {} -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template<> -class TaskQueue< void > {}; /** \brief Manage task allocation, deallocation, and scheduling. * * Task execution is deferred to the TaskQueueSpecialization. * All other aspects of task management have shared implementation. */ -template< typename ExecSpace > -class TaskQueue : public TaskQueue { -private: +template< typename ExecSpace, typename MemorySpace > +class TaskQueue : public TaskQueueBase { +protected: - friend class TaskQueueSpecialization< ExecSpace > ; - friend class Kokkos::TaskScheduler< ExecSpace > ; + template + friend struct TaskQueueSpecialization; + template + friend class TaskQueueSpecializationConstrained; + template + friend class Kokkos::BasicTaskScheduler; - using execution_space = ExecSpace ; - using specialization = TaskQueueSpecialization< execution_space > ; - using memory_space = typename specialization::memory_space ; - using device_type = Kokkos::Device< execution_space , memory_space > ; - using memory_pool = Kokkos::MemoryPool< device_type > ; - using task_root_type = Kokkos::Impl::TaskBase ; + using execution_space = ExecSpace; + using memory_space = MemorySpace; + using device_type = Kokkos::Device< execution_space , memory_space > ; + using memory_pool = Kokkos::MemoryPool< device_type > ; + using task_root_type = Kokkos::Impl::TaskBase; + using team_queue_type = TaskQueue; struct Destroy { TaskQueue * m_queue ; @@ -325,8 +111,8 @@ private: memory_pool m_memory ; task_root_type * volatile m_ready[ NumQueue ][ 2 ]; - long m_accum_alloc ; // Accumulated number of allocations - int m_count_alloc ; // Current number of allocations + //long m_accum_alloc ; // Accumulated number of allocations + int m_count_alloc = 0 ; // Current number of allocations int m_max_alloc ; // Maximum number of allocations int m_ready_count ; // Number of ready or executing @@ -347,8 +133,8 @@ private: // task->m_next is the dependence or zero // Postcondition: // task->m_next is linked list membership - KOKKOS_FUNCTION void schedule_runnable( task_root_type * const ); - KOKKOS_FUNCTION void schedule_aggregate( task_root_type * const ); + KOKKOS_FUNCTION void schedule_runnable(task_root_type*); + KOKKOS_FUNCTION void schedule_aggregate(task_root_type*); // Reschedule a task // Precondition: @@ -381,23 +167,29 @@ private: KOKKOS_FUNCTION static void decrement( task_root_type * task ); + public: - // If and only if the execution space is a single thread - // then execute ready tasks. KOKKOS_INLINE_FUNCTION - void iff_single_thread_recursive_execute() - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - specialization::iff_single_thread_recursive_execute( this ); -#endif - } + int allocation_count() const noexcept { return m_count_alloc; } - void execute() { specialization::execute( this ); } + + KOKKOS_INLINE_FUNCTION + void initialize_team_queues(int pool_size) const noexcept { } + + KOKKOS_INLINE_FUNCTION + task_root_type* attempt_to_steal_task() const noexcept { return nullptr; } + + KOKKOS_INLINE_FUNCTION + team_queue_type& get_team_queue(int team_rank) { return *this; } + + //void execute() { specialization::execute( this ); } template< typename FunctorType > void proc_set_apply( typename task_root_type::function_type * ptr ) { + using specialization = + TaskQueueSpecialization>; specialization::template proc_set_apply< FunctorType >( ptr ); } @@ -451,9 +243,7 @@ public: { using value_type = typename FunctorType::value_type ; - using task_type = Impl::TaskBase< execution_space - , value_type - , FunctorType > ; + using task_type = Impl::Task ; enum : size_t { align = ( 1 << 4 ) , align_mask = align - 1 }; enum : size_t { task_size = sizeof(task_type) }; @@ -480,86 +270,6 @@ public: //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -namespace Kokkos { -namespace Impl { - -template< class ExecSpace , typename ResultType , class FunctorType > -class TaskBase - : public TaskBase< void , void , void > - , public FunctorType -{ -private: - - TaskBase() = delete ; - TaskBase( TaskBase && ) = delete ; - TaskBase( const TaskBase & ) = delete ; - TaskBase & operator = ( TaskBase && ) = delete ; - TaskBase & operator = ( const TaskBase & ) = delete ; - -public: - - using root_type = TaskBase< void , void , void > ; - using functor_type = FunctorType ; - using result_type = ResultType ; - - using specialization = TaskQueueSpecialization< ExecSpace > ; - using member_type = typename specialization::member_type ; - - KOKKOS_INLINE_FUNCTION - void apply_functor( member_type * const member , void * ) - { functor_type::operator()( *member ); } - - template< typename T > - KOKKOS_INLINE_FUNCTION - void apply_functor( member_type * const member - , T * const result ) - { functor_type::operator()( *member , *result ); } - - KOKKOS_FUNCTION static - void apply( root_type * root , void * exec ) - { - TaskBase * const task = static_cast< TaskBase * >( root ); - member_type * const member = reinterpret_cast< member_type * >( exec ); - result_type * const result = TaskResult< result_type >::ptr( task ); - - // Task may be serial or team. - // If team then must synchronize before querying if respawn was requested. - // If team then only one thread calls destructor. - - const bool only_one_thread = -#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA) - 0 == threadIdx.x && 0 == threadIdx.y ; -#else - 0 == member->team_rank(); -#endif - - task->apply_functor( member , result ); - - member->team_barrier(); - - if ( only_one_thread && !(task->requested_respawn()) ) { - // Did not respawn, destroy the functor to free memory. - static_cast(task)->~functor_type(); - // Cannot destroy and deallocate the task until its dependences - // have been processed. - } - } - - // Constructor for runnable task - KOKKOS_INLINE_FUNCTION constexpr - TaskBase( FunctorType && arg_functor ) - : root_type() , functor_type( arg_functor ) {} - - KOKKOS_INLINE_FUNCTION - ~TaskBase() {} -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */ diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueueCommon.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueueCommon.hpp new file mode 100644 index 0000000000..b0685506d4 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueueCommon.hpp @@ -0,0 +1,569 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_TASKQUEUECOMMON_HPP +#define KOKKOS_IMPL_TASKQUEUECOMMON_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +/// @brief CRTP Base class implementing the ready count parts common to most task queues +template +class TaskQueueCommonMixin +{ +private: + + int32_t m_ready_count = 0; + + // CRTP boilerplate + KOKKOS_INLINE_FUNCTION + Derived& _self() { return *static_cast(this); } + +public: + + //---------------------------------------------------------------------------- + // {{{2 + + TaskQueueCommonMixin() + : m_ready_count(0) + { + // TODO @tasking @memory_order DSH figure out if I need this store to be atomic + } + + ~TaskQueueCommonMixin() { + KOKKOS_EXPECTS((Kokkos::memory_fence(), m_ready_count < 1)); + KOKKOS_EXPECTS(m_ready_count == 0); + } + + // end Constructors, destructor, and assignment }}}2 + //---------------------------------------------------------------------------- + + + //---------------------------------------------------------------------------- + // {{{2 + +private: + + // This would be more readable with a lambda, but that comes with + // all the baggage associated with a lambda (compilation times, bugs with + // nvcc, etc.), so we'll use a simple little helper functor here. + template + struct _schedule_waiting_tasks_operation { + TaskNode const& m_predecessor; + Derived& m_queue; + TeamSchedulerInfo const& m_info; + KOKKOS_INLINE_FUNCTION + void operator()(TaskNode&& task) const noexcept + // requires Same + { + using task_scheduling_info_type = typename Derived::task_scheduling_info_type; + if(task.is_runnable()) // KOKKOS_LIKELY + { + // TODO @tasking @optimiazation DSH check this outside of the loop ? + if(m_predecessor.is_runnable()) { + m_queue.update_scheduling_info_from_completed_predecessor( + /* ready_task = */ task.as_runnable_task(), + /* predecessor = */ m_predecessor.as_runnable_task() + ); + } + else { + KOKKOS_ASSERT(m_predecessor.is_aggregate()); + m_queue.update_scheduling_info_from_completed_predecessor( + /* ready_task = */ task.as_runnable_task(), + /* predecessor = */ m_predecessor.template as_aggregate() + ); + } + m_queue.schedule_runnable( + std::move(task).as_runnable_task(), + m_info + ); + } + else { + // The scheduling info update happens inside of schedule_aggregate + m_queue.schedule_aggregate( + std::move(task).template as_aggregate(), + m_info + ); + } + } + }; + +protected: + + template + KOKKOS_FUNCTION + void _complete_finished_task( + TaskNode&& task, + TeamSchedulerInfo const& info + ) { + task.consume_wait_queue( + _schedule_waiting_tasks_operation{ + task, + _self(), + info + } + ); + bool should_delete = task.decrement_and_check_reference_count(); + if(should_delete) { + _self().deallocate(std::move(task)); + } + } + + KOKKOS_INLINE_FUNCTION + void _increment_ready_count() { + // TODO @tasking @memory_order DSH memory order + Kokkos::atomic_increment(&this->m_ready_count); + } + + KOKKOS_INLINE_FUNCTION + void _decrement_ready_count() { + // TODO @tasking @memory_order DSH memory order + Kokkos::atomic_decrement(&this->m_ready_count); + Kokkos::memory_fence(); + } + +public: + + KOKKOS_INLINE_FUNCTION + bool is_done() const noexcept { + // TODO @tasking @memory_order DSH Memory order, instead of volatile + return (*(volatile int*)(&m_ready_count)) == 0; + } + + KOKKOS_INLINE_FUNCTION + int32_t ready_count() const noexcept { + // TODO @tasking @memory_order DSH Memory order, instead of volatile + return (*(volatile int*)(&m_ready_count)); + } + + template + KOKKOS_FUNCTION + void + complete( + RunnableTaskBase&& task, + TeamSchedulerInfo const& info + ) + { + if(task.get_respawn_flag()) { + _self().schedule_runnable(std::move(task), info); + } + else { + _complete_finished_task(std::move(task), info); + } + // A runnable task was popped from a ready queue finished executing. + // If respawned into a ready queue then the ready count was incremented + // so decrement whether respawned or not. If finished, all of the + // tasks waiting on this have been enqueued (either in the ready queue + // or the next waiting queue, in the case of an aggregate), and the + // ready count has been incremented for each of those, preventing + // quiescence. Thus, it's safe to decrement the ready count here. + // TODO @tasking @memory_order DSH memory order? (probably release) + _decrement_ready_count(); + } + + template + KOKKOS_FUNCTION + void + complete( + AggregateTask&& task, + TeamSchedulerInfo const& info + ) { + // TODO @tasking DSH old code has a ifndef __HCC_ACCELERATOR__ here; figure out why + _complete_finished_task(std::move(task), info); + } + + // end Task and queue completion }}}2 + //---------------------------------------------------------------------------- + + + //---------------------------------------------------------------------------- + // {{{2 + +public: + + // This isn't actually generic; the template parameters are just to keep + // Derived from having to be complete + template + KOKKOS_INLINE_FUNCTION + void + schedule_runnable_to_queue( + RunnableTaskBase&& task, + ReadyQueueType& ready_queue, + TeamSchedulerInfo const& info + ) + { + bool task_is_ready = true; + bool scheduling_info_updated = false; + + // do this before enqueueing and potentially losing exclusive access to task + bool task_is_respawning = task.get_respawn_flag(); + + // clear the respawn flag, since we're handling the respawn (if any) here. + // We must make sure this is written through the cache, since the next + // thread to access it might be a Cuda thread from a different thread block. + ((RunnableTaskBase volatile&)task).set_respawn_flag(false); + + if(task.has_predecessor()) { + // save the predecessor into a local variable, then clear it from the + // task before adding it to the wait queue of the predecessor + // (We have exclusive access to the task's predecessor, so we don't need + // to do this atomically) + // TODO @tasking @internal_documentation DSH document that we expect exclusive access to `task` in this function + auto& predecessor = task.get_predecessor(); + // This needs a load/store fence here, technically + // making this a release store would also do this + ((RunnableTaskBase volatile&)task).clear_predecessor(); + + // TODO @tasking @memory_order DSH remove this fence in favor of memory orders + Kokkos::memory_fence(); // for now + + // Try to add the task to the predecessor's waiting queue. If it fails, + // the predecessor is already done + bool predecessor_not_ready = predecessor.try_add_waiting(task); + + // NOTE: if the predecessor was not ready and the task was enqueued, + // we've lost exclusive access and should nt touch task again + + // If the predecessor is not done, then task is not ready + task_is_ready = not predecessor_not_ready; + + if(task_is_ready and predecessor.is_runnable()) { + // this is our last chance to update the scheduling info before + // predecessor is potentially deleted + _self().update_scheduling_info_from_completed_predecessor( + /* ready_task = */ task, + /* predecessor = */ predecessor.as_runnable_task() + ); + scheduling_info_updated = true; + } + + if(task_is_respawning) { + // Reference count for predecessor was incremented when + // respawn called set_dependency() + // so that if predecessor completed prior to the + // above try_add_waiting(), predecessor would not be destroyed. + // predecessor reference count can now be decremented, + // which may deallocate it. + bool should_delete = predecessor.decrement_and_check_reference_count(); + if(should_delete) { + // TODO @tasking @cleanup DSH better encapsulation of this! + _self().deallocate(std::move(predecessor)); + } + } + // Note! predecessor may be destroyed at this point, so don't add anything + // here + } + + if(scheduling_info_updated) { + // We need to go back to the queue itself and see if it wants to schedule + // somewhere else + _self().schedule_runnable(std::move(task), info); + } + // Put it in the appropriate ready queue if it's ready + else if(task_is_ready) { + // Increment the ready count + _self()._increment_ready_count(); + // and enqueue the task + // (can't move because the task isn't expired unless the push succeeds + bool push_success = ready_queue.push(task); + if(not push_success) { + _self().handle_failed_ready_queue_insertion( + std::move(task), ready_queue, info + ); + } + } + + // Task may be enqueued and may be run at any point; don't touch it (hence + // the use of move semantics) + } + + template + KOKKOS_INLINE_FUNCTION + void + handle_failed_ready_queue_insertion( + RunnableTaskBase&& task, + ReadyQueueType& ready_queue, + TeamSchedulerInfo const& info + ) { + Kokkos::abort("Unhandled failure of ready task queue insertion!\n"); + } + + // This isn't actually generic; the template parameters are just to keep + // Derived from having to be complete + template + KOKKOS_FUNCTION + void + schedule_aggregate( + AggregateTask&& aggregate, + TeamSchedulerInfo const& info + ) + { + // Because the aggregate is being scheduled, should not be in any queue + KOKKOS_EXPECTS(not aggregate.is_enqueued()); + + using task_scheduling_info_type = typename Derived::task_scheduling_info_type; + using team_scheduler_info_type = typename Derived::team_scheduler_info_type; + static_assert( + std::is_same::value, + "SchedulingInfo type mismatch!" + ); + + bool incomplete_dependence_found = false; + + for(auto*& predecessor_ptr_ref : aggregate) { + + // if a previous scheduling operation hasn't already set the predecessor + // to nullptr, try to enqueue the aggregate into the predecessorendence's waiting + // queue + if(predecessor_ptr_ref != nullptr) { + + // Swap the pointer onto the stack and set the one in the aggregate VLA + // to nullptr before we try to add it to the waiting queue so that some + // other thread doesn't also get to here and find the pointer to be + // not null (since as soon as we try and schedule the aggregate, we + // potentially lose exclusive access to it if that enqueueing operation + // succeeds. The swap doesn't need to happen atomically since we have + // exclusive access to aggregate until an insertion succeeds + auto* predecessor_ptr = std::move(predecessor_ptr_ref); + + // TODO @tasking @memory_order DSH I think this needs to be a store release so that it doesn't get reordered after the queue insertion + predecessor_ptr_ref = nullptr; + + // TODO @tasking @memory_order DSH remove this fence in favor of memory orders + Kokkos::memory_fence(); + + // If adding the aggregate to the waiting queue succeeds, the predecessor is not + // complete + bool pred_not_ready = predecessor_ptr->try_add_waiting(aggregate); + + // NOTE! At this point it is unsafe to access aggregate (unless the + // enqueueing failed, so we can't use move semantics to expire it) + + // we found an incomplete dependence, so we can't make task's successors + // ready yet + incomplete_dependence_found = pred_not_ready; + + if(not pred_not_ready) { + // A predecessor was done, and we didn't enqueue the aggregate + // Update the aggregate's scheduling info (we still have exclusive + // access to it here) + if(predecessor_ptr->is_runnable()) { + _self().update_scheduling_info_from_completed_predecessor( + aggregate, predecessor_ptr->as_runnable_task() + ); + } + else { + KOKKOS_ASSERT(predecessor_ptr->is_aggregate()); + _self().update_scheduling_info_from_completed_predecessor( + aggregate, (*predecessor_ptr).template as_aggregate() + ); + } + } + + // the reference count for the predecessor was incremented when we put + // it into the predecessor list, so decrement it here + bool should_delete = predecessor_ptr->decrement_and_check_reference_count(); + if(should_delete) { + // TODO @tasking @cleanup DSH better encapsulation of this! + _self().deallocate(std::move(*predecessor_ptr)); + } + + // Stop the loop if we found an incomplete dependence + if(incomplete_dependence_found) break; + } + } + + // NOTE: it's not safe to access aggregate any more if an incomplete dependence + // was found, because some other thread could have already popped it off + // of another waiting queue + + if(not incomplete_dependence_found) { + // all of the predecessors were completed, so we can complete `task` + _self().complete(std::move(aggregate), info); + } + // Note!! task may have been deleted at this point, so don't add anything here! + } + + // Provide a sensible default that can be overridden + template + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + RunnableTaskBase& ready_task, + RunnableTaskBase const& predecessor + ) const + { + // by default, tell a ready task to use the scheduling info of its most + // recent predecessor + using task_scheduling_info_type = typename Derived::task_scheduling_info_type; + ready_task.template scheduling_info_as() = + predecessor.template scheduling_info_as(); + } + + // Provide a sensible default that can be overridden + template + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + AggregateTask& aggregate, + RunnableTaskBase const& predecessor + ) const + { + // by default, tell a ready task to use the scheduling info of its most + // recent predecessor + using task_scheduling_info_type = typename Derived::task_scheduling_info_type; + aggregate.scheduling_info() = + predecessor.template scheduling_info_as(); + } + + // Provide a sensible default that can be overridden + template + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + AggregateTask& aggregate, + AggregateTask const& predecessor + ) const + { + // by default, tell a ready task to use the scheduling info of its most + // recent predecessor + aggregate.scheduling_info() = predecessor.scheduling_info(); + } + + // Provide a sensible default that can be overridden + template + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + RunnableTaskBase& ready_task, + AggregateTask const& predecessor + ) const + { + // by default, tell a ready task to use the scheduling info of its most + // recent predecessor + using task_scheduling_info_type = typename Derived::task_scheduling_info_type; + ready_task.template scheduling_info_as() = + predecessor.scheduling_info(); + } + + template + KOKKOS_INLINE_FUNCTION + void initialize_scheduling_info_from_predecessor( + TaskNode& task, + TaskNode& predecessor + ) const + { + /* do nothing by default */ + } + + template + KOKKOS_INLINE_FUNCTION + void initialize_scheduling_info_from_team_scheduler_info( + TaskNode& task, + TeamSchedulerInfo const& info + ) const + { + /* do nothing by default */ + } + + template < + class ExecutionSpace, + class MemorySpace, + class MemoryPool + > + static /* KOKKOS_CONSTEXPR_14 */ size_t + task_queue_allocation_size( + ExecutionSpace const&, + MemorySpace const&, + MemoryPool const& + ) + // requires Same + // && Same + // && Same + { + static_assert( + std::is_same::value + && std::is_same::value + && std::is_same::value, + "Type mismatch in task_queue_allocation_size customization point" + ); + + return sizeof(Derived); + } + + // end Scheduling }}}2 + //---------------------------------------------------------------------------- + +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKQUEUECOMMON_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueueMemoryManager.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueueMemoryManager.hpp new file mode 100644 index 0000000000..c3ed1d6c71 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueueMemoryManager.hpp @@ -0,0 +1,251 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_TASKQUEUEMEMORYMANAGER_HPP +#define KOKKOS_IMPL_TASKQUEUEMEMORYMANAGER_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template < + class ExecSpace, + class MemorySpace, + class MemoryPool = Kokkos::MemoryPool> +> +class TaskQueueMemoryManager + : public TaskQueueBase +{ +public: + + using execution_space = ExecSpace; + using memory_space = MemorySpace; + using device_type = Kokkos::Device; + using memory_pool = MemoryPool; + using allocation_size_type = size_t; + +private: + + memory_pool m_pool; + // TODO @tasking @generalization DSH re-enable this with a flag in the type + //long m_accum_alloc = 0; + int m_count_alloc = 0; + int m_max_alloc = 0; + + struct _allocation_result { + bool success; + void* pointer; + }; + + KOKKOS_INLINE_FUNCTION + _allocation_result + _do_pool_allocate(allocation_size_type requested_size) { + // KOKKOS_EXPECTS(requested_size >= 0); generates a warning when allocation_size_type is unsigned + if(requested_size == 0 ) { + return { true, nullptr }; + } + else { + void* data = m_pool.allocate(static_cast(requested_size)); + + //Kokkos::atomic_increment(&m_accum_alloc); // memory_order_relaxed + Kokkos::atomic_increment(&m_count_alloc); // memory_order_relaxed + // TODO @tasking @minor DSH make this thread safe? (otherwise, it's just an approximation, which is probably fine...) + if(m_max_alloc < m_count_alloc) m_max_alloc = m_count_alloc; + + return { data != nullptr, data }; + } + } + + template + KOKKOS_INLINE_FUNCTION + T* + _do_contruct(void* allocated, allocation_size_type allocated_size, Args&&... args) { + + static_assert( + std::is_base_of, T>::value, + "TaskQueueMemoryManager can only allocate objects with PoolAllocatedObjectBase base class" + ); + + // TODO @tasking DSH figure out why this isn't working + //static_assert( + // std::is_constructible::value, + // "TaskQueueMemoryManager can't construct object of the requested type from the " + // " allocation size and the given arguments" + //); + + + auto rv = new (allocated) T( + std::forward(args)..., + allocated_size + ); + + // It feels like there should be a way to check this at compile-time + KOKKOS_ASSERT( + (intptr_t)(rv) == (intptr_t)(static_cast*>(rv)) + && "PoolAllocatedObjectBase must be the first base class of the allocated type" + ); + + return rv; + + } + + +public: + + explicit + TaskQueueMemoryManager(memory_pool const& pool) + : m_pool(pool) + { } + + + template + KOKKOS_FUNCTION + T* + allocate_and_construct(Args&&... args) + // requires + // std::is_base_of_v, T> + // && std::is_constructible_v + { + constexpr auto allocation_size = sizeof(T); + + + auto result = _do_pool_allocate(allocation_size); + + KOKKOS_ASSERT(result.success && "Memory allocation failure"); + + auto rv = _do_contruct(result.pointer, allocation_size, std::forward(args)...); + + KOKKOS_ENSURES(intptr_t(rv) % alignof(T) == 0 && "alignment not preserved!"); + + return rv; + } + + template + KOKKOS_INLINE_FUNCTION + T* + allocate_and_construct_with_vla_emulation( + allocation_size_type n_vla_entries, + Args&&... args + ) + // requires + // std::is_base_of_v, T> + // && std::is_base_of, T>::value + // && std::is_constructible_v + { + + + static_assert( + std::is_base_of, T>::value, + "Can't append emulated variable length array of type with greater alignment than" + " the type to which the VLA is being appended" + ); + + using vla_emulation_base = ObjectWithVLAEmulation; + + auto const allocation_size = vla_emulation_base::required_allocation_size(n_vla_entries); + auto result = _do_pool_allocate(allocation_size); + + KOKKOS_ASSERT(result.success && "Memory allocation failure"); + + auto rv = _do_contruct(result.pointer, allocation_size, std::forward(args)...); + + KOKKOS_ENSURES(intptr_t(rv) % alignof(T) == 0); + + return rv; + } + + template + KOKKOS_INLINE_FUNCTION + void deallocate(PoolAllocatedObjectBase&& obj) + { + m_pool.deallocate((void*)&obj, 1); + Kokkos::atomic_decrement(&m_count_alloc); // memory_order_relaxed + } + + KOKKOS_INLINE_FUNCTION + memory_pool& get_memory_pool() { return m_pool; } + KOKKOS_INLINE_FUNCTION + memory_pool const& get_memory_pool() const { return m_pool; } + + KOKKOS_INLINE_FUNCTION + int allocation_count() const noexcept { return m_count_alloc; } +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//////////////////////////////////////////////////////////////////////////////// +// END OLD CODE +//////////////////////////////////////////////////////////////////////////////// + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKQUEUEMEMORYMANAGER_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple.hpp new file mode 100644 index 0000000000..17c357ff31 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple.hpp @@ -0,0 +1,286 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_HPP +#define KOKKOS_IMPL_TASKQUEUEMULTIPLE_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + + +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< typename ExecSpace, typename MemorySpace = typename ExecSpace::memory_space > +class LeagueQueueCollection; + +template +class TaskQueueMultiple : public TaskQueue { +private: + + using base_t = TaskQueue; + using queue_collection_t = LeagueQueueCollection; + + int m_league_rank = static_cast(KOKKOS_INVALID_INDEX); + + // This pointer is owning only if m_league_rank == 0 + queue_collection_t* m_other_queues = nullptr; + + +public: + + struct Destroy { + TaskQueueMultiple* m_queue ; + void destroy_shared_allocation(); + }; + + + using team_queue_type = TaskQueueMultiple; + + TaskQueueMultiple( + int arg_league_rank, + queue_collection_t* arg_other_queues, + typename base_t::memory_pool const& arg_memory_pool + ) + : base_t(arg_memory_pool), + m_league_rank(arg_league_rank), + m_other_queues(arg_other_queues) + { } + + explicit TaskQueueMultiple( + typename base_t::memory_pool const& arg_memory_pool + ) + : base_t(arg_memory_pool), + m_league_rank(0) + { + void* other_queues_buffer = typename base_t::memory_space{}.allocate(sizeof(queue_collection_t)); + m_other_queues = new(other_queues_buffer) queue_collection_t(this); + } + + ~TaskQueueMultiple() { + if(m_league_rank == 0 && m_other_queues != nullptr) { + m_other_queues->~queue_collection_t(); + typename base_t::memory_space{}.deallocate(m_other_queues, sizeof(queue_collection_t)); + } + // rest of destruction is handled in the base class + } + + //---------------------------------------- + + void initialize_team_queues(int arg_league_size) const noexcept { + m_other_queues->initialize_team_queues(arg_league_size, this->m_memory); + } + + KOKKOS_INLINE_FUNCTION + team_queue_type& get_team_queue(int arg_league_rank) noexcept { + if(arg_league_rank == m_league_rank) return *this; + else return m_other_queues->get_team_queue(arg_league_rank); + } + + KOKKOS_INLINE_FUNCTION + typename base_t::task_root_type* + attempt_to_steal_task() noexcept { + TaskBase* rv = nullptr; + auto* const end_tag = reinterpret_cast(TaskBase::EndTag); + + if (m_other_queues == nullptr) { + Kokkos::abort("attempted to steal task before queues were initialized!"); + } + + // Loop by priority and then type, and then team + for ( int i = 0 ; i < base_t::NumQueue; ++i ) { + for ( int j = 0 ; j < 2; ++j ) { + // for now, always start by trying to steal from team zero + for(int iteam = 0; iteam < m_other_queues->size(); ++iteam) { + if(iteam == m_league_rank) continue; + auto& steal_from = get_team_queue(iteam); + if( *((volatile int *) & steal_from.m_ready_count) > 0 ) { + // we've found at least one queue that's not done, so even if we can't + // pop something off of it we shouldn't return a nullptr indicating + // completion. rv will be end_tag when the pop fails + rv = base_t::pop_ready_task(&steal_from.m_ready[i][j]); + if(rv != end_tag) { + // task stolen. + // first increment our ready count, then decrement the ready count + // on the other queue: + Kokkos::atomic_increment(&this->m_ready_count); + Kokkos::atomic_decrement(&steal_from.m_ready_count); + return rv; + } + } + } + } + } + + // at this point, rv will only be nullptr if *all* of the queues had an + // m_ready_count of 0. This indicates quiescence. If at least some of them + // had non-zero, there would have been at least one pop_ready_task that + // was called and returned end_tag if it couldn't pop a task + return rv; + } + + +}; + +template +class LeagueQueueCollection { +private: + + using execution_space = ExecSpace; + using memory_space = MemorySpace; + using device_type = Kokkos::Device; + using memory_pool = Kokkos::MemoryPool; + using team_queue_type = TaskQueueMultiple; + using team_scheduler_type = BasicTaskScheduler; + using specialization = TaskQueueSpecialization; + + enum : long { max_num_queues = 6 }; //specialization::max_league_size }; + + // this is a non-owning pointer + team_queue_type* m_rank_zero_queue = nullptr; + // This really needs to be an optional> + union optional_queue { + KOKKOS_INLINE_FUNCTION + optional_queue() : uninitialized(0) { } + KOKKOS_INLINE_FUNCTION + ~optional_queue() { uninitialized = 0; } + char uninitialized; + team_queue_type initialized; + } m_queues[max_num_queues]; + int m_size = static_cast(KOKKOS_INVALID_INDEX); + +public: + + LeagueQueueCollection() = delete; + LeagueQueueCollection(LeagueQueueCollection const&) = delete; + LeagueQueueCollection(LeagueQueueCollection&&) = delete; + LeagueQueueCollection& operator=(LeagueQueueCollection const&) = delete; + LeagueQueueCollection& operator=(LeagueQueueCollection&&) = delete; + + ~LeagueQueueCollection() { + // destroy only the initialized queues that we own + for(int iteam = 0; iteam < m_size - 1; ++iteam) { + m_queues[iteam].initialized.~team_queue_type(); + m_queues[iteam].uninitialized = 0; + } + } + + KOKKOS_INLINE_FUNCTION + explicit LeagueQueueCollection( + team_queue_type* arg_rank_zero_queue + ) : m_rank_zero_queue(arg_rank_zero_queue), + m_size(1) + { } + + void initialize_team_queues( + int arg_count, memory_pool const& arg_memory_pool + ) noexcept + { + arg_count = std::min((int)max_num_queues, arg_count); + //assert(arg_count <= max_num_queues); + if(arg_count > m_size) { + for(int i = m_size; i < arg_count; ++i) { + new(&m_queues[i-1].initialized) team_queue_type(i, this, arg_memory_pool); + } + m_size = arg_count; + } + } + + KOKKOS_INLINE_FUNCTION + constexpr int size() const noexcept { return m_size; } + + KOKKOS_INLINE_FUNCTION + constexpr bool initialized() const noexcept { return m_size != int(KOKKOS_INVALID_INDEX); } + + KOKKOS_INLINE_FUNCTION + team_queue_type& get_team_queue(int iteam) { + iteam %= max_num_queues; + #if !defined(__HCC_ACCELERATOR__) && !defined(__CUDA_ARCH__) + assert(initialized()); + assert(iteam < m_size); + assert(iteam >= 0); + #endif + if(iteam == 0) return *m_rank_zero_queue; + else return m_queues[iteam-1].initialized; + } + +}; + + +} /* namespace Impl */ +} /* namespace Kokkos */ + + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#include + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple_impl.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple_impl.hpp new file mode 100644 index 0000000000..81bcc96831 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple_impl.hpp @@ -0,0 +1,72 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_IMPL_HPP +#define KOKKOS_IMPL_TASKQUEUEMULTIPLE_IMPL_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include + +#define KOKKOS_IMPL_DEBUG_TASKDAG_SCHEDULING_MULTIPLE 0 + +namespace Kokkos { +namespace Impl { + +template +void TaskQueueMultiple::Destroy::destroy_shared_allocation() { +// KOKKOS WORKAROUND for CUDA 10.1 with GCC 7.3.0 +#if(KOKKOS_COMPILER_CUDA_VERSION==101) && defined(KOKKOS_COMPILER_NVCC) && (KOKKOS_COMPILER_GNU>=730) + (*m_queue).get_team_queue(0).~TaskQueueMultiple(); +#else + m_queue->get_team_queue(0).~TaskQueueMultiple(); +#endif +} + +} /* namespace Impl */ +} /* namespace Kokkos */ + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_IMPL_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp index 5bcf672ff6..b5f8db0085 100644 --- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp @@ -41,6 +41,8 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_TASKQUEUE_IMPL_HPP +#define KOKKOS_IMPL_TASKQUEUE_IMPL_HPP #include #if defined( KOKKOS_ENABLE_TASKDAG ) @@ -51,22 +53,22 @@ namespace Impl { //---------------------------------------------------------------------------- -template< typename ExecSpace > -void TaskQueue< ExecSpace >::Destroy::destroy_shared_allocation() +template< typename ExecSpace, typename MemorySpace > +void TaskQueue< ExecSpace, MemorySpace >::Destroy::destroy_shared_allocation() { m_queue->~TaskQueue(); } //---------------------------------------------------------------------------- -template< typename ExecSpace > -TaskQueue< ExecSpace >::TaskQueue - ( typename TaskQueue< ExecSpace >::memory_pool const & arg_memory_pool ) +template< typename ExecSpace, typename MemorySpace> +TaskQueue< ExecSpace, MemorySpace>::TaskQueue + ( typename TaskQueue< ExecSpace, MemorySpace>::memory_pool const & arg_memory_pool ) : m_memory( arg_memory_pool ) , m_ready() - , m_accum_alloc(0) - , m_count_alloc(0) - , m_max_alloc(0) + //, m_accum_alloc(0) + //, m_count_alloc(0) + //, m_max_alloc(0) , m_ready_count(0) { for ( int i = 0 ; i < NumQueue ; ++i ) { @@ -77,8 +79,8 @@ TaskQueue< ExecSpace >::TaskQueue //---------------------------------------------------------------------------- -template< typename ExecSpace > -TaskQueue< ExecSpace >::~TaskQueue() +template< typename ExecSpace, typename MemorySpace> +TaskQueue< ExecSpace, MemorySpace>::~TaskQueue() { // Verify that queues are empty and ready count is zero @@ -97,10 +99,10 @@ TaskQueue< ExecSpace >::~TaskQueue() //---------------------------------------------------------------------------- -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -void TaskQueue< ExecSpace >::decrement - ( TaskQueue< ExecSpace >::task_root_type * task ) +void TaskQueue< ExecSpace, MemorySpace>::decrement + ( TaskQueue< ExecSpace, MemorySpace>::task_root_type * task ) { task_root_type volatile & t = *task ; @@ -121,8 +123,13 @@ void TaskQueue< ExecSpace >::decrement ( t.m_next == (task_root_type *) task_root_type::LockTag ) ) { // Reference count is zero and task is complete, deallocate. - TaskQueue< ExecSpace > * const queue = - static_cast< TaskQueue< ExecSpace > * >( t.m_queue ); + //TaskQueue< ExecSpace, MemorySpace> * const queue = + // static_cast( t.m_scheduler )->m_queue; + auto* const volatile queue = static_cast(t.m_queue); + + // TODO @tasking @minor DSH this should call the destructor for a non-trivially destructible type (possibly just ignore this in the old version, though?) + // (Can't just do this; it needs to be queued since it's device code + // if(task->m_destroy) task->m_destroy(task); queue->deallocate( task , t.m_alloc_size ); } @@ -133,32 +140,32 @@ void TaskQueue< ExecSpace >::decrement //---------------------------------------------------------------------------- -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -size_t TaskQueue< ExecSpace >::allocate_block_size( size_t n ) +size_t TaskQueue< ExecSpace, MemorySpace>::allocate_block_size( size_t n ) { return m_memory.allocate_block_size( n ); } -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -void * TaskQueue< ExecSpace >::allocate( size_t n ) +void * TaskQueue< ExecSpace, MemorySpace>::allocate( size_t n ) { void * const p = m_memory.allocate(n); if ( p ) { - Kokkos::atomic_increment( & m_accum_alloc ); + //Kokkos::atomic_increment( & m_accum_alloc ); Kokkos::atomic_increment( & m_count_alloc ); - if ( m_max_alloc < m_count_alloc ) m_max_alloc = m_count_alloc ; + //if ( m_max_alloc < m_count_alloc ) m_max_alloc = m_count_alloc ; } return p ; } -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -void TaskQueue< ExecSpace >::deallocate( void * p , size_t n ) +void TaskQueue< ExecSpace, MemorySpace>::deallocate( void * p , size_t n ) { m_memory.deallocate( p , n ); Kokkos::atomic_decrement( & m_count_alloc ); @@ -166,11 +173,11 @@ void TaskQueue< ExecSpace >::deallocate( void * p , size_t n ) //---------------------------------------------------------------------------- -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -bool TaskQueue< ExecSpace >::push_task - ( TaskQueue< ExecSpace >::task_root_type * volatile * const queue - , TaskQueue< ExecSpace >::task_root_type * const task +bool TaskQueue< ExecSpace, MemorySpace>::push_task + ( TaskQueue< ExecSpace, MemorySpace>::task_root_type * volatile * const queue + , TaskQueue< ExecSpace, MemorySpace>::task_root_type * const task ) { // Push task into a concurrently pushed and popped queue. @@ -200,20 +207,29 @@ bool TaskQueue< ExecSpace >::push_task Kokkos::abort("TaskQueue::push_task ERROR: already a member of another queue" ); } - task_root_type * y = *queue ; + // store the head of the queue + task_root_type * old_head = *queue ; - while ( lock != y ) { + while ( old_head != lock ) { - next = y ; + // set task->next to the head of the queue + next = old_head; // Do not proceed until 'next' has been stored. Kokkos::memory_fence(); - task_root_type * const x = y ; + // store the old head + task_root_type * const old_head_tmp = old_head; - y = Kokkos::atomic_compare_exchange(queue,y,task); + // attempt to swap task with the old head of the queue + // as if this were done atomically: + // if(*queue == old_head) { + // *queue = task; + // } + // old_head = *queue; + old_head = Kokkos::atomic_compare_exchange(queue, old_head, task); - if ( x == y ) return true ; + if(old_head_tmp == old_head) return true; } // Failed, replace 'task->m_next' value since 'task' remains @@ -229,11 +245,11 @@ bool TaskQueue< ExecSpace >::push_task //---------------------------------------------------------------------------- -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -typename TaskQueue< ExecSpace >::task_root_type * -TaskQueue< ExecSpace >::pop_ready_task - ( TaskQueue< ExecSpace >::task_root_type * volatile * const queue ) +typename TaskQueue< ExecSpace, MemorySpace>::task_root_type * +TaskQueue< ExecSpace, MemorySpace>::pop_ready_task + ( TaskQueue< ExecSpace, MemorySpace>::task_root_type * volatile * const queue ) { // Pop task from a concurrently pushed and popped ready task queue. // The queue is a linked list where 'task->m_next' form the links. @@ -280,6 +296,10 @@ TaskQueue< ExecSpace >::pop_ready_task task_root_type * volatile & next = task->m_next ; + // This algorithm is not lockfree because a adversarial scheduler could + // context switch this thread at this point and the rest of the threads + // calling this method would never make forward progress + *queue = next ; next = lock ; Kokkos::memory_fence(); @@ -304,10 +324,10 @@ TaskQueue< ExecSpace >::pop_ready_task //---------------------------------------------------------------------------- -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -void TaskQueue< ExecSpace >::schedule_runnable - ( TaskQueue< ExecSpace >::task_root_type * const task ) +void TaskQueue< ExecSpace, MemorySpace>::schedule_runnable + ( TaskQueue< ExecSpace, MemorySpace>::task_root_type * const task ) { // Schedule a runnable task upon construction / spawn // and upon completion of other tasks that 'task' is waiting on. @@ -389,6 +409,8 @@ void TaskQueue< ExecSpace >::schedule_runnable Kokkos::memory_fence(); + // If we don't have a dependency, or if pushing onto the wait queue of that dependency + // failed (since the only time that queue should be locked is when the task is transitioning to complete??!?) const bool is_ready = ( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) ); @@ -431,10 +453,10 @@ void TaskQueue< ExecSpace >::schedule_runnable // from a queue and processed it as appropriate. } -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -void TaskQueue< ExecSpace >::schedule_aggregate - ( TaskQueue< ExecSpace >::task_root_type * const task ) +void TaskQueue< ExecSpace, MemorySpace>::schedule_aggregate + ( TaskQueue< ExecSpace, MemorySpace>::task_root_type * const task ) { // Schedule an aggregate task upon construction // and upon completion of other tasks that 'task' is waiting on. @@ -556,9 +578,9 @@ void TaskQueue< ExecSpace >::schedule_aggregate //---------------------------------------------------------------------------- -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -void TaskQueue< ExecSpace >::reschedule( task_root_type * task ) +void TaskQueue< ExecSpace, MemorySpace>::reschedule( task_root_type * task ) { // Precondition: // task is in Executing state @@ -578,10 +600,10 @@ void TaskQueue< ExecSpace >::reschedule( task_root_type * task ) //---------------------------------------------------------------------------- -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -void TaskQueue< ExecSpace >::complete - ( TaskQueue< ExecSpace >::task_root_type * task ) +void TaskQueue< ExecSpace, MemorySpace>::complete + ( TaskQueue< ExecSpace, MemorySpace>::task_root_type * task ) { // Complete a runnable task that has finished executing // or a when_all task when all of its dependeneces are complete. @@ -679,4 +701,5 @@ void TaskQueue< ExecSpace >::complete } /* namespace Kokkos */ #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_IMPL_HPP */ diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskResult.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskResult.hpp new file mode 100644 index 0000000000..d45ebff00b --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskResult.hpp @@ -0,0 +1,151 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_TASKRESULT_HPP +#define KOKKOS_IMPL_TASKRESULT_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include + +#include +#include + +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< typename ResultType > +struct TaskResult { + + enum : int32_t { size = sizeof(ResultType) }; + + using reference_type = ResultType & ; + + template + KOKKOS_INLINE_FUNCTION static + ResultType * ptr( PoolAllocatedObjectBase* task ) + { + return reinterpret_cast< ResultType * > + ( reinterpret_cast< char * >(task) + task->get_allocation_size() - sizeof(ResultType) ); + } + + KOKKOS_INLINE_FUNCTION static + ResultType * ptr( TaskBase* task ) + { + return reinterpret_cast< ResultType * > + ( reinterpret_cast< char * >(task) + task->m_alloc_size - sizeof(ResultType) ); + } + + KOKKOS_INLINE_FUNCTION static + reference_type get( TaskBase* task ) + { return *ptr( task ); } + + template + KOKKOS_INLINE_FUNCTION static + reference_type get( TaskNode* task ) + { return *ptr( task ); } + + KOKKOS_INLINE_FUNCTION static + void destroy( TaskBase* task ) + { get(task).~ResultType(); } + + + //template + //KOKKOS_INLINE_FUNCTION static + //void destroy( TaskNode* task ) + //{ get(task).~ResultType(); } +}; + +template<> +struct TaskResult< void > { + + enum : int32_t { size = 0 }; + + using reference_type = void ; + + template + KOKKOS_INLINE_FUNCTION static + void* ptr( TaskNode* task ) + { return nullptr; } + + KOKKOS_INLINE_FUNCTION static + void * ptr( TaskBase* ) { return (void*) nullptr ; } + + template + KOKKOS_INLINE_FUNCTION static + reference_type get( TaskNode* task ) + { /* Should never be called */ } + + KOKKOS_INLINE_FUNCTION static + reference_type get( TaskBase* ) {} + + KOKKOS_INLINE_FUNCTION static + void destroy( TaskBase* task ) + { } + + //template + //KOKKOS_INLINE_FUNCTION static + //void destroy( TaskNode* task ) + //{ } +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKRESULT_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskTeamMember.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskTeamMember.hpp new file mode 100644 index 0000000000..4bf3f4fa94 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskTeamMember.hpp @@ -0,0 +1,135 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TASKTEAMMEMBER_HPP +#define KOKKOS_TASKTEAMMEMBER_HPP + +//---------------------------------------------------------------------------- + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include +//---------------------------------------------------------------------------- + +#include +#include + +#include +#include +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template +class TaskTeamMemberAdapter : public TeamMember { +private: + + Scheduler m_scheduler; + +public: + + //---------------------------------------- + + // Forward everything but the Scheduler to the constructor of the TeamMember + // type that we're adapting + template + KOKKOS_INLINE_FUNCTION + explicit TaskTeamMemberAdapter( + typename std::enable_if< + std::is_constructible::value, + Scheduler + >::type arg_scheduler, + Args&&... args + ) // TODO @tasking @minor DSH noexcept specification + : TeamMember(std::forward(args)...), + m_scheduler(std::move(arg_scheduler).get_team_scheduler(this->league_rank())) + { } + + // (rule of 6 constructors) + + KOKKOS_INLINE_FUNCTION + TaskTeamMemberAdapter() = default; + + KOKKOS_INLINE_FUNCTION + TaskTeamMemberAdapter(TaskTeamMemberAdapter const&) = default; + + KOKKOS_INLINE_FUNCTION + TaskTeamMemberAdapter(TaskTeamMemberAdapter&&) = default; + + KOKKOS_INLINE_FUNCTION + TaskTeamMemberAdapter& operator=(TaskTeamMemberAdapter const&) = default; + + KOKKOS_INLINE_FUNCTION + TaskTeamMemberAdapter& operator=(TaskTeamMemberAdapter&&) = default; + + KOKKOS_INLINE_FUNCTION ~TaskTeamMemberAdapter() = default; + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + Scheduler const& scheduler() const noexcept { return m_scheduler; } + + KOKKOS_INLINE_FUNCTION + Scheduler& scheduler() noexcept { return m_scheduler; } + + //---------------------------------------- + +}; + +} // end namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_TASKTEAMMEMBER_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Traits.hpp b/lib/kokkos/core/src/impl/Kokkos_Traits.hpp index 475a696719..a5af82838f 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Traits.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Traits.hpp @@ -483,6 +483,54 @@ struct is_integral_constant< integral_constant > : public true_ enum { integral_value = v }; }; +//---------------------------------------------------------------------------- + +template +class TypeList; + +//---------------------------------------------------------------------------- + +template +struct ReverseTypeList; + +template +struct ReverseTypeList> { + template + struct impl { + using type = typename ReverseTypeList>::template impl::type; + }; + using type = typename impl<>::type; +}; + +template <> +struct ReverseTypeList> { + template + struct impl { + using type = TypeList; + }; + using type = TypeList<>; +}; + +//---------------------------------------------------------------------------- + +template +struct make_all_extents_into_pointers +{ + using type = T; +}; + +template +struct make_all_extents_into_pointers +{ + using type = typename make_all_extents_into_pointers::type*; +}; + +template +struct make_all_extents_into_pointers +{ + using type = typename make_all_extents_into_pointers::type*; +}; + } // namespace Impl } // namespace Kokkos diff --git a/lib/kokkos/core/src/impl/Kokkos_VLAEmulation.hpp b/lib/kokkos/core/src/impl/Kokkos_VLAEmulation.hpp new file mode 100644 index 0000000000..48e1851e60 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_VLAEmulation.hpp @@ -0,0 +1,295 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_VLAEMULATION_HPP +#define KOKKOS_IMPL_VLAEMULATION_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + + +#include + +#include // KOKKOS_EXPECTS + +#include // std::is_abstract<>, ... + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template < + class Derived, + class VLAValueType, + class EntryCountType = int32_t +> +struct ObjectWithVLAEmulation; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +/** @brief Attorney to enable private CRTP inheritance from ObjectWithVLAEmulation + */ +struct VLAEmulationAccess { +private: + + template + friend struct ObjectWithVLAEmulation; + + template + KOKKOS_FORCEINLINE_FUNCTION + static constexpr Derived* + _cast_to_derived(ObjectWithVLAEmulation* base) noexcept + { + return static_cast(base); + } + + template + KOKKOS_FORCEINLINE_FUNCTION + static constexpr Derived const* + _cast_to_derived(ObjectWithVLAEmulation const* base) noexcept + { + return static_cast(base); + } + +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +/** \brief A CRTP base class for a type that includes a variable-length array by allocation + * + * The storage for the derived type must be allocated manually and the objects + * (both derived type and VLA objects) must be constructed with placement new. + * Obviously, this can't be done for objects on the stack. + * + * Note: Though most uses of this currently delete the copy and move constructor + * in the `Derived` type, this type is intended to have value semantics. + * + * \todo @documentation elaborate on implications of value semantics for this class template + * + */ +template < + class Derived, + class VLAValueType, + class EntryCountType /* = int32_t */ +> +struct ObjectWithVLAEmulation { +public: + + using object_type = Derived; + using vla_value_type = VLAValueType; + using vla_entry_count_type = EntryCountType; + + using iterator = VLAValueType*; + using const_iterator = typename std::add_const::type*; + + + // TODO @tasking @minor DSH require that Derived be marked final? (note that std::is_final is C++14) + // TODO @tasking @minor DSH delete non-placement operator new for Derived type? + +private: + + vla_entry_count_type m_num_entries; + + // CRTP boilerplate + + KOKKOS_FORCEINLINE_FUNCTION + /* KOKKOS_CONSTEXPR_14 */ + Derived* _this() noexcept { return VLAEmulationAccess::_cast_to_derived(this); } + + KOKKOS_FORCEINLINE_FUNCTION + /* KOKKOS_CONSTEXPR_14 */ + Derived const* _this() const noexcept { return VLAEmulationAccess::_cast_to_derived(this); } + + // Note: can't be constexpr because of reinterpret_cast + KOKKOS_FORCEINLINE_FUNCTION + /* KOKKOS_CONSTEXPR_14 */ + vla_value_type* _vla_pointer() noexcept { + // The data starts right after the aligned storage of Derived + return reinterpret_cast(_this() + 1); + } + + // Note: can't be constexpr because of reinterpret_cast + KOKKOS_FORCEINLINE_FUNCTION + /* KOKKOS_CONSTEXPR_14 */ + vla_value_type const* _vla_pointer() const noexcept { + // The data starts right after the aligned storage of Derived + return reinterpret_cast(_this() + 1); + } + +public: + + KOKKOS_INLINE_FUNCTION + static /* KOKKOS_CONSTEXPR_14 */ size_t + required_allocation_size(vla_entry_count_type num_vla_entries) { + KOKKOS_EXPECTS(num_vla_entries >= 0); + return sizeof(Derived) + num_vla_entries * sizeof(VLAValueType); + } + + //---------------------------------------------------------------------------- + // {{{2 + + // TODO @tasking @optimization DSH specialization for trivially constructible VLAValueType? + // TODO @tasking @minor DSH SFINAE-out this constructor for non-default contructible vla_value_types + KOKKOS_INLINE_FUNCTION + explicit + ObjectWithVLAEmulation(vla_entry_count_type num_entries) + noexcept(noexcept(vla_value_type())) + : m_num_entries(num_entries) + { + // Note: We can't do this at class scope because it unnecessarily requires + // object_type to be a complete type + static_assert( + alignof(object_type) >= alignof(vla_value_type), + "Can't append emulated variable length array of type with greater alignment than" + " the type to which the VLA is being appended" + ); + + // Note: We can't do this at class scope because it unnecessarily requires + // vla_value_type to be a complete type + static_assert( + not std::is_abstract::value, + "Can't use abstract type with VLA emulation" + ); + + KOKKOS_EXPECTS(num_entries >= 0); + for(vla_entry_count_type i = 0; i < m_num_entries; ++i) { + new (_vla_pointer() + i) vla_value_type(); + } + } + + KOKKOS_INLINE_FUNCTION + ~ObjectWithVLAEmulation() + noexcept(noexcept(std::declval().~vla_value_type())) + { + for(auto&& value : *this) { value.~vla_value_type(); } + } + + // TODO @tasking @new_feature DSH constrained analogs for move and copy ctors and assignment ops + // TODO @tasking @new_feature DSH forwarding in_place constructor + // TODO @tasking @new_feature DSH initializer_list constructor? + + // end Constructors, destructor, and assignment }}}2 + //---------------------------------------------------------------------------- + + + KOKKOS_INLINE_FUNCTION + constexpr EntryCountType n_vla_entries() const noexcept { return m_num_entries; } + + + //---------------------------------------------------------------------------- + // {{{2 + + KOKKOS_INLINE_FUNCTION + object_type& object() & { return static_cast(*this); } + + KOKKOS_INLINE_FUNCTION + object_type const& object() const & { return static_cast(*this); } + + KOKKOS_INLINE_FUNCTION + object_type&& object() && { return static_cast(*this); } + + + KOKKOS_INLINE_FUNCTION + vla_value_type& vla_value_at(vla_entry_count_type n) & + { + KOKKOS_EXPECTS(n < n_vla_entries()); + return _vla_pointer()[n]; + } + + KOKKOS_INLINE_FUNCTION + vla_value_type const& vla_value_at(vla_entry_count_type n) const & + { + KOKKOS_EXPECTS(n < n_vla_entries()); + return _vla_pointer()[n]; + } + + KOKKOS_INLINE_FUNCTION + vla_value_type& vla_value_at(vla_entry_count_type n) && + { + KOKKOS_EXPECTS(n < n_vla_entries()); + return _vla_pointer()[n]; + } + + // end Accessing the object and the VLA values }}}2 + //---------------------------------------------------------------------------- + + + //---------------------------------------------------------------------------- + // {{{2 + + KOKKOS_INLINE_FUNCTION + iterator begin() noexcept { return _vla_pointer(); } + + KOKKOS_INLINE_FUNCTION + const_iterator begin() const noexcept { return _vla_pointer(); } + + KOKKOS_INLINE_FUNCTION + const_iterator cbegin() noexcept { return _vla_pointer(); } + + KOKKOS_INLINE_FUNCTION + iterator end() noexcept { return _vla_pointer() + m_num_entries; } + + KOKKOS_INLINE_FUNCTION + const_iterator end() const noexcept { return _vla_pointer() + m_num_entries; } + + KOKKOS_INLINE_FUNCTION + const_iterator cend() noexcept { return _vla_pointer() + m_num_entries; } + + // end Iterators }}}2 + //---------------------------------------------------------------------------- + +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_VLAEMULATION_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp index e1539d10b0..07774da279 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp @@ -367,6 +367,8 @@ public: // Can only convert to View::array_type + enum { is_assignable_data_type = std::is_same< typename DstTraits::data_type , typename SrcTraits::scalar_array_type >::value && + (DstTraits::rank==SrcTraits::rank+1)}; enum { is_assignable = std::is_same< typename DstTraits::data_type , typename SrcTraits::scalar_array_type >::value && std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value }; diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp index 773f336281..b2d8dea20a 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -275,7 +276,7 @@ struct ALL_t { constexpr const ALL_t & operator()() const { return *this ; } KOKKOS_INLINE_FUNCTION - constexpr bool operator == ( const ALL_t & right) const { return true;} + constexpr bool operator == ( const ALL_t & ) const { return true;} }; }} // namespace Kokkos::Impl @@ -1548,7 +1549,7 @@ struct ViewOffset< Dimension , Kokkos::LayoutRight template< class DimRHS > KOKKOS_INLINE_FUNCTION constexpr ViewOffset - ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs + ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub ) : m_dim( sub.range_extent(0) , 0, 0, 0, 0, 0, 0, 0 ) @@ -2319,7 +2320,7 @@ struct ViewDataHandle< Traits , && std::is_same< typename Traits::specialize , void >::value && - Traits::memory_traits::Atomic + Traits::memory_traits::is_atomic )>::type > { typedef typename Traits::value_type value_type ; @@ -2348,16 +2349,16 @@ struct ViewDataHandle< Traits , typename std::enable_if<( std::is_same< typename Traits::specialize , void >::value && - (!Traits::memory_traits::Aligned) + (!Traits::memory_traits::is_aligned) && - Traits::memory_traits::Restrict + Traits::memory_traits::is_restrict #ifdef KOKKOS_ENABLE_CUDA && (!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value || std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )) #endif && - (!Traits::memory_traits::Atomic) + (!Traits::memory_traits::is_atomic) )>::type > { typedef typename Traits::value_type value_type ; @@ -2366,17 +2367,17 @@ struct ViewDataHandle< Traits , typedef Kokkos::Impl::SharedAllocationTracker track_type ; KOKKOS_INLINE_FUNCTION - static handle_type assign( value_type * arg_data_ptr + static value_type* assign( value_type * arg_data_ptr , track_type const & /*arg_tracker*/ ) { - return handle_type( arg_data_ptr ); + return (value_type*)( arg_data_ptr ); } KOKKOS_INLINE_FUNCTION - static handle_type assign( handle_type const arg_data_ptr + static value_type* assign( handle_type const arg_data_ptr , size_t offset ) { - return handle_type( arg_data_ptr + offset ); + return (value_type*)( arg_data_ptr + offset ); } }; @@ -2385,16 +2386,16 @@ struct ViewDataHandle< Traits , typename std::enable_if<( std::is_same< typename Traits::specialize , void >::value && - Traits::memory_traits::Aligned + Traits::memory_traits::is_aligned && - (!Traits::memory_traits::Restrict) + (!Traits::memory_traits::is_restrict) #ifdef KOKKOS_ENABLE_CUDA && (!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value || std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )) #endif && - (!Traits::memory_traits::Atomic) + (!Traits::memory_traits::is_atomic) )>::type > { typedef typename Traits::value_type value_type ; @@ -2428,16 +2429,16 @@ struct ViewDataHandle< Traits , typename std::enable_if<( std::is_same< typename Traits::specialize , void >::value && - Traits::memory_traits::Aligned + Traits::memory_traits::is_aligned && - Traits::memory_traits::Restrict + Traits::memory_traits::is_restrict #ifdef KOKKOS_ENABLE_CUDA && (!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value || std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )) #endif && - (!Traits::memory_traits::Atomic) + (!Traits::memory_traits::is_atomic) )>::type > { typedef typename Traits::value_type value_type ; @@ -2446,23 +2447,23 @@ struct ViewDataHandle< Traits , typedef Kokkos::Impl::SharedAllocationTracker track_type ; KOKKOS_INLINE_FUNCTION - static handle_type assign( value_type * arg_data_ptr + static value_type* assign( value_type * arg_data_ptr , track_type const & /*arg_tracker*/ ) { if ( reinterpret_cast(arg_data_ptr) % Impl::MEMORY_ALIGNMENT ) { Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute"); } - return handle_type( arg_data_ptr ); + return (value_type*)( arg_data_ptr ); } KOKKOS_INLINE_FUNCTION - static handle_type assign( handle_type const arg_data_ptr + static value_type* assign( handle_type const arg_data_ptr , size_t offset ) { if ( reinterpret_cast(arg_data_ptr+offset) % Impl::MEMORY_ALIGNMENT ) { Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute"); } - return handle_type( arg_data_ptr + offset ); + return (value_type*)( arg_data_ptr + offset ); } }; }} // namespace Kokkos::Impl @@ -2955,7 +2956,8 @@ private: }; public: - + enum { is_assignable_data_type = is_assignable_value_type && + is_assignable_dimension }; enum { is_assignable = is_assignable_space && is_assignable_value_type && is_assignable_dimension && @@ -3052,7 +3054,8 @@ private: , typename SrcTraits::dimension >::value }; public: - + enum { is_assignable_data_type = is_assignable_value_type && + is_assignable_dimension }; enum { is_assignable = is_assignable_space && is_assignable_value_type && is_assignable_dimension }; @@ -3062,7 +3065,7 @@ public: typedef ViewMapping< SrcTraits , void > SrcType ; KOKKOS_INLINE_FUNCTION - static bool assignable_layout_check(DstType & dst, const SrcType & src) //Runtime check + static bool assignable_layout_check(DstType &, const SrcType & src) //Runtime check { size_t strides[9]; bool assignable = true; @@ -3134,6 +3137,73 @@ public: // Subview mapping. // Deduce destination view type from source view traits and subview arguments +template +struct SubViewDataTypeImpl; + +/* base case */ +template +struct SubViewDataTypeImpl< + void, + ValueType, + Experimental::Extents<> +> +{ using type = ValueType; }; + +/* for integral args, subview doesn't have that dimension */ +template +struct SubViewDataTypeImpl< + typename std::enable_if::type>::value>::type, + ValueType, + Experimental::Extents, + Integral, Args... +> : SubViewDataTypeImpl< + void, ValueType, + Experimental::Extents, + Args... + > +{ }; + + +/* for ALL slice, subview has the same dimension */ +template +struct SubViewDataTypeImpl< + void, + ValueType, + Experimental::Extents, + ALL_t, Args... +> : SubViewDataTypeImpl< + void, typename ApplyExtent::type, + Experimental::Extents, + Args... + > +{ }; + + +/* for pair-style slice, subview has dynamic dimension, since pair doesn't give static sizes */ +/* Since we don't allow interleaving of dynamic and static extents, make all of the dimensions to the left dynamic */ +template +struct SubViewDataTypeImpl< + typename std::enable_if::value>::type, + ValueType, + Experimental::Extents, + PairLike, Args... +> : SubViewDataTypeImpl< + void, typename make_all_extents_into_pointers::type*, + Experimental::Extents, + Args... + > +{ }; + + +template +struct SubViewDataType + : SubViewDataTypeImpl< + void, ValueType, Exts, Args... + > +{ }; + +//---------------------------------------------------------------------------- + template< class SrcTraits , class ... Args > struct ViewMapping < typename std::enable_if<( @@ -3201,17 +3271,25 @@ private: typedef typename SrcTraits::value_type value_type ; - typedef typename std::conditional< rank == 0 , value_type , - typename std::conditional< rank == 1 , value_type * , - typename std::conditional< rank == 2 , value_type ** , - typename std::conditional< rank == 3 , value_type *** , - typename std::conditional< rank == 4 , value_type **** , - typename std::conditional< rank == 5 , value_type ***** , - typename std::conditional< rank == 6 , value_type ****** , - typename std::conditional< rank == 7 , value_type ******* , - value_type ******** - >::type >::type >::type >::type >::type >::type >::type >::type - data_type ; + using data_type = + typename SubViewDataType< + value_type, + typename Kokkos::Impl::ParseViewExtents< + typename SrcTraits::data_type + >::type, + Args... + >::type; + //typedef typename std::conditional< rank == 0 , value_type , + // typename std::conditional< rank == 1 , value_type * , + // typename std::conditional< rank == 2 , value_type ** , + // typename std::conditional< rank == 3 , value_type *** , + // typename std::conditional< rank == 4 , value_type **** , + // typename std::conditional< rank == 5 , value_type ***** , + // typename std::conditional< rank == 6 , value_type ****** , + // typename std::conditional< rank == 7 , value_type ******* , + // value_type ******** + // >::type >::type >::type >::type >::type >::type >::type >::type + // data_type ; public: diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp index 716b9ceca5..a8645db451 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp @@ -50,6 +50,9 @@ namespace Kokkos { namespace Impl { +// =========================================================================== +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + // View mapping for rank two tiled array template< class L > @@ -208,11 +211,17 @@ struct ViewMapping } }; +#endif // KOKKOS_ENABLE_DEPRECATED_CODE +// =============================================================================== + } /* namespace Impl */ } /* namespace Kokkos */ namespace Kokkos { +// ============================================================================== +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + template< typename T , unsigned N0 , unsigned N1 , class ... P > KOKKOS_INLINE_FUNCTION Kokkos::View< T[N0][N1] , LayoutLeft , P... > @@ -229,6 +238,9 @@ tile_subview( const Kokkos::View,P...> & ( src , SrcLayout() , i_tile0 , i_tile1 ); } +#endif // KOKKOS_ENABLE_DEPRECATED_CODE +// =============================================================================== + } /* namespace Kokkos */ //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/unit_test/CMakeLists.txt b/lib/kokkos/core/unit_test/CMakeLists.txt index fad4e1d45e..6a480daa8d 100644 --- a/lib/kokkos/core/unit_test/CMakeLists.txt +++ b/lib/kokkos/core/unit_test/CMakeLists.txt @@ -11,6 +11,7 @@ IF(NOT KOKKOS_HAS_TRILINOS) ENDIF() SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest) +# TODO get the C++ standard flag from KOKKOS_CXX_STANDARD SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_PTHREAD=0") INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) @@ -21,6 +22,17 @@ TRIBITS_ADD_LIBRARY( TESTONLY ) +IF(NOT KOKKOS_HAS_TRILINOS) +target_compile_options( + kokkos_gtest + PUBLIC $<$:${KOKKOS_CXX_FLAGS}> +) +target_link_libraries( + kokkos_gtest + PUBLIC ${KOKKOS_LD_FLAGS} +) +ENDIF() + # # Define the tests # @@ -29,69 +41,212 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) IF(Kokkos_ENABLE_Serial) - TRIBITS_ADD_EXECUTABLE_AND_TEST( - UnitTest_Serial - SOURCES - UnitTestMainInit.cpp - serial/TestSerial_AtomicOperations_int.cpp - serial/TestSerial_AtomicOperations_unsignedint.cpp - serial/TestSerial_AtomicOperations_longint.cpp - serial/TestSerial_AtomicOperations_unsignedlongint.cpp - serial/TestSerial_AtomicOperations_longlongint.cpp - serial/TestSerial_AtomicOperations_double.cpp - serial/TestSerial_AtomicOperations_float.cpp - serial/TestSerial_AtomicViews.cpp - serial/TestSerial_Atomics.cpp - serial/TestSerial_Complex.cpp - serial/TestSerial_Init.cpp - serial/TestSerial_MDRange_a.cpp - serial/TestSerial_MDRange_b.cpp - serial/TestSerial_MDRange_c.cpp - serial/TestSerial_MDRange_d.cpp - serial/TestSerial_MDRange_e.cpp - serial/TestSerial_Other.cpp - serial/TestSerial_RangePolicy.cpp - serial/TestSerial_Reductions.cpp - serial/TestSerial_Reducers_a.cpp - serial/TestSerial_Reducers_b.cpp - serial/TestSerial_Reducers_c.cpp - serial/TestSerial_Reducers_d.cpp - serial/TestSerial_Scan.cpp - serial/TestSerial_SharedAlloc.cpp - serial/TestSerial_SubView_a.cpp - serial/TestSerial_SubView_b.cpp - serial/TestSerial_SubView_c01.cpp - serial/TestSerial_SubView_c02.cpp - serial/TestSerial_SubView_c03.cpp - serial/TestSerial_SubView_c04.cpp - serial/TestSerial_SubView_c05.cpp - serial/TestSerial_SubView_c06.cpp - serial/TestSerial_SubView_c07.cpp - serial/TestSerial_SubView_c08.cpp - serial/TestSerial_SubView_c09.cpp - serial/TestSerial_SubView_c10.cpp - serial/TestSerial_SubView_c11.cpp - serial/TestSerial_SubView_c12.cpp - serial/TestSerial_SubView_c13.cpp - serial/TestSerial_Team.cpp - serial/TestSerial_TeamReductionScan.cpp - serial/TestSerial_TeamScratch.cpp - serial/TestSerial_ViewAPI_a.cpp - serial/TestSerial_ViewAPI_b.cpp - serial/TestSerial_ViewAPI_c.cpp - serial/TestSerial_ViewAPI_d.cpp - serial/TestSerial_ViewAPI_e.cpp - serial/TestSerial_ViewMapping_a.cpp - serial/TestSerial_ViewMapping_b.cpp - serial/TestSerial_ViewMapping_subview.cpp - serial/TestSerial_ViewOfClass.cpp - serial/TestSerial_Crs.cpp - serial/TestSerial_WorkGraph.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} - ) + IF(KOKKOS_SEPARATE_TESTS) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_Atomics + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_AtomicOperations_int.cpp + serial/TestSerial_AtomicOperations_unsignedint.cpp + serial/TestSerial_AtomicOperations_longint.cpp + serial/TestSerial_AtomicOperations_unsignedlongint.cpp + serial/TestSerial_AtomicOperations_longlongint.cpp + serial/TestSerial_AtomicOperations_double.cpp + serial/TestSerial_AtomicOperations_float.cpp + serial/TestSerial_AtomicOperations_complexdouble.cpp + serial/TestSerial_AtomicOperations_complexfloat.cpp + serial/TestSerial_AtomicViews.cpp + serial/TestSerial_Atomics.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_SubView + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_SubView_a.cpp + serial/TestSerial_SubView_b.cpp + serial/TestSerial_SubView_c01.cpp + serial/TestSerial_SubView_c02.cpp + serial/TestSerial_SubView_c03.cpp + serial/TestSerial_SubView_c04.cpp + serial/TestSerial_SubView_c05.cpp + serial/TestSerial_SubView_c06.cpp + serial/TestSerial_SubView_c07.cpp + serial/TestSerial_SubView_c08.cpp + serial/TestSerial_SubView_c09.cpp + serial/TestSerial_SubView_c10.cpp + serial/TestSerial_SubView_c11.cpp + serial/TestSerial_SubView_c12.cpp + serial/TestSerial_SubView_c13.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_ViewAPI + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_ViewAPI_a.cpp + serial/TestSerial_ViewAPI_b.cpp + serial/TestSerial_ViewAPI_c.cpp + serial/TestSerial_ViewAPI_d.cpp + serial/TestSerial_ViewAPI_e.cpp + serial/TestSerial_ViewOfClass.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_ViewMapping + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_ViewMapping_a.cpp + serial/TestSerial_ViewMapping_b.cpp + serial/TestSerial_ViewMapping_subview.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_Reducers + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_Reductions.cpp + serial/TestSerial_Reducers_a.cpp + serial/TestSerial_Reducers_b.cpp + serial/TestSerial_Reducers_c.cpp + serial/TestSerial_Reducers_d.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_MDRange + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_MDRange_a.cpp + serial/TestSerial_MDRange_b.cpp + serial/TestSerial_MDRange_c.cpp + serial/TestSerial_MDRange_d.cpp + serial/TestSerial_MDRange_e.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_Team + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_Team.cpp + serial/TestSerial_TeamReductionScan.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_Tasking + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_Task.cpp + serial/TestSerial_WorkGraph.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_Misc + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_Complex.cpp + serial/TestSerial_Init.cpp + serial/TestSerial_Other.cpp + serial/TestSerial_RangePolicy.cpp + serial/TestSerial_Scan.cpp + serial/TestSerial_SharedAlloc.cpp + serial/TestSerial_Crs.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + ELSE() + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_AtomicOperations_int.cpp + serial/TestSerial_AtomicOperations_unsignedint.cpp + serial/TestSerial_AtomicOperations_longint.cpp + serial/TestSerial_AtomicOperations_unsignedlongint.cpp + serial/TestSerial_AtomicOperations_longlongint.cpp + serial/TestSerial_AtomicOperations_double.cpp + serial/TestSerial_AtomicOperations_float.cpp + serial/TestSerial_AtomicOperations_complexdouble.cpp + serial/TestSerial_AtomicOperations_complexfloat.cpp + serial/TestSerial_AtomicViews.cpp + serial/TestSerial_Atomics.cpp + serial/TestSerial_Complex.cpp + serial/TestSerial_Init.cpp + serial/TestSerial_MDRange_a.cpp + serial/TestSerial_MDRange_b.cpp + serial/TestSerial_MDRange_c.cpp + serial/TestSerial_MDRange_d.cpp + serial/TestSerial_MDRange_e.cpp + serial/TestSerial_Other.cpp + serial/TestSerial_RangePolicy.cpp + serial/TestSerial_Reductions.cpp + serial/TestSerial_Reducers_a.cpp + serial/TestSerial_Reducers_b.cpp + serial/TestSerial_Reducers_c.cpp + serial/TestSerial_Reducers_d.cpp + serial/TestSerial_Scan.cpp + serial/TestSerial_SharedAlloc.cpp + serial/TestSerial_SubView_a.cpp + serial/TestSerial_SubView_b.cpp + serial/TestSerial_SubView_c01.cpp + serial/TestSerial_SubView_c02.cpp + serial/TestSerial_SubView_c03.cpp + serial/TestSerial_SubView_c04.cpp + serial/TestSerial_SubView_c05.cpp + serial/TestSerial_SubView_c06.cpp + serial/TestSerial_SubView_c07.cpp + serial/TestSerial_SubView_c08.cpp + serial/TestSerial_SubView_c09.cpp + serial/TestSerial_SubView_c10.cpp + serial/TestSerial_SubView_c11.cpp + serial/TestSerial_SubView_c12.cpp + serial/TestSerial_SubView_c13.cpp + serial/TestSerial_Task.cpp + serial/TestSerial_Team.cpp + serial/TestSerial_TeamReductionScan.cpp + serial/TestSerial_TeamScratch.cpp + serial/TestSerial_ViewAPI_a.cpp + serial/TestSerial_ViewAPI_b.cpp + serial/TestSerial_ViewAPI_c.cpp + serial/TestSerial_ViewAPI_d.cpp + serial/TestSerial_ViewAPI_e.cpp + serial/TestSerial_ViewMapping_a.cpp + serial/TestSerial_ViewMapping_b.cpp + serial/TestSerial_ViewMapping_subview.cpp + serial/TestSerial_ViewOfClass.cpp + serial/TestSerial_Crs.cpp + serial/TestSerial_WorkGraph.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + ENDIF() ENDIF() IF(Kokkos_ENABLE_Pthread) @@ -106,6 +261,8 @@ IF(Kokkos_ENABLE_Pthread) threads/TestThreads_AtomicOperations_longlongint.cpp threads/TestThreads_AtomicOperations_double.cpp threads/TestThreads_AtomicOperations_float.cpp + threads/TestThreads_AtomicOperations_complexdouble.cpp + threads/TestThreads_AtomicOperations_complexfloat.cpp threads/TestThreads_AtomicViews.cpp threads/TestThreads_Atomics.cpp threads/TestThreads_Complex.cpp @@ -161,75 +318,305 @@ IF(Kokkos_ENABLE_Pthread) ENDIF() IF(Kokkos_ENABLE_OpenMP) + IF(KOKKOS_SEPARATE_TESTS) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_Atomics + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_AtomicOperations_int.cpp + openmp/TestOpenMP_AtomicOperations_unsignedint.cpp + openmp/TestOpenMP_AtomicOperations_longint.cpp + openmp/TestOpenMP_AtomicOperations_unsignedlongint.cpp + openmp/TestOpenMP_AtomicOperations_longlongint.cpp + openmp/TestOpenMP_AtomicOperations_double.cpp + openmp/TestOpenMP_AtomicOperations_float.cpp + openmp/TestOpenMP_AtomicOperations_complexdouble.cpp + openmp/TestOpenMP_AtomicOperations_complexfloat.cpp + openmp/TestOpenMP_AtomicViews.cpp + openmp/TestOpenMP_Atomics.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_SubView + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_SubView_a.cpp + openmp/TestOpenMP_SubView_b.cpp + openmp/TestOpenMP_SubView_c01.cpp + openmp/TestOpenMP_SubView_c02.cpp + openmp/TestOpenMP_SubView_c03.cpp + openmp/TestOpenMP_SubView_c04.cpp + openmp/TestOpenMP_SubView_c05.cpp + openmp/TestOpenMP_SubView_c06.cpp + openmp/TestOpenMP_SubView_c07.cpp + openmp/TestOpenMP_SubView_c08.cpp + openmp/TestOpenMP_SubView_c09.cpp + openmp/TestOpenMP_SubView_c10.cpp + openmp/TestOpenMP_SubView_c11.cpp + openmp/TestOpenMP_SubView_c12.cpp + openmp/TestOpenMP_SubView_c13.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_ViewAPI + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_ViewAPI_a.cpp + openmp/TestOpenMP_ViewAPI_b.cpp + openmp/TestOpenMP_ViewAPI_c.cpp + openmp/TestOpenMP_ViewAPI_d.cpp + openmp/TestOpenMP_ViewAPI_e.cpp + openmp/TestOpenMP_ViewOfClass.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_ViewMapping + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_ViewMapping_a.cpp + openmp/TestOpenMP_ViewMapping_b.cpp + openmp/TestOpenMP_ViewMapping_subview.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_Reducers + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_Reductions.cpp + openmp/TestOpenMP_Reducers_a.cpp + openmp/TestOpenMP_Reducers_b.cpp + openmp/TestOpenMP_Reducers_c.cpp + openmp/TestOpenMP_Reducers_d.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_MDRange + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_MDRange_a.cpp + openmp/TestOpenMP_MDRange_b.cpp + openmp/TestOpenMP_MDRange_c.cpp + openmp/TestOpenMP_MDRange_d.cpp + openmp/TestOpenMP_MDRange_e.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_Team + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_Team.cpp + openmp/TestOpenMP_TeamReductionScan.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_Tasking + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_Task.cpp + openmp/TestOpenMP_WorkGraph.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_Misc + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_Complex.cpp + openmp/TestOpenMP_Init.cpp + openmp/TestOpenMP_Other.cpp + openmp/TestOpenMP_RangePolicy.cpp + openmp/TestOpenMP_Scan.cpp + openmp/TestOpenMP_SharedAlloc.cpp + openmp/TestOpenMP_Crs.cpp + openmp/TestOpenMP_UniqueToken.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMPInterOp + SOURCES + UnitTestMain.cpp + openmp/TestOpenMP_InterOp.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + ELSE() + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_AtomicOperations_int.cpp + openmp/TestOpenMP_AtomicOperations_unsignedint.cpp + openmp/TestOpenMP_AtomicOperations_longint.cpp + openmp/TestOpenMP_AtomicOperations_unsignedlongint.cpp + openmp/TestOpenMP_AtomicOperations_longlongint.cpp + openmp/TestOpenMP_AtomicOperations_double.cpp + openmp/TestOpenMP_AtomicOperations_float.cpp + openmp/TestOpenMP_AtomicOperations_complexdouble.cpp + openmp/TestOpenMP_AtomicOperations_complexfloat.cpp + openmp/TestOpenMP_AtomicViews.cpp + openmp/TestOpenMP_Atomics.cpp + openmp/TestOpenMP_Complex.cpp + openmp/TestOpenMP_Init.cpp + openmp/TestOpenMP_MDRange_a.cpp + openmp/TestOpenMP_MDRange_b.cpp + openmp/TestOpenMP_MDRange_c.cpp + openmp/TestOpenMP_MDRange_d.cpp + openmp/TestOpenMP_MDRange_e.cpp + openmp/TestOpenMP_Other.cpp + openmp/TestOpenMP_RangePolicy.cpp + openmp/TestOpenMP_Reductions.cpp + openmp/TestOpenMP_Reducers_a.cpp + openmp/TestOpenMP_Reducers_b.cpp + openmp/TestOpenMP_Reducers_c.cpp + openmp/TestOpenMP_Reducers_d.cpp + openmp/TestOpenMP_Scan.cpp + openmp/TestOpenMP_SharedAlloc.cpp + openmp/TestOpenMP_SubView_a.cpp + openmp/TestOpenMP_SubView_b.cpp + openmp/TestOpenMP_SubView_c01.cpp + openmp/TestOpenMP_SubView_c02.cpp + openmp/TestOpenMP_SubView_c03.cpp + openmp/TestOpenMP_SubView_c04.cpp + openmp/TestOpenMP_SubView_c05.cpp + openmp/TestOpenMP_SubView_c06.cpp + openmp/TestOpenMP_SubView_c07.cpp + openmp/TestOpenMP_SubView_c08.cpp + openmp/TestOpenMP_SubView_c09.cpp + openmp/TestOpenMP_SubView_c10.cpp + openmp/TestOpenMP_SubView_c11.cpp + openmp/TestOpenMP_SubView_c12.cpp + openmp/TestOpenMP_SubView_c13.cpp + openmp/TestOpenMP_Task.cpp + openmp/TestOpenMP_Team.cpp + openmp/TestOpenMP_TeamReductionScan.cpp + openmp/TestOpenMP_ViewAPI_a.cpp + openmp/TestOpenMP_ViewAPI_b.cpp + openmp/TestOpenMP_ViewAPI_c.cpp + openmp/TestOpenMP_ViewAPI_d.cpp + openmp/TestOpenMP_ViewAPI_e.cpp + openmp/TestOpenMP_ViewMapping_a.cpp + openmp/TestOpenMP_ViewMapping_b.cpp + openmp/TestOpenMP_ViewMapping_subview.cpp + openmp/TestOpenMP_ViewOfClass.cpp + openmp/TestOpenMP_Crs.cpp + openmp/TestOpenMP_WorkGraph.cpp + openmp/TestOpenMP_UniqueToken.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMPInterOp + SOURCES + UnitTestMain.cpp + openmp/TestOpenMP_InterOp.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + ENDIF() +ENDIF() + +IF(Kokkos_ENABLE_HPX) TRIBITS_ADD_EXECUTABLE_AND_TEST( - UnitTest_OpenMP + UnitTest_HPX SOURCES UnitTestMainInit.cpp - openmp/TestOpenMP_AtomicOperations_int.cpp - openmp/TestOpenMP_AtomicOperations_unsignedint.cpp - openmp/TestOpenMP_AtomicOperations_longint.cpp - openmp/TestOpenMP_AtomicOperations_unsignedlongint.cpp - openmp/TestOpenMP_AtomicOperations_longlongint.cpp - openmp/TestOpenMP_AtomicOperations_double.cpp - openmp/TestOpenMP_AtomicOperations_float.cpp - openmp/TestOpenMP_AtomicViews.cpp - openmp/TestOpenMP_Atomics.cpp - openmp/TestOpenMP_Complex.cpp - openmp/TestOpenMP_Init.cpp - openmp/TestOpenMP_MDRange_a.cpp - openmp/TestOpenMP_MDRange_b.cpp - openmp/TestOpenMP_MDRange_c.cpp - openmp/TestOpenMP_MDRange_d.cpp - openmp/TestOpenMP_MDRange_e.cpp - openmp/TestOpenMP_Other.cpp - openmp/TestOpenMP_RangePolicy.cpp - openmp/TestOpenMP_Reductions.cpp - openmp/TestOpenMP_Reducers_a.cpp - openmp/TestOpenMP_Reducers_b.cpp - openmp/TestOpenMP_Reducers_c.cpp - openmp/TestOpenMP_Reducers_d.cpp - openmp/TestOpenMP_Scan.cpp - openmp/TestOpenMP_SharedAlloc.cpp - openmp/TestOpenMP_SubView_a.cpp - openmp/TestOpenMP_SubView_b.cpp - openmp/TestOpenMP_SubView_c01.cpp - openmp/TestOpenMP_SubView_c02.cpp - openmp/TestOpenMP_SubView_c03.cpp - openmp/TestOpenMP_SubView_c04.cpp - openmp/TestOpenMP_SubView_c05.cpp - openmp/TestOpenMP_SubView_c06.cpp - openmp/TestOpenMP_SubView_c07.cpp - openmp/TestOpenMP_SubView_c08.cpp - openmp/TestOpenMP_SubView_c09.cpp - openmp/TestOpenMP_SubView_c10.cpp - openmp/TestOpenMP_SubView_c11.cpp - openmp/TestOpenMP_SubView_c12.cpp - openmp/TestOpenMP_SubView_c13.cpp - openmp/TestOpenMP_Task.cpp - openmp/TestOpenMP_Team.cpp - openmp/TestOpenMP_TeamReductionScan.cpp - openmp/TestOpenMP_ViewAPI_a.cpp - openmp/TestOpenMP_ViewAPI_b.cpp - openmp/TestOpenMP_ViewAPI_c.cpp - openmp/TestOpenMP_ViewAPI_d.cpp - openmp/TestOpenMP_ViewAPI_e.cpp - openmp/TestOpenMP_ViewMapping_a.cpp - openmp/TestOpenMP_ViewMapping_b.cpp - openmp/TestOpenMP_ViewMapping_subview.cpp - openmp/TestOpenMP_ViewOfClass.cpp - openmp/TestOpenMP_Crs.cpp - openmp/TestOpenMP_WorkGraph.cpp - openmp/TestOpenMP_UniqueToken.cpp + hpx/TestHPX_AtomicOperations_int.cpp + hpx/TestHPX_AtomicOperations_unsignedint.cpp + hpx/TestHPX_AtomicOperations_longint.cpp + hpx/TestHPX_AtomicOperations_unsignedlongint.cpp + hpx/TestHPX_AtomicOperations_longlongint.cpp + hpx/TestHPX_AtomicOperations_double.cpp + hpx/TestHPX_AtomicOperations_float.cpp + hpx/TestHPX_AtomicViews.cpp + hpx/TestHPX_Atomics.cpp + hpx/TestHPX_Complex.cpp + hpx/TestHPX_Init.cpp + hpx/TestHPX_MDRange_a.cpp + hpx/TestHPX_MDRange_b.cpp + hpx/TestHPX_MDRange_c.cpp + hpx/TestHPX_MDRange_d.cpp + hpx/TestHPX_MDRange_e.cpp + hpx/TestHPX_Other.cpp + hpx/TestHPX_RangePolicy.cpp + hpx/TestHPX_Reductions.cpp + hpx/TestHPX_Reducers_a.cpp + hpx/TestHPX_Reducers_b.cpp + hpx/TestHPX_Reducers_c.cpp + hpx/TestHPX_Reducers_d.cpp + hpx/TestHPX_Scan.cpp + hpx/TestHPX_SharedAlloc.cpp + hpx/TestHPX_SubView_a.cpp + hpx/TestHPX_SubView_b.cpp + hpx/TestHPX_SubView_c01.cpp + hpx/TestHPX_SubView_c02.cpp + hpx/TestHPX_SubView_c03.cpp + hpx/TestHPX_SubView_c04.cpp + hpx/TestHPX_SubView_c05.cpp + hpx/TestHPX_SubView_c06.cpp + hpx/TestHPX_SubView_c07.cpp + hpx/TestHPX_SubView_c08.cpp + hpx/TestHPX_SubView_c09.cpp + hpx/TestHPX_SubView_c10.cpp + hpx/TestHPX_SubView_c11.cpp + hpx/TestHPX_SubView_c12.cpp + hpx/TestHPX_SubView_c13.cpp + hpx/TestHPX_Task.cpp + hpx/TestHPX_Team.cpp + hpx/TestHPX_TeamReductionScan.cpp + hpx/TestHPX_ViewAPI_a.cpp + hpx/TestHPX_ViewAPI_b.cpp + hpx/TestHPX_ViewAPI_c.cpp + hpx/TestHPX_ViewAPI_d.cpp + hpx/TestHPX_ViewAPI_e.cpp + hpx/TestHPX_ViewMapping_a.cpp + hpx/TestHPX_ViewMapping_b.cpp + hpx/TestHPX_ViewMapping_subview.cpp + hpx/TestHPX_ViewOfClass.cpp + hpx/TestHPX_Crs.cpp + hpx/TestHPX_WorkGraph.cpp + hpx/TestHPX_UniqueToken.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) TRIBITS_ADD_EXECUTABLE_AND_TEST( - UnitTest_OpenMPInterOp + UnitTest_HPXInterOp SOURCES UnitTestMain.cpp - openmp/TestOpenMP_InterOp.cpp + hpx/TestHPX_InterOp.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -310,6 +697,8 @@ IF(Kokkos_ENABLE_Cuda) cuda/TestCuda_AtomicOperations_longlongint.cpp cuda/TestCuda_AtomicOperations_double.cpp cuda/TestCuda_AtomicOperations_float.cpp + cuda/TestCuda_AtomicOperations_complexdouble.cpp + cuda/TestCuda_AtomicOperations_complexfloat.cpp cuda/TestCuda_AtomicViews.cpp cuda/TestCuda_Atomics.cpp cuda/TestCuda_Complex.cpp @@ -366,10 +755,20 @@ IF(Kokkos_ENABLE_Cuda) TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) TRIBITS_ADD_EXECUTABLE_AND_TEST( - UnitTest_CudaInterOp + UnitTest_CudaInterOpInit SOURCES UnitTestMain.cpp - cuda/TestCuda_InterOp.cpp + cuda/TestCuda_InterOp_Init.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_CudaInterOpStreams + SOURCES + UnitTestMain.cpp + cuda/TestCuda_InterOp_Streams.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -456,3 +855,40 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( FAIL_REGULAR_EXPRESSION " FAILED " TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) + +# +# Compile-only tests +# +FUNCTION(KOKKOS_ADD_COMPILE_TEST TEST_NAME) + + SET(options LINK_KOKKOS) + SET(oneValueArgs) + SET(multiValueArgs) + + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + IF(PARSE_LINK_KOKKOS) + SET(libs ${TEST_LINK_TARGETS}) + ELSE() + SET(libs) + ENDIF() + + TRIBITS_ADD_EXECUTABLE( + ${TEST_NAME} + TESTONLY + COMM serial + TESTONLYLIBS ${libs} + ${PARSE_UNPARSED_ARGUMENTS} + ) + + target_compile_options( + ${PACKAGE_NAME}_${TEST_NAME} + PUBLIC $<$:${KOKKOS_CXX_FLAGS}> + ) + target_link_libraries( + ${PACKAGE_NAME}_${TEST_NAME} + PUBLIC ${KOKKOS_LD_FLAGS} + ) + +ENDFUNCTION() + diff --git a/lib/kokkos/core/unit_test/Makefile b/lib/kokkos/core/unit_test/Makefile index 72832271c8..5a69213108 100644 --- a/lib/kokkos/core/unit_test/Makefile +++ b/lib/kokkos/core/unit_test/Makefile @@ -9,6 +9,7 @@ vpath %.cpp ${KOKKOS_PATH}/core/unit_test/threads vpath %.cpp ${KOKKOS_PATH}/core/unit_test/openmp vpath %.cpp ${KOKKOS_PATH}/core/unit_test/openmptarget vpath %.cpp ${KOKKOS_PATH}/core/unit_test/qthreads +vpath %.cpp ${KOKKOS_PATH}/core/unit_test/hpx vpath %.cpp ${KOKKOS_PATH}/core/unit_test/cuda vpath %.cpp ${KOKKOS_PATH}/core/unit_test/rocm @@ -38,253 +39,310 @@ TEST_TARGETS = TARGETS = ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - OBJ_CUDA = UnitTestMainInit.o gtest-all.o - OBJ_CUDA += TestCuda_Init.o - OBJ_CUDA += TestCuda_SharedAlloc.o TestCudaUVM_SharedAlloc.o TestCudaHostPinned_SharedAlloc.o - OBJ_CUDA += TestCuda_RangePolicy.o - OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o TestCuda_ViewAPI_e.o - OBJ_CUDA += TestCuda_ViewMapping_a.o TestCuda_ViewMapping_b.o TestCuda_ViewMapping_subview.o TestCuda_ViewLayoutStrideAssignment.o - OBJ_CUDA += TestCudaUVM_ViewCopy.o TestCudaUVM_ViewAPI_a.o TestCudaUVM_ViewAPI_b.o TestCudaUVM_ViewAPI_c.o TestCudaUVM_ViewAPI_d.o TestCudaUVM_ViewAPI_e.o - OBJ_CUDA += TestCudaUVM_ViewMapping_a.o TestCudaUVM_ViewMapping_b.o TestCudaUVM_ViewMapping_subview.o - OBJ_CUDA += TestCudaHostPinned_ViewCopy.o TestCudaHostPinned_ViewAPI_a.o TestCudaHostPinned_ViewAPI_b.o TestCudaHostPinned_ViewAPI_c.o TestCudaHostPinned_ViewAPI_d.o TestCudaHostPinned_ViewAPI_e.o - OBJ_CUDA += TestCudaHostPinned_ViewMapping_a.o TestCudaHostPinned_ViewMapping_b.o TestCudaHostPinned_ViewMapping_subview.o - OBJ_CUDA += TestCuda_View_64bit.o - OBJ_CUDA += TestCuda_ViewOfClass.o - OBJ_CUDA += TestCuda_SubView_a.o TestCuda_SubView_b.o - OBJ_CUDA += TestCuda_SubView_c01.o TestCuda_SubView_c02.o TestCuda_SubView_c03.o - OBJ_CUDA += TestCuda_SubView_c04.o TestCuda_SubView_c05.o TestCuda_SubView_c06.o - OBJ_CUDA += TestCuda_SubView_c07.o TestCuda_SubView_c08.o TestCuda_SubView_c09.o - OBJ_CUDA += TestCuda_SubView_c10.o TestCuda_SubView_c11.o TestCuda_SubView_c12.o - OBJ_CUDA += TestCuda_SubView_c13.o - OBJ_CUDA += TestCuda_Reductions.o TestCuda_Scan.o - OBJ_CUDA += TestCuda_Reductions_DeviceView.o - OBJ_CUDA += TestCuda_Reducers_a.o TestCuda_Reducers_b.o TestCuda_Reducers_c.o TestCuda_Reducers_d.o - OBJ_CUDA += TestCuda_Complex.o - OBJ_CUDA += TestCuda_AtomicOperations_int.o TestCuda_AtomicOperations_unsignedint.o TestCuda_AtomicOperations_longint.o - OBJ_CUDA += TestCuda_AtomicOperations_unsignedlongint.o TestCuda_AtomicOperations_longlongint.o TestCuda_AtomicOperations_double.o TestCuda_AtomicOperations_float.o - OBJ_CUDA += TestCuda_AtomicViews.o TestCuda_Atomics.o - OBJ_CUDA += TestCuda_Team.o TestCuda_TeamScratch.o - OBJ_CUDA += TestCuda_TeamReductionScan.o TestCuda_TeamTeamSize.o - OBJ_CUDA += TestCuda_Other.o - OBJ_CUDA += TestCuda_MDRange_a.o TestCuda_MDRange_b.o TestCuda_MDRange_c.o TestCuda_MDRange_d.o TestCuda_MDRange_e.o - OBJ_CUDA += TestCuda_Crs.o - OBJ_CUDA += TestCuda_Task.o TestCuda_WorkGraph.o - OBJ_CUDA += TestCuda_Spaces.o - OBJ_CUDA += TestCuda_UniqueToken.o - - TARGETS += KokkosCore_UnitTest_Cuda - TARGETS += KokkosCore_UnitTest_CudaInterOp - TEST_TARGETS += test-cuda + OBJ_CUDA = UnitTestMainInit.o gtest-all.o + OBJ_CUDA += TestCuda_Init.o + OBJ_CUDA += TestCuda_SharedAlloc.o TestCudaUVM_SharedAlloc.o TestCudaHostPinned_SharedAlloc.o + OBJ_CUDA += TestCuda_RangePolicy.o + OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o TestCuda_ViewAPI_e.o + OBJ_CUDA += TestCuda_DeepCopyAlignment.o + OBJ_CUDA += TestCuda_ViewMapping_a.o TestCuda_ViewMapping_b.o TestCuda_ViewMapping_subview.o TestCuda_ViewLayoutStrideAssignment.o + OBJ_CUDA += TestCudaUVM_ViewCopy.o TestCudaUVM_ViewAPI_a.o TestCudaUVM_ViewAPI_b.o TestCudaUVM_ViewAPI_c.o TestCudaUVM_ViewAPI_d.o TestCudaUVM_ViewAPI_e.o + OBJ_CUDA += TestCudaUVM_ViewMapping_a.o TestCudaUVM_ViewMapping_b.o TestCudaUVM_ViewMapping_subview.o + OBJ_CUDA += TestCudaHostPinned_ViewCopy.o TestCudaHostPinned_ViewAPI_a.o TestCudaHostPinned_ViewAPI_b.o TestCudaHostPinned_ViewAPI_c.o TestCudaHostPinned_ViewAPI_d.o TestCudaHostPinned_ViewAPI_e.o + OBJ_CUDA += TestCudaHostPinned_ViewMapping_a.o TestCudaHostPinned_ViewMapping_b.o TestCudaHostPinned_ViewMapping_subview.o + OBJ_CUDA += TestCuda_View_64bit.o + OBJ_CUDA += TestCuda_ViewOfClass.o + OBJ_CUDA += TestCuda_SubView_a.o TestCuda_SubView_b.o + OBJ_CUDA += TestCuda_SubView_c01.o TestCuda_SubView_c02.o TestCuda_SubView_c03.o + OBJ_CUDA += TestCuda_SubView_c04.o TestCuda_SubView_c05.o TestCuda_SubView_c06.o + OBJ_CUDA += TestCuda_SubView_c07.o TestCuda_SubView_c08.o TestCuda_SubView_c09.o + OBJ_CUDA += TestCuda_SubView_c10.o TestCuda_SubView_c11.o TestCuda_SubView_c12.o + OBJ_CUDA += TestCuda_SubView_c13.o + OBJ_CUDA += TestCuda_Reductions.o TestCuda_Scan.o + OBJ_CUDA += TestCuda_Reductions_DeviceView.o + OBJ_CUDA += TestCuda_Reducers_a.o TestCuda_Reducers_b.o TestCuda_Reducers_c.o TestCuda_Reducers_d.o + OBJ_CUDA += TestCuda_Complex.o + OBJ_CUDA += TestCuda_AtomicOperations_int.o TestCuda_AtomicOperations_unsignedint.o TestCuda_AtomicOperations_longint.o + OBJ_CUDA += TestCuda_AtomicOperations_unsignedlongint.o TestCuda_AtomicOperations_longlongint.o TestCuda_AtomicOperations_double.o TestCuda_AtomicOperations_float.o + OBJ_CUDA += TestCuda_AtomicOperations_complexfloat.o TestCuda_AtomicOperations_complexdouble.o + OBJ_CUDA += TestCuda_AtomicViews.o TestCuda_Atomics.o + OBJ_CUDA += TestCuda_Team.o TestCuda_TeamScratch.o + OBJ_CUDA += TestCuda_TeamReductionScan.o TestCuda_TeamTeamSize.o + OBJ_CUDA += TestCuda_TeamVectorRange.o + OBJ_CUDA += TestCuda_Other.o + OBJ_CUDA += TestCuda_MDRange_a.o TestCuda_MDRange_b.o TestCuda_MDRange_c.o TestCuda_MDRange_d.o TestCuda_MDRange_e.o + OBJ_CUDA += TestCuda_Crs.o + OBJ_CUDA += TestCuda_Task.o TestCuda_WorkGraph.o + OBJ_CUDA += TestCuda_Spaces.o + OBJ_CUDA += TestCuda_UniqueToken.o + OBJ_CUDA += TestCuda_LocalDeepCopy.o + + TARGETS += KokkosCore_UnitTest_Cuda + TARGETS += KokkosCore_UnitTest_CudaInterOpInit + TARGETS += KokkosCore_UnitTest_CudaInterOpStreams + TEST_TARGETS += test-cuda endif ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) - OBJ_ROCM = UnitTestMainInit.o gtest-all.o - OBJ_ROCM += TestROCm_Init.o - OBJ_ROCM += TestROCm_Complex.o - OBJ_ROCM += TestROCm_RangePolicy.o - OBJ_ROCM += TestROCm_AtomicOperations_int.o TestROCm_AtomicOperations_unsignedint.o TestROCm_AtomicOperations_longint.o - OBJ_ROCM += TestROCm_AtomicOperations_unsignedlongint.o TestROCm_AtomicOperations_longlongint.o TestROCm_AtomicOperations_double.o TestROCm_AtomicOperations_float.o - OBJ_ROCM += TestROCm_Atomics.o - OBJ_ROCM += TestROCm_AtomicViews.o - OBJ_ROCM += TestROCm_Other.o - OBJ_ROCM += TestROCm_MDRange_a.o TestROCm_MDRange_b.o TestROCm_MDRange_c.o TestROCm_MDRange_d.o TestROCm_MDRange_e.o - OBJ_ROCM += TestROCm_MDRangeReduce_a.o TestROCm_MDRangeReduce_b.o TestROCm_MDRangeReduce_c.o TestROCm_MDRangeReduce_d.o TestROCm_MDRangeReduce_e.o - OBJ_ROCM += TestROCm_Reductions.o - OBJ_ROCM += TestROCm_Reducers_a.o TestROCm_Reducers_b.o TestROCm_Reducers_c.o TestROCm_Reducers_d.o - OBJ_ROCM += TestROCm_Scan.o - OBJ_ROCM += TestROCm_SharedAlloc.o - OBJ_ROCM += TestROCm_SubView_a.o - OBJ_ROCM += TestROCm_SubView_b.o - OBJ_ROCM += TestROCm_SubView_c01.o - OBJ_ROCM += TestROCm_SubView_c02.o - OBJ_ROCM += TestROCm_SubView_c03.o - OBJ_ROCM += TestROCm_SubView_c04.o - OBJ_ROCM += TestROCm_SubView_c05.o - OBJ_ROCM += TestROCm_SubView_c06.o - OBJ_ROCM += TestROCm_SubView_c07.o - OBJ_ROCM += TestROCm_SubView_c08.o - OBJ_ROCM += TestROCm_SubView_c09.o - OBJ_ROCM += TestROCm_SubView_c10.o - OBJ_ROCM += TestROCm_SubView_c11.o - OBJ_ROCM += TestROCm_SubView_c12.o - OBJ_ROCM += TestROCm_SubView_c13.o - OBJ_ROCM += TestROCm_Team.o - OBJ_ROCM += TestROCm_TeamReductionScan.o - OBJ_ROCM += TestROCm_TeamScratch.o TestROCm_TeamTeamSize.o - OBJ_ROCM += TestROCm_ViewAPI_a.o TestROCm_ViewAPI_b.o TestROCm_ViewAPI_c.o TestROCm_ViewAPI_d.o TestROCm_ViewAPI_e.o - OBJ_ROCM += TestROCm_ViewMapping_a.o - OBJ_ROCM += TestROCm_ViewMapping_b.o - OBJ_ROCM += TestROCm_ViewMapping_subview.o - OBJ_ROCM += TestROCmHostPinned_ViewCopy.o TestROCmHostPinned_ViewAPI_a.o TestROCmHostPinned_ViewAPI_b.o TestROCmHostPinned_ViewAPI_c.o TestROCmHostPinned_ViewAPI_d.o TestROCmHostPinned_ViewAPI_e.o - OBJ_ROCM += TestROCmHostPinned_View_64bit.o - OBJ_ROCM += TestROCmHostPinned_ViewMapping_a.o - OBJ_ROCM += TestROCmHostPinned_ViewMapping_b.o - OBJ_ROCM += TestROCmHostPinned_ViewMapping_subview.o - OBJ_ROCM += TestROCm_ViewOfClass.o - OBJ_ROCM += TestROCm_Spaces.o - OBJ_ROCM += TestROCm_Crs.o - - TARGETS += KokkosCore_UnitTest_ROCm - TEST_TARGETS += test-rocm + OBJ_ROCM = UnitTestMainInit.o gtest-all.o + OBJ_ROCM += TestROCm_Init.o + OBJ_ROCM += TestROCm_Complex.o + OBJ_ROCM += TestROCm_RangePolicy.o + OBJ_ROCM += TestROCm_AtomicOperations_int.o TestROCm_AtomicOperations_unsignedint.o TestROCm_AtomicOperations_longint.o + OBJ_ROCM += TestROCm_AtomicOperations_unsignedlongint.o TestROCm_AtomicOperations_longlongint.o TestROCm_AtomicOperations_double.o TestROCm_AtomicOperations_float.o + OBJ_ROCM += TestROCm_Atomics.o + OBJ_ROCM += TestROCm_AtomicViews.o + OBJ_ROCM += TestROCm_Other.o + OBJ_ROCM += TestROCm_MDRange_a.o TestROCm_MDRange_b.o TestROCm_MDRange_c.o TestROCm_MDRange_d.o TestROCm_MDRange_e.o + OBJ_ROCM += TestROCm_MDRangeReduce_a.o TestROCm_MDRangeReduce_b.o TestROCm_MDRangeReduce_c.o TestROCm_MDRangeReduce_d.o TestROCm_MDRangeReduce_e.o + OBJ_ROCM += TestROCm_Reductions.o + OBJ_ROCM += TestROCm_Reducers_a.o TestROCm_Reducers_b.o TestROCm_Reducers_c.o TestROCm_Reducers_d.o + OBJ_ROCM += TestROCm_Scan.o + OBJ_ROCM += TestROCm_SharedAlloc.o + OBJ_ROCM += TestROCm_SubView_a.o + OBJ_ROCM += TestROCm_SubView_b.o + OBJ_ROCM += TestROCm_SubView_c01.o + OBJ_ROCM += TestROCm_SubView_c02.o + OBJ_ROCM += TestROCm_SubView_c03.o + OBJ_ROCM += TestROCm_SubView_c04.o + OBJ_ROCM += TestROCm_SubView_c05.o + OBJ_ROCM += TestROCm_SubView_c06.o + OBJ_ROCM += TestROCm_SubView_c07.o + OBJ_ROCM += TestROCm_SubView_c08.o + OBJ_ROCM += TestROCm_SubView_c09.o + OBJ_ROCM += TestROCm_SubView_c10.o + OBJ_ROCM += TestROCm_SubView_c11.o + OBJ_ROCM += TestROCm_SubView_c12.o + OBJ_ROCM += TestROCm_SubView_c13.o + OBJ_ROCM += TestROCm_Team.o + OBJ_ROCM += TestROCm_TeamReductionScan.o + OBJ_ROCM += TestROCm_TeamScratch.o TestROCm_TeamTeamSize.o + OBJ_ROCM += TestROCm_ViewAPI_a.o TestROCm_ViewAPI_b.o TestROCm_ViewAPI_c.o TestROCm_ViewAPI_d.o TestROCm_ViewAPI_e.o + OBJ_ROCM += TestROCm_DeepCopyAlignment.o + OBJ_ROCM += TestROCm_ViewMapping_a.o + OBJ_ROCM += TestROCm_ViewMapping_b.o + OBJ_ROCM += TestROCm_ViewMapping_subview.o + OBJ_ROCM += TestROCmHostPinned_ViewCopy.o TestROCmHostPinned_ViewAPI_a.o TestROCmHostPinned_ViewAPI_b.o TestROCmHostPinned_ViewAPI_c.o TestROCmHostPinned_ViewAPI_d.o TestROCmHostPinned_ViewAPI_e.o + OBJ_ROCM += TestROCmHostPinned_View_64bit.o + OBJ_ROCM += TestROCmHostPinned_ViewMapping_a.o + OBJ_ROCM += TestROCmHostPinned_ViewMapping_b.o + OBJ_ROCM += TestROCmHostPinned_ViewMapping_subview.o + OBJ_ROCM += TestROCm_ViewOfClass.o + OBJ_ROCM += TestROCm_Spaces.o + OBJ_ROCM += TestROCm_Crs.o + + TARGETS += KokkosCore_UnitTest_ROCm + TEST_TARGETS += test-rocm endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - - OBJ_THREADS = UnitTestMainInit.o gtest-all.o - OBJ_THREADS += TestThreads_Init.o - OBJ_THREADS += TestThreads_SharedAlloc.o - OBJ_THREADS += TestThreads_RangePolicy.o + OBJ_THREADS = UnitTestMainInit.o gtest-all.o + OBJ_THREADS += TestThreads_Init.o + OBJ_THREADS += TestThreads_SharedAlloc.o + OBJ_THREADS += TestThreads_RangePolicy.o OBJ_THREADS += TestThreads_View_64bit.o - OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o TestThreads_ViewAPI_c.o TestThreads_ViewAPI_d.o TestThreads_ViewAPI_e.o - OBJ_THREADS += TestThreads_ViewMapping_a.o TestThreads_ViewMapping_b.o TestThreads_ViewMapping_subview.o TestThreads_ViewLayoutStrideAssignment.o - OBJ_THREADS += TestThreads_ViewOfClass.o - OBJ_THREADS += TestThreads_SubView_a.o TestThreads_SubView_b.o - OBJ_THREADS += TestThreads_SubView_c01.o TestThreads_SubView_c02.o TestThreads_SubView_c03.o - OBJ_THREADS += TestThreads_SubView_c04.o TestThreads_SubView_c05.o TestThreads_SubView_c06.o - OBJ_THREADS += TestThreads_SubView_c07.o TestThreads_SubView_c08.o TestThreads_SubView_c09.o - OBJ_THREADS += TestThreads_SubView_c10.o TestThreads_SubView_c11.o TestThreads_SubView_c12.o - OBJ_THREADS += TestThreads_Reductions.o TestThreads_Scan.o - OBJ_THREADS += TestThreads_Reductions_DeviceView.o - OBJ_THREADS += TestThreads_Reducers_a.o TestThreads_Reducers_b.o TestThreads_Reducers_c.o TestThreads_Reducers_d.o - OBJ_THREADS += TestThreads_Complex.o - OBJ_THREADS += TestThreads_AtomicOperations_int.o TestThreads_AtomicOperations_unsignedint.o TestThreads_AtomicOperations_longint.o - OBJ_THREADS += TestThreads_AtomicOperations_unsignedlongint.o TestThreads_AtomicOperations_longlongint.o TestThreads_AtomicOperations_double.o TestThreads_AtomicOperations_float.o - OBJ_THREADS += TestThreads_AtomicViews.o TestThreads_Atomics.o - OBJ_THREADS += TestThreads_Team.o TestThreads_TeamScratch.o TestThreads_TeamTeamSize.o - OBJ_THREADS += TestThreads_TeamReductionScan.o - OBJ_THREADS += TestThreads_Other.o - OBJ_THREADS += TestThreads_MDRange_a.o TestThreads_MDRange_b.o TestThreads_MDRange_c.o TestThreads_MDRange_d.o TestThreads_MDRange_e.o + OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o TestThreads_ViewAPI_c.o TestThreads_ViewAPI_d.o TestThreads_ViewAPI_e.o + OBJ_THREADS += TestThreads_DeepCopyAlignment.o + OBJ_THREADS += TestThreads_ViewMapping_a.o TestThreads_ViewMapping_b.o TestThreads_ViewMapping_subview.o TestThreads_ViewLayoutStrideAssignment.o + OBJ_THREADS += TestThreads_ViewOfClass.o + OBJ_THREADS += TestThreads_SubView_a.o TestThreads_SubView_b.o + OBJ_THREADS += TestThreads_SubView_c01.o TestThreads_SubView_c02.o TestThreads_SubView_c03.o + OBJ_THREADS += TestThreads_SubView_c04.o TestThreads_SubView_c05.o TestThreads_SubView_c06.o + OBJ_THREADS += TestThreads_SubView_c07.o TestThreads_SubView_c08.o TestThreads_SubView_c09.o + OBJ_THREADS += TestThreads_SubView_c10.o TestThreads_SubView_c11.o TestThreads_SubView_c12.o + OBJ_THREADS += TestThreads_Reductions.o TestThreads_Scan.o + OBJ_THREADS += TestThreads_Reductions_DeviceView.o + OBJ_THREADS += TestThreads_Reducers_a.o TestThreads_Reducers_b.o TestThreads_Reducers_c.o TestThreads_Reducers_d.o + OBJ_THREADS += TestThreads_Complex.o + OBJ_THREADS += TestThreads_AtomicOperations_int.o TestThreads_AtomicOperations_unsignedint.o TestThreads_AtomicOperations_longint.o + OBJ_THREADS += TestThreads_AtomicOperations_unsignedlongint.o TestThreads_AtomicOperations_longlongint.o TestThreads_AtomicOperations_double.o TestThreads_AtomicOperations_float.o + OBJ_THREADS += TestThreads_AtomicOperations_complexfloat.o TestThreads_AtomicOperations_complexdouble.o + OBJ_THREADS += TestThreads_AtomicViews.o TestThreads_Atomics.o + OBJ_THREADS += TestThreads_Team.o TestThreads_TeamScratch.o TestThreads_TeamTeamSize.o + OBJ_THREADS += TestThreads_TeamReductionScan.o + OBJ_THREADS += TestThreads_TeamVectorRange.o + OBJ_THREADS += TestThreads_Other.o + OBJ_THREADS += TestThreads_MDRange_a.o TestThreads_MDRange_b.o TestThreads_MDRange_c.o TestThreads_MDRange_d.o TestThreads_MDRange_e.o + OBJ_THREADS += TestThreads_LocalDeepCopy.o - TARGETS += KokkosCore_UnitTest_Threads + TARGETS += KokkosCore_UnitTest_Threads - TEST_TARGETS += test-threads + TEST_TARGETS += test-threads endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - OBJ_OPENMP = UnitTestMainInit.o gtest-all.o - OBJ_OPENMP += TestOpenMP_Init.o - OBJ_OPENMP += TestOpenMP_SharedAlloc.o - OBJ_OPENMP += TestOpenMP_RangePolicy.o + OBJ_OPENMP = UnitTestMainInit.o gtest-all.o + OBJ_OPENMP += TestOpenMP_Init.o + OBJ_OPENMP += TestOpenMP_SharedAlloc.o + OBJ_OPENMP += TestOpenMP_RangePolicy.o OBJ_OPENMP += TestOpenMP_View_64bit.o - OBJ_OPENMP += TestOpenMP_ViewAPI_a.o TestOpenMP_ViewAPI_b.o TestOpenMP_ViewAPI_c.o TestOpenMP_ViewAPI_d.o TestOpenMP_ViewAPI_e.o - OBJ_OPENMP += TestOpenMP_ViewMapping_a.o TestOpenMP_ViewMapping_b.o TestOpenMP_ViewMapping_subview.o TestOpenMP_ViewLayoutStrideAssignment.o - OBJ_OPENMP += TestOpenMP_ViewOfClass.o - OBJ_OPENMP += TestOpenMP_SubView_a.o TestOpenMP_SubView_b.o - OBJ_OPENMP += TestOpenMP_SubView_c01.o TestOpenMP_SubView_c02.o TestOpenMP_SubView_c03.o - OBJ_OPENMP += TestOpenMP_SubView_c04.o TestOpenMP_SubView_c05.o TestOpenMP_SubView_c06.o - OBJ_OPENMP += TestOpenMP_SubView_c07.o TestOpenMP_SubView_c08.o TestOpenMP_SubView_c09.o - OBJ_OPENMP += TestOpenMP_SubView_c10.o TestOpenMP_SubView_c11.o TestOpenMP_SubView_c12.o - OBJ_OPENMP += TestOpenMP_SubView_c13.o - OBJ_OPENMP += TestOpenMP_Reductions.o TestOpenMP_Scan.o - OBJ_OPENMP += TestOpenMP_Reductions_DeviceView.o - OBJ_OPENMP += TestOpenMP_Reducers_a.o TestOpenMP_Reducers_b.o TestOpenMP_Reducers_c.o TestOpenMP_Reducers_d.o - OBJ_OPENMP += TestOpenMP_Complex.o - OBJ_OPENMP += TestOpenMP_AtomicOperations_int.o TestOpenMP_AtomicOperations_unsignedint.o TestOpenMP_AtomicOperations_longint.o - OBJ_OPENMP += TestOpenMP_AtomicOperations_unsignedlongint.o TestOpenMP_AtomicOperations_longlongint.o TestOpenMP_AtomicOperations_double.o TestOpenMP_AtomicOperations_float.o - OBJ_OPENMP += TestOpenMP_AtomicViews.o TestOpenMP_Atomics.o - OBJ_OPENMP += TestOpenMP_Team.o TestOpenMP_TeamScratch.o - OBJ_OPENMP += TestOpenMP_TeamReductionScan.o TestOpenMP_TeamTeamSize.o - OBJ_OPENMP += TestOpenMP_Other.o - OBJ_OPENMP += TestOpenMP_MDRange_a.o TestOpenMP_MDRange_b.o TestOpenMP_MDRange_c.o TestOpenMP_MDRange_d.o TestOpenMP_MDRange_e.o - OBJ_OPENMP += TestOpenMP_Crs.o - OBJ_OPENMP += TestOpenMP_Task.o TestOpenMP_WorkGraph.o - OBJ_OPENMP += TestOpenMP_UniqueToken.o - - TARGETS += KokkosCore_UnitTest_OpenMP + OBJ_OPENMP += TestOpenMP_ViewAPI_a.o TestOpenMP_ViewAPI_b.o TestOpenMP_ViewAPI_c.o TestOpenMP_ViewAPI_d.o TestOpenMP_ViewAPI_e.o + OBJ_OPENMP += TestOpenMP_DeepCopyAlignment.o + OBJ_OPENMP += TestOpenMP_ViewMapping_a.o TestOpenMP_ViewMapping_b.o TestOpenMP_ViewMapping_subview.o TestOpenMP_ViewLayoutStrideAssignment.o + OBJ_OPENMP += TestOpenMP_ViewOfClass.o + OBJ_OPENMP += TestOpenMP_SubView_a.o TestOpenMP_SubView_b.o + OBJ_OPENMP += TestOpenMP_SubView_c01.o TestOpenMP_SubView_c02.o TestOpenMP_SubView_c03.o + OBJ_OPENMP += TestOpenMP_SubView_c04.o TestOpenMP_SubView_c05.o TestOpenMP_SubView_c06.o + OBJ_OPENMP += TestOpenMP_SubView_c07.o TestOpenMP_SubView_c08.o TestOpenMP_SubView_c09.o + OBJ_OPENMP += TestOpenMP_SubView_c10.o TestOpenMP_SubView_c11.o TestOpenMP_SubView_c12.o + OBJ_OPENMP += TestOpenMP_SubView_c13.o + OBJ_OPENMP += TestOpenMP_Reductions.o TestOpenMP_Scan.o + OBJ_OPENMP += TestOpenMP_Reductions_DeviceView.o + OBJ_OPENMP += TestOpenMP_Reducers_a.o TestOpenMP_Reducers_b.o TestOpenMP_Reducers_c.o TestOpenMP_Reducers_d.o + OBJ_OPENMP += TestOpenMP_Complex.o + OBJ_OPENMP += TestOpenMP_AtomicOperations_int.o TestOpenMP_AtomicOperations_unsignedint.o TestOpenMP_AtomicOperations_longint.o + OBJ_OPENMP += TestOpenMP_AtomicOperations_unsignedlongint.o TestOpenMP_AtomicOperations_longlongint.o TestOpenMP_AtomicOperations_double.o TestOpenMP_AtomicOperations_float.o + OBJ_OPENMP += TestOpenMP_AtomicOperations_complexfloat.o TestOpenMP_AtomicOperations_complexdouble.o + OBJ_OPENMP += TestOpenMP_AtomicViews.o TestOpenMP_Atomics.o + OBJ_OPENMP += TestOpenMP_Team.o TestOpenMP_TeamScratch.o + OBJ_OPENMP += TestOpenMP_TeamReductionScan.o TestOpenMP_TeamTeamSize.o + OBJ_OPENMP += TestOpenMP_TeamVectorRange.o + OBJ_OPENMP += TestOpenMP_Other.o + OBJ_OPENMP += TestOpenMP_MDRange_a.o TestOpenMP_MDRange_b.o TestOpenMP_MDRange_c.o TestOpenMP_MDRange_d.o TestOpenMP_MDRange_e.o + OBJ_OPENMP += TestOpenMP_Crs.o + OBJ_OPENMP += TestOpenMP_Task.o TestOpenMP_WorkGraph.o + OBJ_OPENMP += TestOpenMP_UniqueToken.o + OBJ_OPENMP += TestOpenMP_LocalDeepCopy.o + + TARGETS += KokkosCore_UnitTest_OpenMP TARGETS += KokkosCore_UnitTest_OpenMPInterOp - TEST_TARGETS += test-openmp + TEST_TARGETS += test-openmp endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - OBJ_OPENMPTARGET = UnitTestMainInit.o gtest-all.o - OBJ_OPENMPTARGET += TestOpenMPTarget_Init.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_SharedAlloc.o - OBJ_OPENMPTARGET += TestOpenMPTarget_RangePolicy.o - OBJ_OPENMPTARGET += TestOpenMPTarget_ViewAPI_a.o TestOpenMPTarget_ViewAPI_b.o TestOpenMPTarget_ViewAPI_c.o TestOpenMPTarget_ViewAPI_d.o TestOpenMPTarget_ViewAPI_e.o #Some commented out code - OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_a.o - OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_b.o + OBJ_OPENMPTARGET = UnitTestMainInit.o gtest-all.o + OBJ_OPENMPTARGET += TestOpenMPTarget_Init.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_SharedAlloc.o + OBJ_OPENMPTARGET += TestOpenMPTarget_RangePolicy.o + OBJ_OPENMPTARGET += TestOpenMPTarget_ViewAPI_a.o TestOpenMPTarget_ViewAPI_b.o TestOpenMPTarget_ViewAPI_c.o TestOpenMPTarget_ViewAPI_d.o TestOpenMPTarget_ViewAPI_e.o #Some commented out code + OBJ_OPENMPTARGET += TestOpenMPTarget_DeepCopyAlignment.o + OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_a.o + OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_b.o OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_subview.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_ViewOfClass.o - OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_a.o TestOpenMPTarget_SubView_b.o - #The following subview tests need something like UVM: - #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c01.o TestOpenMPTarget_SubView_c02.o TestOpenMPTarget_SubView_c03.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c04.o TestOpenMPTarget_SubView_c05.o TestOpenMPTarget_SubView_c06.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c07.o TestOpenMPTarget_SubView_c08.o TestOpenMPTarget_SubView_c09.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c10.o TestOpenMPTarget_SubView_c11.o TestOpenMPTarget_SubView_c12.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_Reductions.o # Need custom reductions - #OBJ_OPENMPTARGET += TestOpenMPTarget_Reducers_a.o TestOpenMPTarget_Reducers_b.o TestOpenMPTarget_Reducers_c.o TestOpenMPTarget_Reducers_d.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_Scan.o - OBJ_OPENMPTARGET += TestOpenMPTarget_Complex.o - OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_int.o TestOpenMPTarget_AtomicOperations_unsignedint.o TestOpenMPTarget_AtomicOperations_longint.o - OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_unsignedlongint.o TestOpenMPTarget_AtomicOperations_longlongint.o TestOpenMPTarget_AtomicOperations_double.o TestOpenMPTarget_AtomicOperations_float.o - OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicViews.o - OBJ_OPENMPTARGET += TestOpenMPTarget_Atomics.o # Commented Out Arbitrary Type Atomics - #OBJ_OPENMPTARGET += TestOpenMPTarget_Team.o # There is still a static function in this - #OBJ_OPENMPTARGET += TestOpenMPTarget_TeamScratch.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_TeamReductionScan.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_Other.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_MDRange_a.o TestOpenMPTarget_MDRange_b.o TestOpenMPTarget_MDRange_c.o TestOpenMPTarget_MDRange_d.o TestOpenMPTarget_MDRange_d.e - #OBJ_OPENMPTARGET += TestOpenMPTarget_Task.o - - TARGETS += KokkosCore_UnitTest_OpenMPTarget + #OBJ_OPENMPTARGET += TestOpenMPTarget_ViewOfClass.o + OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_a.o TestOpenMPTarget_SubView_b.o + #The following subview tests need something like UVM: + #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c01.o TestOpenMPTarget_SubView_c02.o TestOpenMPTarget_SubView_c03.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c04.o TestOpenMPTarget_SubView_c05.o TestOpenMPTarget_SubView_c06.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c07.o TestOpenMPTarget_SubView_c08.o TestOpenMPTarget_SubView_c09.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c10.o TestOpenMPTarget_SubView_c11.o TestOpenMPTarget_SubView_c12.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_Reductions.o # Need custom reductions + #OBJ_OPENMPTARGET += TestOpenMPTarget_Reducers_a.o TestOpenMPTarget_Reducers_b.o TestOpenMPTarget_Reducers_c.o TestOpenMPTarget_Reducers_d.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_Scan.o + OBJ_OPENMPTARGET += TestOpenMPTarget_Complex.o + OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_int.o TestOpenMPTarget_AtomicOperations_unsignedint.o TestOpenMPTarget_AtomicOperations_longint.o + OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_unsignedlongint.o TestOpenMPTarget_AtomicOperations_longlongint.o TestOpenMPTarget_AtomicOperations_double.o TestOpenMPTarget_AtomicOperations_float.o + OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_complexfloat.o TestOpenMPTarget_AtomicOperations_complexdouble.o + OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicViews.o + OBJ_OPENMPTARGET += TestOpenMPTarget_Atomics.o # Commented Out Arbitrary Type Atomics + #OBJ_OPENMPTARGET += TestOpenMPTarget_Team.o # There is still a static function in this + #OBJ_OPENMPTARGET += TestOpenMPTarget_TeamScratch.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_TeamReductionScan.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_Other.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_MDRange_a.o TestOpenMPTarget_MDRange_b.o TestOpenMPTarget_MDRange_c.o TestOpenMPTarget_MDRange_d.o TestOpenMPTarget_MDRange_d.e + #OBJ_OPENMPTARGET += TestOpenMPTarget_Task.o - TEST_TARGETS += test-openmptarget + TARGETS += KokkosCore_UnitTest_OpenMPTarget + TEST_TARGETS += test-openmptarget endif ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) - OBJ_QTHREADS = TestQthreads_Other.o TestQthreads_Reductions.o TestQthreads_Atomics.o TestQthreads_Team.o - OBJ_QTHREADS += TestQthreads_SubView_a.o TestQthreads_SubView_b.o - OBJ_QTHREADS += TestQthreads_SubView_c01.o TestQthreads_SubView_c02.o TestQthreads_SubView_c03.o - OBJ_QTHREADS += TestQthreads_SubView_c04.o TestQthreads_SubView_c05.o TestQthreads_SubView_c06.o - OBJ_QTHREADS += TestQthreads_SubView_c07.o TestQthreads_SubView_c08.o TestQthreads_SubView_c09.o - OBJ_QTHREADS += TestQthreads_SubView_c10.o TestQthreads_SubView_c11.o TestQthreads_SubView_c12.o - OBJ_QTHREADS += TestQthreads_ViewAPI_a.o TestQthreads_ViewAPI_b.o TestQthreads_ViewAPI_c.o TestQthreads_ViewAPI_d.o TestQthreads_ViewAPI_e.o UnitTestMain.o gtest-all.o - TARGETS += KokkosCore_UnitTest_Qthreads + OBJ_QTHREADS = TestQthreads_Other.o TestQthreads_Reductions.o TestQthreads_Atomics.o TestQthreads_Team.o + OBJ_QTHREADS += TestQthreads_SubView_a.o TestQthreads_SubView_b.o + OBJ_QTHREADS += TestQthreads_SubView_c01.o TestQthreads_SubView_c02.o TestQthreads_SubView_c03.o + OBJ_QTHREADS += TestQthreads_SubView_c04.o TestQthreads_SubView_c05.o TestQthreads_SubView_c06.o + OBJ_QTHREADS += TestQthreads_SubView_c07.o TestQthreads_SubView_c08.o TestQthreads_SubView_c09.o + OBJ_QTHREADS += TestQthreads_SubView_c10.o TestQthreads_SubView_c11.o TestQthreads_SubView_c12.o + OBJ_QTHREADS += TestQthreads_ViewAPI_a.o TestQthreads_ViewAPI_b.o TestQthreads_ViewAPI_c.o TestQthreads_ViewAPI_d.o TestQthreads_ViewAPI_e.o UnitTestMain.o gtest-all.o + TARGETS += KokkosCore_UnitTest_Qthreads - OBJ_QTHREADS2 = UnitTestMainInit.o gtest-all.o - OBJ_QTHREADS2 += TestQthreads_Complex.o - TARGETS += KokkosCore_UnitTest_Qthreads2 + OBJ_QTHREADS2 = UnitTestMainInit.o gtest-all.o + OBJ_QTHREADS2 += TestQthreads_Complex.o + TARGETS += KokkosCore_UnitTest_Qthreads2 - TEST_TARGETS += test-qthreads + TEST_TARGETS += test-qthreads +endif + +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + OBJ_HPX = UnitTestMainInit.o gtest-all.o + OBJ_HPX += TestHPX_Init.o + OBJ_HPX += TestHPX_SharedAlloc.o + OBJ_HPX += TestHPX_RangePolicy.o + OBJ_HPX += TestHPX_View_64bit.o + OBJ_HPX += TestHPX_ViewAPI_a.o TestHPX_ViewAPI_b.o TestHPX_ViewAPI_c.o TestHPX_ViewAPI_d.o TestHPX_ViewAPI_e.o + OBJ_HPX += TestHPX_ViewMapping_a.o TestHPX_ViewMapping_b.o TestHPX_ViewMapping_subview.o + OBJ_HPX += TestHPX_ViewOfClass.o + OBJ_HPX += TestHPX_SubView_a.o TestHPX_SubView_b.o + OBJ_HPX += TestHPX_SubView_c01.o TestHPX_SubView_c02.o TestHPX_SubView_c03.o + OBJ_HPX += TestHPX_SubView_c04.o TestHPX_SubView_c05.o TestHPX_SubView_c06.o + OBJ_HPX += TestHPX_SubView_c07.o TestHPX_SubView_c08.o TestHPX_SubView_c09.o + OBJ_HPX += TestHPX_SubView_c10.o TestHPX_SubView_c11.o TestHPX_SubView_c12.o + OBJ_HPX += TestHPX_SubView_c13.o + OBJ_HPX += TestHPX_Reductions.o + OBJ_HPX += TestHPX_Scan.o + OBJ_HPX += TestHPX_Reducers_a.o TestHPX_Reducers_b.o TestHPX_Reducers_c.o TestHPX_Reducers_d.o + OBJ_HPX += TestHPX_Complex.o + OBJ_HPX += TestHPX_AtomicOperations_int.o TestHPX_AtomicOperations_unsignedint.o TestHPX_AtomicOperations_longint.o + OBJ_HPX += TestHPX_AtomicOperations_unsignedlongint.o TestHPX_AtomicOperations_longlongint.o TestHPX_AtomicOperations_double.o TestHPX_AtomicOperations_float.o + OBJ_HPX += TestHPX_AtomicViews.o TestHPX_Atomics.o + OBJ_HPX += TestHPX_Team.o + OBJ_HPX += TestHPX_TeamVectorRange.o + OBJ_HPX += TestHPX_TeamScratch.o + OBJ_HPX += TestHPX_TeamReductionScan.o + OBJ_HPX += TestHPX_Other.o + OBJ_HPX += TestHPX_MDRange_a.o TestHPX_MDRange_b.o TestHPX_MDRange_c.o TestHPX_MDRange_d.o TestHPX_MDRange_e.o + OBJ_HPX += TestHPX_Crs.o + OBJ_HPX += TestHPX_Task.o + OBJ_HPX += TestHPX_WorkGraph.o + OBJ_HPX += TestHPX_UniqueToken.o + + TARGETS += KokkosCore_UnitTest_HPX + TARGETS += KokkosCore_UnitTest_HPXInterOp + + TEST_TARGETS += test-hpx endif ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - OBJ_SERIAL = UnitTestMainInit.o gtest-all.o - OBJ_SERIAL += TestSerial_Init.o - OBJ_SERIAL += TestSerial_SharedAlloc.o - OBJ_SERIAL += TestSerial_RangePolicy.o - OBJ_SERIAL += TestSerial_View_64bit.o - OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o TestSerial_ViewAPI_c.o TestSerial_ViewAPI_d.o TestSerial_ViewAPI_e.o - OBJ_SERIAL += TestSerial_ViewMapping_a.o TestSerial_ViewMapping_b.o TestSerial_ViewMapping_subview.o TestSerial_ViewLayoutStrideAssignment.o - OBJ_SERIAL += TestSerial_ViewOfClass.o - OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o - OBJ_SERIAL += TestSerial_SubView_c01.o TestSerial_SubView_c02.o TestSerial_SubView_c03.o - OBJ_SERIAL += TestSerial_SubView_c04.o TestSerial_SubView_c05.o TestSerial_SubView_c06.o - OBJ_SERIAL += TestSerial_SubView_c07.o TestSerial_SubView_c08.o TestSerial_SubView_c09.o - OBJ_SERIAL += TestSerial_SubView_c10.o TestSerial_SubView_c11.o TestSerial_SubView_c12.o - OBJ_SERIAL += TestSerial_SubView_c13.o - OBJ_SERIAL += TestSerial_Reductions.o TestSerial_Scan.o - OBJ_SERIAL += TestSerial_Reductions_DeviceView.o - OBJ_SERIAL += TestSerial_Reducers_a.o TestSerial_Reducers_b.o TestSerial_Reducers_c.o TestSerial_Reducers_d.o - OBJ_SERIAL += TestSerial_Complex.o - OBJ_SERIAL += TestSerial_AtomicOperations_int.o TestSerial_AtomicOperations_unsignedint.o TestSerial_AtomicOperations_longint.o - OBJ_SERIAL += TestSerial_AtomicOperations_unsignedlongint.o TestSerial_AtomicOperations_longlongint.o TestSerial_AtomicOperations_double.o TestSerial_AtomicOperations_float.o - OBJ_SERIAL += TestSerial_AtomicViews.o TestSerial_Atomics.o - OBJ_SERIAL += TestSerial_Team.o TestSerial_TeamScratch.o - OBJ_SERIAL += TestSerial_TeamReductionScan.o TestSerial_TeamTeamSize.o - OBJ_SERIAL += TestSerial_Other.o - #HCC_WORKAROUND - ifneq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) + OBJ_SERIAL = UnitTestMainInit.o gtest-all.o + OBJ_SERIAL += TestSerial_Init.o + OBJ_SERIAL += TestSerial_SharedAlloc.o + OBJ_SERIAL += TestSerial_RangePolicy.o + OBJ_SERIAL += TestSerial_View_64bit.o + OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o TestSerial_ViewAPI_c.o TestSerial_ViewAPI_d.o TestSerial_ViewAPI_e.o + OBJ_SERIAL += TestSerial_DeepCopyAlignment.o + OBJ_SERIAL += TestSerial_ViewMapping_a.o TestSerial_ViewMapping_b.o TestSerial_ViewMapping_subview.o TestSerial_ViewLayoutStrideAssignment.o + OBJ_SERIAL += TestSerial_ViewOfClass.o + OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o + OBJ_SERIAL += TestSerial_SubView_c01.o TestSerial_SubView_c02.o TestSerial_SubView_c03.o + OBJ_SERIAL += TestSerial_SubView_c04.o TestSerial_SubView_c05.o TestSerial_SubView_c06.o + OBJ_SERIAL += TestSerial_SubView_c07.o TestSerial_SubView_c08.o TestSerial_SubView_c09.o + OBJ_SERIAL += TestSerial_SubView_c10.o TestSerial_SubView_c11.o TestSerial_SubView_c12.o + OBJ_SERIAL += TestSerial_SubView_c13.o + OBJ_SERIAL += TestSerial_Reductions.o TestSerial_Scan.o + OBJ_SERIAL += TestSerial_Reductions_DeviceView.o + OBJ_SERIAL += TestSerial_Reducers_a.o TestSerial_Reducers_b.o TestSerial_Reducers_c.o TestSerial_Reducers_d.o + OBJ_SERIAL += TestSerial_Complex.o + OBJ_SERIAL += TestSerial_AtomicOperations_int.o TestSerial_AtomicOperations_unsignedint.o TestSerial_AtomicOperations_longint.o + OBJ_SERIAL += TestSerial_AtomicOperations_unsignedlongint.o TestSerial_AtomicOperations_longlongint.o TestSerial_AtomicOperations_double.o TestSerial_AtomicOperations_float.o + OBJ_SERIAL += TestSerial_AtomicOperations_complexfloat.o TestSerial_AtomicOperations_complexdouble.o + OBJ_SERIAL += TestSerial_AtomicViews.o TestSerial_Atomics.o + OBJ_SERIAL += TestSerial_Team.o TestSerial_TeamScratch.o + OBJ_SERIAL += TestSerial_TeamVectorRange.o + OBJ_SERIAL += TestSerial_TeamReductionScan.o TestSerial_TeamTeamSize.o + OBJ_SERIAL += TestSerial_Other.o + #HCC_WORKAROUND + ifneq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) OBJ_SERIAL += TestSerial_MDRange_a.o TestSerial_MDRange_b.o TestSerial_MDRange_c.o TestSerial_MDRange_d.o TestSerial_MDRange_e.o - endif - OBJ_SERIAL += TestSerial_Crs.o - OBJ_SERIAL += TestSerial_Task.o TestSerial_WorkGraph.o - - TARGETS += KokkosCore_UnitTest_Serial + endif + OBJ_SERIAL += TestSerial_Crs.o + OBJ_SERIAL += TestSerial_Task.o TestSerial_WorkGraph.o + OBJ_SERIAL += TestSerial_LocalDeepCopy.o - TEST_TARGETS += test-serial + TARGETS += KokkosCore_UnitTest_Serial + + TEST_TARGETS += test-serial endif OBJ_HWLOC = TestHWLOC.o UnitTestMain.o gtest-all.o @@ -298,10 +356,10 @@ TEST_TARGETS += test-host-barrier OBJ_DEFAULT = UnitTestMainInit.o gtest-all.o ifneq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifneq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) - OBJ_DEFAULT += TestDefaultDeviceType.o - OBJ_DEFAULT += TestDefaultDeviceType_a1.o TestDefaultDeviceType_b1.o TestDefaultDeviceType_c1.o - OBJ_DEFAULT += TestDefaultDeviceType_a2.o TestDefaultDeviceType_b2.o TestDefaultDeviceType_c2.o - OBJ_DEFAULT += TestDefaultDeviceType_a3.o TestDefaultDeviceType_b3.o TestDefaultDeviceType_c3.o + OBJ_DEFAULT += TestDefaultDeviceType.o + OBJ_DEFAULT += TestDefaultDeviceType_a1.o TestDefaultDeviceType_b1.o TestDefaultDeviceType_c1.o + OBJ_DEFAULT += TestDefaultDeviceType_a2.o TestDefaultDeviceType_b2.o TestDefaultDeviceType_c2.o + OBJ_DEFAULT += TestDefaultDeviceType_a3.o TestDefaultDeviceType_b3.o TestDefaultDeviceType_c3.o OBJ_DEFAULT += TestDefaultDeviceType_d.o endif endif @@ -325,9 +383,11 @@ TEST_TARGETS += ${INITTESTS_TEST_TARGETS} KokkosCore_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Cuda -KokkosCore_UnitTest_CudaInterOp: UnitTestMain.o gtest-all.o TestCuda_InterOp.o - $(LINK) $(EXTRA_PATH) UnitTestMain.o gtest-all.o TestCuda_InterOp.o $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_CudaInterOp - +KokkosCore_UnitTest_CudaInterOpInit: UnitTestMain.o gtest-all.o TestCuda_InterOp_Init.o $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) UnitTestMain.o gtest-all.o TestCuda_InterOp_Init.o $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_CudaInterOpInit +KokkosCore_UnitTest_CudaInterOpStreams: UnitTestMain.o gtest-all.o TestCuda_InterOp_Streams.o $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) UnitTestMain.o gtest-all.o TestCuda_InterOp_Streams.o $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_CudaInterOpStreams + KokkosCore_UnitTest_ROCm: $(OBJ_ROCM) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_ROCM) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_ROCm @@ -337,7 +397,7 @@ KokkosCore_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) KokkosCore_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_OpenMP -KokkosCore_UnitTest_OpenMPInterOp: UnitTestMain.o gtest-all.o TestOpenMP_InterOp.o +KokkosCore_UnitTest_OpenMPInterOp: UnitTestMain.o gtest-all.o TestOpenMP_InterOp.o $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) UnitTestMain.o gtest-all.o TestOpenMP_InterOp.o $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_OpenMPInterOp KokkosCore_UnitTest_OpenMPTarget: $(OBJ_OPENMPTARGET) $(KOKKOS_LINK_DEPENDS) @@ -352,6 +412,12 @@ KokkosCore_UnitTest_Qthreads: $(OBJ_QTHREADS) $(KOKKOS_LINK_DEPENDS) KokkosCore_UnitTest_Qthreads2: $(OBJ_QTHREADS2) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_QTHREADS2) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Qthreads2 +KokkosCore_UnitTest_HPX: $(OBJ_HPX) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) $(OBJ_HPX) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_HPX + +KokkosCore_UnitTest_HPXInterOp: UnitTestMain.o gtest-all.o TestHPX_InterOp.o $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) UnitTestMain.o gtest-all.o TestHPX_InterOp.o $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_HPXInterOp + KokkosCore_UnitTest_HWLOC: $(OBJ_HWLOC) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_HWLOC) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_HWLOC @@ -376,7 +442,8 @@ ${INITTESTS_TARGETS}: KokkosCore_UnitTest_DefaultDeviceTypeInit_%: TestDefaultDe test-cuda: KokkosCore_UnitTest_Cuda ./KokkosCore_UnitTest_Cuda - ./KokkosCore_UnitTest_CudaInterOp + ./KokkosCore_UnitTest_CudaInterOpInit + ./KokkosCore_UnitTest_CudaInterOpStreams test-rocm: KokkosCore_UnitTest_ROCm ./KokkosCore_UnitTest_ROCm @@ -398,6 +465,10 @@ test-qthreads: KokkosCore_UnitTest_Qthreads KokkosCore_UnitTest_Qthreads2 ./KokkosCore_UnitTest_Qthreads ./KokkosCore_UnitTest_Qthreads2 +test-hpx: KokkosCore_UnitTest_HPX + ./KokkosCore_UnitTest_HPX + ./KokkosCore_UnitTest_HPXInterOp + test-hwloc: KokkosCore_UnitTest_HWLOC ./KokkosCore_UnitTest_HWLOC diff --git a/lib/kokkos/core/unit_test/TestAtomic.hpp b/lib/kokkos/core/unit_test/TestAtomic.hpp index 58b6325115..ee93d53470 100644 --- a/lib/kokkos/core/unit_test/TestAtomic.hpp +++ b/lib/kokkos/core/unit_test/TestAtomic.hpp @@ -211,13 +211,13 @@ T AddLoop( int loop ) { f_zero.data = data; Kokkos::parallel_for( 1, f_zero ); - execution_space::fence(); + execution_space().fence(); struct AddFunctor< T, execution_space > f_add; f_add.data = data; Kokkos::parallel_for( loop, f_add ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -226,7 +226,7 @@ T AddLoop( int loop ) { f_add_red.data = data; int dummy_result; Kokkos::parallel_reduce( loop, f_add_red , dummy_result ); - execution_space::fence(); + execution_space().fence(); return val; } @@ -298,12 +298,12 @@ T CASLoop( int loop ) { f_zero.data = data; Kokkos::parallel_for( 1, f_zero ); - execution_space::fence(); + execution_space().fence(); struct CASFunctor< T, execution_space > f_cas; f_cas.data = data; Kokkos::parallel_for( loop, f_cas ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -312,7 +312,7 @@ T CASLoop( int loop ) { f_cas_red.data = data; int dummy_result; Kokkos::parallel_reduce( loop, f_cas_red , dummy_result ); - execution_space::fence(); + execution_space().fence(); return val; } @@ -381,20 +381,20 @@ T ExchLoop( int loop ) { f_zero.data = data; Kokkos::parallel_for( 1, f_zero ); - execution_space::fence(); + execution_space().fence(); typename ZeroFunctor< T, execution_space >::type data2( "Data" ); typename ZeroFunctor< T, execution_space >::h_type h_data2( "HData" ); f_zero.data = data2; Kokkos::parallel_for( 1, f_zero ); - execution_space::fence(); + execution_space().fence(); struct ExchFunctor< T, execution_space > f_exch; f_exch.data = data; f_exch.data2 = data2; Kokkos::parallel_for( loop, f_exch ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); Kokkos::deep_copy( h_data2, data2 ); @@ -405,7 +405,7 @@ T ExchLoop( int loop ) { f_exch_red.data2 = data2; int dummy_result; Kokkos::parallel_reduce( loop, f_exch_red , dummy_result ); - execution_space::fence(); + execution_space().fence(); return val; } diff --git a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp index d068c18d87..e043737e42 100644 --- a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp +++ b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp @@ -113,13 +113,13 @@ T MaxAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct MaxFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -191,13 +191,13 @@ T MinAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct MinFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -268,13 +268,13 @@ T IncAtomic( T i0 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct IncFunctor< T, execution_space > f( i0 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -345,13 +345,13 @@ T DecAtomic( T i0 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct DecFunctor< T, execution_space > f( i0 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -423,13 +423,13 @@ T MulAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct MulFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -501,13 +501,13 @@ T DivAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct DivFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -536,7 +536,9 @@ bool DivAtomicTest( T i0, T i1 ) bool passed = true; - if ( (resSerial-res)*(resSerial-res) > 1e-10 ) { + using std::abs; + using Kokkos::abs; + if ( abs( (resSerial-res) * 1.) > 1e-5 ) { passed = false; std::cout << "Loop<" @@ -579,13 +581,13 @@ T ModAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct ModFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -657,13 +659,13 @@ T AndAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct AndFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -735,13 +737,13 @@ T OrAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct OrFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -813,13 +815,13 @@ T XorAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct XorFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -891,13 +893,13 @@ T LShiftAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct LShiftFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -969,13 +971,13 @@ T RShiftAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct RShiftFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); diff --git a/lib/kokkos/core/unit_test/TestAtomicOperations_complexdouble.hpp b/lib/kokkos/core/unit_test/TestAtomicOperations_complexdouble.hpp new file mode 100644 index 0000000000..a8474d8952 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestAtomicOperations_complexdouble.hpp @@ -0,0 +1,57 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { +TEST_F( TEST_CATEGORY , atomic_operations_complexdouble ) +{ + const int start = 1; // Avoid zero for division. + const int end = 11; + for ( int i = start; i < end; ++i ) + { + ASSERT_TRUE( ( TestAtomicOperations::MulAtomicTest< Kokkos::complex, TEST_EXECSPACE >( start , end - i) ) ); + ASSERT_TRUE( ( TestAtomicOperations::DivAtomicTest< Kokkos::complex, TEST_EXECSPACE >( start , end - i) ) ); + } +} +} diff --git a/lib/kokkos/core/unit_test/TestAtomicOperations_complexfloat.hpp b/lib/kokkos/core/unit_test/TestAtomicOperations_complexfloat.hpp new file mode 100644 index 0000000000..961418e675 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestAtomicOperations_complexfloat.hpp @@ -0,0 +1,57 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { +TEST_F( TEST_CATEGORY , atomic_operations_complexfloat ) +{ + const int start = 1; // Avoid zero for division. + const int end = 11; + for ( int i = start; i < end; ++i ) + { + ASSERT_TRUE( ( TestAtomicOperations::MulAtomicTest< Kokkos::complex, TEST_EXECSPACE >( start , end - i) ) ); + ASSERT_TRUE( ( TestAtomicOperations::DivAtomicTest< Kokkos::complex, TEST_EXECSPACE >( start , end - i) ) ); + } +} +} diff --git a/lib/kokkos/core/unit_test/TestCXX11.hpp b/lib/kokkos/core/unit_test/TestCXX11.hpp index 8a158e2667..542b4a1912 100644 --- a/lib/kokkos/core/unit_test/TestCXX11.hpp +++ b/lib/kokkos/core/unit_test/TestCXX11.hpp @@ -235,6 +235,7 @@ double ReduceTestFunctor() { else { Kokkos::parallel_reduce( policy_type( 25, Kokkos::AUTO ), FunctorReduceTest< DeviceType >( a ), unmanaged_result( & result ) ); } + Kokkos::fence(); return result; } @@ -281,6 +282,7 @@ double ReduceTestLambda() { } }, unmanaged_result( & result ) ); } + Kokkos::fence(); return result; } diff --git a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp index e6b5c48d3d..07c332a9ae 100644 --- a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp +++ b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp @@ -102,7 +102,7 @@ bool Test() { AddFunctor< DeviceType > f( a, b ); Kokkos::parallel_for( 1024, f ); - DeviceType::fence(); + DeviceType().fence(); return true; } diff --git a/lib/kokkos/core/unit_test/TestDeepCopy.hpp b/lib/kokkos/core/unit_test/TestDeepCopy.hpp new file mode 100644 index 0000000000..aebf263290 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDeepCopy.hpp @@ -0,0 +1,167 @@ +#include + +namespace Test { + +namespace Impl { +template +struct TestDeepCopy { + + typedef Kokkos::View a_base_t; + typedef Kokkos::View b_base_t; + typedef Kokkos::View a_char_t; + typedef Kokkos::View b_char_t; + + typedef Kokkos::RangePolicy policyA_t; + typedef Kokkos::RangePolicy policyB_t; + + static void reset_a_copy_and_b(Kokkos::View a_char_copy, Kokkos::View b_char) { + const int N = b_char.extent(0); + Kokkos::parallel_for("TestDeepCopy: FillA_copy",policyA_t(0,N), KOKKOS_LAMBDA (const int& i) { + a_char_copy(i) = char(0); + }); + Kokkos::parallel_for("TestDeepCopy: FillB",policyB_t(0,N), KOKKOS_LAMBDA (const int& i) { + b_char(i) = char(0); + }); + } + + static int compare_equal(Kokkos::View a_char_copy, Kokkos::View a_char) { + const int N = a_char.extent(0); + int errors; + Kokkos::parallel_reduce("TestDeepCopy: FillA_copy",policyA_t(0,N), KOKKOS_LAMBDA (const int& i, int& lsum) { + if(a_char_copy(i) != a_char(i)) lsum++; + },errors); + return errors; + } + + static void run_test(int num_bytes) { + a_base_t a_base("test_space_to_space",(num_bytes+128)/8); + a_base_t a_base_copy("test_space_to_space",(num_bytes+128)/8); + Kokkos::View b_base("test_space_to_space",(num_bytes+128)/8); + + Kokkos::View a_char((char*) a_base.data(),a_base.extent(0)*8); + Kokkos::View a_char_copy((char*) a_base_copy.data(),a_base.extent(0)*8); + Kokkos::View b_char((char*) b_base.data(),b_base.extent(0)*8); + + Kokkos::parallel_for("TestDeepCopy: FillA",policyA_t(0,a_char.extent(0)), KOKKOS_LAMBDA (const int& i) { + a_char(i) = static_cast(i%97)+1; + }); + + reset_a_copy_and_b(a_char_copy, b_char); + + { + int check = compare_equal(a_char_copy,a_char); + ASSERT_EQ( check, a_char.extent(0) ); + } + + // (a.data()%8, (a.data()+a.extent(0))%8, b.data()%8, (b.data()+b.extent(0))%8 + // (0,0,0,0) + { + int a_begin = 0; + int a_end = 0; + int b_begin = 0; + int b_end = 0; + auto a = Kokkos::subview(a_char,std::pair(a_begin,a_char.extent(0)-a_end)); + auto b = Kokkos::subview(b_char,std::pair(b_begin,b_char.extent(0)-b_end)); + auto a_copy = Kokkos::subview(a_char_copy,std::pair(a_begin,a_char_copy.extent(0)-a_end)); + Kokkos::deep_copy(b,a); + Kokkos::deep_copy(a_copy,b); + int check = compare_equal(a_copy,a); + ASSERT_EQ( check, 0 ); + } + + { + int a_begin = 0; + int a_end = 5; + int b_begin = 0; + int b_end = 5; + auto a = Kokkos::subview(a_char,std::pair(a_begin,a_char.extent(0)-a_end)); + auto b = Kokkos::subview(b_char,std::pair(b_begin,b_char.extent(0)-b_end)); + auto a_copy = Kokkos::subview(a_char_copy,std::pair(a_begin,a_char_copy.extent(0)-a_end)); + Kokkos::deep_copy(b,a); + Kokkos::deep_copy(a_copy,b); + int check = compare_equal(a_copy,a); + ASSERT_EQ( check, 0 ); + } + + { + int a_begin = 3; + int a_end = 0; + int b_begin = 3; + int b_end = 0; + auto a = Kokkos::subview(a_char,std::pair(a_begin,a_char.extent(0)-a_end)); + auto b = Kokkos::subview(b_char,std::pair(b_begin,b_char.extent(0)-b_end)); + auto a_copy = Kokkos::subview(a_char_copy,std::pair(a_begin,a_char_copy.extent(0)-a_end)); + Kokkos::deep_copy(b,a); + Kokkos::deep_copy(a_copy,b); + int check = compare_equal(a_copy,a); + ASSERT_EQ( check, 0 ); + } + + { + int a_begin = 3; + int a_end = 6; + int b_begin = 3; + int b_end = 6; + auto a = Kokkos::subview(a_char,std::pair(a_begin,a_char.extent(0)-a_end)); + auto b = Kokkos::subview(b_char,std::pair(b_begin,b_char.extent(0)-b_end)); + auto a_copy = Kokkos::subview(a_char_copy,std::pair(a_begin,a_char_copy.extent(0)-a_end)); + Kokkos::deep_copy(b,a); + Kokkos::deep_copy(a_copy,b); + int check = compare_equal(a_copy,a); + ASSERT_EQ( check, 0 ); + } + + { + int a_begin = 5; + int a_end = 4; + int b_begin = 3; + int b_end = 6; + auto a = Kokkos::subview(a_char,std::pair(a_begin,a_char.extent(0)-a_end)); + auto b = Kokkos::subview(b_char,std::pair(b_begin,b_char.extent(0)-b_end)); + auto a_copy = Kokkos::subview(a_char_copy,std::pair(a_begin,a_char_copy.extent(0)-a_end)); + Kokkos::deep_copy(b,a); + Kokkos::deep_copy(a_copy,b); + int check = compare_equal(a_copy,a); + ASSERT_EQ( check, 0 ); + } + + { + int a_begin = 0; + int a_end = 8; + int b_begin = 2; + int b_end = 6; + auto a = Kokkos::subview(a_char,std::pair(a_begin,a_char.extent(0)-a_end)); + auto b = Kokkos::subview(b_char,std::pair(b_begin,b_char.extent(0)-b_end)); + auto a_copy = Kokkos::subview(a_char_copy,std::pair(a_begin,a_char_copy.extent(0)-a_end)); + Kokkos::deep_copy(b,a); + Kokkos::deep_copy(a_copy,b); + int check = compare_equal(a_copy,a); + ASSERT_EQ( check, 0 ); + } + + { + int a_begin = 2; + int a_end = 6; + int b_begin = 0; + int b_end = 8; + auto a = Kokkos::subview(a_char,std::pair(a_begin,a_char.extent(0)-a_end)); + auto b = Kokkos::subview(b_char,std::pair(b_begin,b_char.extent(0)-b_end)); + auto a_copy = Kokkos::subview(a_char_copy,std::pair(a_begin,a_char_copy.extent(0)-a_end)); + Kokkos::deep_copy(b,a); + Kokkos::deep_copy(a_copy,b); + int check = compare_equal(a_copy,a); + ASSERT_EQ( check, 0 ); + } + + } +}; +} + +TEST_F( TEST_CATEGORY, deep_copy_alignment ) +{ + { Impl::TestDeepCopy< TEST_EXECSPACE::memory_space , TEST_EXECSPACE::memory_space >::run_test( 100000 ); } + { Impl::TestDeepCopy< Kokkos::HostSpace , TEST_EXECSPACE::memory_space >::run_test( 100000 ); } + { Impl::TestDeepCopy< TEST_EXECSPACE::memory_space , Kokkos::HostSpace >::run_test( 100000 ); } +} + +} diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp index 1e1418fcbf..1261948f87 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp @@ -222,6 +222,14 @@ void check_correct_initialization( const Kokkos::InitArguments & argstruct ) { expected_nthreads = 1; } #endif + +#ifdef KOKKOS_ENABLE_HPX + // HPX uses all cores on machine by default. Skip this test. + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Experimental::HPX >::value || + std::is_same< Kokkos::DefaultHostExecutionSpace, Kokkos::Experimental::HPX >::value ) { + return; + } +#endif } int expected_numa = argstruct.num_numa; diff --git a/lib/kokkos/core/unit_test/TestLocalDeepCopy.hpp b/lib/kokkos/core/unit_test/TestLocalDeepCopy.hpp new file mode 100644 index 0000000000..31bda530a5 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestLocalDeepCopy.hpp @@ -0,0 +1,904 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include +#include +#include +#include + +#include + +namespace Test { + + template + void impl_test_local_deepcopy_teampolicy_rank_1 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. + auto subA = Kokkos::subview(A, 1, 1, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + //Deep Copy + Kokkos::parallel_for( team_policy( N, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type &teamMember ) { + int lid = teamMember.league_rank();// returns a number between 0 and N + auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, 1, 1, lid, Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, 1, lid, Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(teamMember,subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i + void impl_test_local_deepcopy_teampolicy_rank_2 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. + auto subA = Kokkos::subview(A, 1, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + //Deep Copy + Kokkos::parallel_for( team_policy( N, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type &teamMember ) { + int lid = teamMember.league_rank();// returns a number between 0 and N + auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(teamMember,subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i + void impl_test_local_deepcopy_teampolicy_rank_3 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. + auto subA = Kokkos::subview(A, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + //Deep Copy + Kokkos::parallel_for( team_policy( N, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type &teamMember ) { + int lid = teamMember.league_rank();// returns a number between 0 and N + auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(teamMember,subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i + void impl_test_local_deepcopy_teampolicy_rank_4 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. + auto subA = Kokkos::subview(A, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + //Deep Copy + Kokkos::parallel_for( team_policy( N, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type &teamMember ) { + int lid = teamMember.league_rank();// returns a number between 0 and N + auto subSrc = Kokkos::subview(A, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(teamMember,subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i + void impl_test_local_deepcopy_teampolicy_rank_5 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. + auto subA = Kokkos::subview(A, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + //Deep Copy + Kokkos::parallel_for( team_policy( N, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type &teamMember ) { + int lid = teamMember.league_rank();// returns a number between 0 and N + auto subSrc = Kokkos::subview(A, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(teamMember,subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i + void impl_test_local_deepcopy_teampolicy_rank_6 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. + auto subA = Kokkos::subview(A, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + //Deep Copy + Kokkos::parallel_for( team_policy( N, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type &teamMember ) { + int lid = teamMember.league_rank();// returns a number between 0 and N + auto subSrc = Kokkos::subview(A, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(teamMember,subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i + void impl_test_local_deepcopy_teampolicy_rank_7 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. + Kokkos::deep_copy( A, 10.0 ); + + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + //Deep Copy + Kokkos::parallel_for( team_policy( N, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type &teamMember ) { + int lid = teamMember.league_rank();// returns a number between 0 and N + auto subSrc = Kokkos::subview(A, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(teamMember,subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i + void impl_test_local_deepcopy_rangepolicy_rank_1 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. + auto subA = Kokkos::subview(A, 1, 1, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + //Deep Copy + Kokkos::parallel_for( Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, 1, 1, i, Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, 1, i, Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, 1, i, Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,20.0); + }); + + Kokkos::deep_copy( h_B, B ); + + double sum_all = 0.0; + for(size_t i=0; i + void impl_test_local_deepcopy_rangepolicy_rank_2 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. + auto subA = Kokkos::subview(A, 1, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + //Deep Copy + Kokkos::parallel_for( Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,20.0); + }); + + Kokkos::deep_copy( h_B, B ); + + double sum_all = 0.0; + for(size_t i=0; i + void impl_test_local_deepcopy_rangepolicy_rank_3 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. + auto subA = Kokkos::subview(A, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + //Deep Copy + Kokkos::parallel_for( Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,20.0); + }); + + Kokkos::deep_copy( h_B, B ); + + double sum_all = 0.0; + for(size_t i=0; i + void impl_test_local_deepcopy_rangepolicy_rank_4 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. + auto subA = Kokkos::subview(A, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + //Deep Copy + Kokkos::parallel_for( Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subSrc = Kokkos::subview(A, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subDst = Kokkos::subview(B, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,20.0); + }); + + Kokkos::deep_copy( h_B, B ); + + double sum_all = 0.0; + for(size_t i=0; i + void impl_test_local_deepcopy_rangepolicy_rank_5 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. + auto subA = Kokkos::subview(A, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + //Deep Copy + Kokkos::parallel_for( Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subSrc = Kokkos::subview(A, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subDst = Kokkos::subview(B, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,20.0); + }); + + Kokkos::deep_copy( h_B, B ); + + double sum_all = 0.0; + for(size_t i=0; i + void impl_test_local_deepcopy_rangepolicy_rank_6 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. + auto subA = Kokkos::subview(A, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + //Deep Copy + Kokkos::parallel_for( Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subSrc = Kokkos::subview(A, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subDst = Kokkos::subview(B, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,20.0); + }); + + Kokkos::deep_copy( h_B, B ); + + double sum_all = 0.0; + for(size_t i=0; i + void impl_test_local_deepcopy_rangepolicy_rank_7 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. + Kokkos::deep_copy( A, 10.0 ); + + //Deep Copy + Kokkos::parallel_for( Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subSrc = Kokkos::subview(A, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subDst = Kokkos::subview(B, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,20.0); + }); + + Kokkos::deep_copy( h_B, B ); + + double sum_all = 0.0; + for(size_t i=0; i ViewType; + + { //Rank-1 + impl_test_local_deepcopy_teampolicy_rank_1(8); + } + { //Rank-2 + impl_test_local_deepcopy_teampolicy_rank_2(8); + } + { //Rank-3 + impl_test_local_deepcopy_teampolicy_rank_3(8); + } + { //Rank-4 + impl_test_local_deepcopy_teampolicy_rank_4(8); + } + { //Rank-5 + impl_test_local_deepcopy_teampolicy_rank_5(8); + } + { //Rank-6 + impl_test_local_deepcopy_teampolicy_rank_6(8); + } + { //Rank-7 + impl_test_local_deepcopy_teampolicy_rank_7(8); + } +} +//------------------------------------------------------------------------------------------------------------- +TEST_F( TEST_CATEGORY , local_deepcopy_rangepolicy_layoutleft ) +{ + typedef TEST_EXECSPACE ExecSpace; + typedef Kokkos::View ViewType; + + { //Rank-1 + impl_test_local_deepcopy_rangepolicy_rank_1(8); + } + { //Rank-2 + impl_test_local_deepcopy_rangepolicy_rank_2(8); + } + { //Rank-3 + impl_test_local_deepcopy_rangepolicy_rank_3(8); + } + { //Rank-4 + impl_test_local_deepcopy_rangepolicy_rank_4(8); + } + { //Rank-5 + impl_test_local_deepcopy_rangepolicy_rank_5(8); + } + { //Rank-6 + impl_test_local_deepcopy_rangepolicy_rank_6(8); + } + { //Rank-7 + impl_test_local_deepcopy_rangepolicy_rank_7(8); + } +} +//------------------------------------------------------------------------------------------------------------- +TEST_F( TEST_CATEGORY , local_deepcopy_teampolicy_layoutright ) +{ + typedef TEST_EXECSPACE ExecSpace; + typedef Kokkos::View ViewType; + + { //Rank-1 + impl_test_local_deepcopy_teampolicy_rank_1(8); + } + { //Rank-2 + impl_test_local_deepcopy_teampolicy_rank_2(8); + } + { //Rank-3 + impl_test_local_deepcopy_teampolicy_rank_3(8); + } + { //Rank-4 + impl_test_local_deepcopy_teampolicy_rank_4(8); + } + { //Rank-5 + impl_test_local_deepcopy_teampolicy_rank_5(8); + } + { //Rank-6 + impl_test_local_deepcopy_teampolicy_rank_6(8); + } + { //Rank-7 + impl_test_local_deepcopy_teampolicy_rank_7(8); + } +} +//------------------------------------------------------------------------------------------------------------- +TEST_F( TEST_CATEGORY , local_deepcopy_rangepolicy_layoutright ) +{ + typedef TEST_EXECSPACE ExecSpace; + typedef Kokkos::View ViewType; + + { //Rank-1 + impl_test_local_deepcopy_rangepolicy_rank_1(8); + } + { //Rank-2 + impl_test_local_deepcopy_rangepolicy_rank_2(8); + } + { //Rank-3 + impl_test_local_deepcopy_rangepolicy_rank_3(8); + } + { //Rank-4 + impl_test_local_deepcopy_rangepolicy_rank_4(8); + } + { //Rank-5 + impl_test_local_deepcopy_rangepolicy_rank_5(8); + } + { //Rank-6 + impl_test_local_deepcopy_rangepolicy_rank_6(8); + } + { //Rank-7 + impl_test_local_deepcopy_rangepolicy_rank_7(8); + } +} +#endif +#endif +} diff --git a/lib/kokkos/core/unit_test/TestMDRange.hpp b/lib/kokkos/core/unit_test/TestMDRange.hpp index a382a20700..cea89a4872 100644 --- a/lib/kokkos/core/unit_test/TestMDRange.hpp +++ b/lib/kokkos/core/unit_test/TestMDRange.hpp @@ -351,6 +351,7 @@ struct TestMDRange_2D { Kokkos::Sum< value_type > reducer_view( sum_view ); parallel_reduce( range, functor, reducer_view); + Kokkos::fence(); sum = sum_view(); ASSERT_EQ( sum, 2 * N0 * N1 ); @@ -931,6 +932,7 @@ struct TestMDRange_3D { Kokkos::Sum< value_type > reducer_view( sum_view ); parallel_reduce( range, functor, reducer_view); + Kokkos::fence(); sum = sum_view(); ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); @@ -1502,6 +1504,7 @@ struct TestMDRange_4D { Kokkos::Sum< value_type > reducer_view( sum_view ); parallel_reduce( range, functor, reducer_view); + Kokkos::fence(); sum = sum_view(); ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 ); @@ -2089,6 +2092,7 @@ struct TestMDRange_5D { Kokkos::Sum< value_type > reducer_view( sum_view ); parallel_reduce( range, functor, reducer_view); + Kokkos::fence(); sum = sum_view(); ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 ); @@ -2607,6 +2611,7 @@ struct TestMDRange_6D { Kokkos::Sum< value_type > reducer_view( sum_view ); parallel_reduce( range, functor, reducer_view); + Kokkos::fence(); sum = sum_view(); ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 * N5 ); diff --git a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp index efbb32e387..be744a7712 100644 --- a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp +++ b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp @@ -476,6 +476,9 @@ private: void test_run_time_parameters_type() { int league_size = 131; int team_size = 4 < policy_t::execution_space::concurrency() ? 4 : policy_t::execution_space::concurrency(); +#ifdef KOKKOS_ENABLE_HPX + team_size = 1; +#endif int chunk_size = 4; int per_team_scratch = 1024; int per_thread_scratch = 16; diff --git a/lib/kokkos/core/unit_test/TestReduceCombinatorical.hpp b/lib/kokkos/core/unit_test/TestReduceCombinatorical.hpp index fe947fe14e..293cc0ca59 100644 --- a/lib/kokkos/core/unit_test/TestReduceCombinatorical.hpp +++ b/lib/kokkos/core/unit_test/TestReduceCombinatorical.hpp @@ -453,15 +453,18 @@ struct TestReduceCombinatoricalInstantiation { result_view() = 0; CallParallelReduce( args..., result_view ); + Kokkos::fence(); ASSERT_EQ( expected_result, result_view() ); value = 0; CallParallelReduce( args..., Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryTraits >( &value ) ); + Kokkos::fence(); ASSERT_EQ( expected_result, value ); result_view() = 0; const Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryTraits > result_view_const_um = result_view; CallParallelReduce( args..., result_view_const_um ); + Kokkos::fence(); ASSERT_EQ( expected_result, result_view_const_um() ); value = 0; @@ -526,18 +529,21 @@ struct TestReduceCombinatoricalInstantiation { h_r() = 0; Kokkos::deep_copy( result_view, h_r ); CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarFinal< ISTEAM >( result_view ) ); + Kokkos::fence(); Kokkos::deep_copy( h_r, result_view ); ASSERT_EQ( expected_result, h_r() ); h_r() = 0; Kokkos::deep_copy( result_view, h_r ); CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarJoinFinal< ISTEAM >( result_view ) ); + Kokkos::fence(); Kokkos::deep_copy( h_r, result_view ); ASSERT_EQ( expected_result, h_r() ); h_r() = 0; Kokkos::deep_copy( result_view, h_r ); CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarJoinFinalInit< ISTEAM >( result_view ) ); + Kokkos::fence(); Kokkos::deep_copy( h_r, result_view ); ASSERT_EQ( expected_result, h_r() ); } diff --git a/lib/kokkos/core/unit_test/TestReduceDeviceView.hpp b/lib/kokkos/core/unit_test/TestReduceDeviceView.hpp index 4f65166e37..d55c5449bc 100644 --- a/lib/kokkos/core/unit_test/TestReduceDeviceView.hpp +++ b/lib/kokkos/core/unit_test/TestReduceDeviceView.hpp @@ -30,7 +30,7 @@ void test_reduce_device_view(int64_t N, PolicyType policy, ReduceFunctor functor TestIsAsynchFunctor(atomic_test)); double time0 = timer.seconds(); timer.reset(); - ExecSpace::execution_space::fence(); + typename ExecSpace::execution_space().fence(); double time_fence0 = timer.seconds(); Kokkos::deep_copy(result,0); timer.reset(); @@ -42,7 +42,7 @@ void test_reduce_device_view(int64_t N, PolicyType policy, ReduceFunctor functor double time1 = timer.seconds(); // Check whether it was asyncronous timer.reset(); - ExecSpace::execution_space::fence(); + typename ExecSpace::execution_space().fence(); double time_fence1 = timer.seconds(); Kokkos::deep_copy(reducer_result,result); Kokkos::deep_copy(result,0); @@ -55,7 +55,7 @@ void test_reduce_device_view(int64_t N, PolicyType policy, ReduceFunctor functor double time2 = timer.seconds(); // Check whether it was asyncronous timer.reset(); - ExecSpace::execution_space::fence(); + typename ExecSpace::execution_space().fence(); double time_fence2 = timer.seconds(); Kokkos::deep_copy(view_result,result); Kokkos::deep_copy(result,0); @@ -69,7 +69,7 @@ void test_reduce_device_view(int64_t N, PolicyType policy, ReduceFunctor functor // Check whether it was asyncronous timer.reset(); - ExecSpace::execution_space::fence(); + typename ExecSpace::execution_space().fence(); double time_fence3 = timer.seconds(); ASSERT_EQ(N,scalar_result); diff --git a/lib/kokkos/core/unit_test/TestReducers.hpp b/lib/kokkos/core/unit_test/TestReducers.hpp index 7270ea3375..1d77574412 100644 --- a/lib/kokkos/core/unit_test/TestReducers.hpp +++ b/lib/kokkos/core/unit_test/TestReducers.hpp @@ -319,6 +319,7 @@ struct TestReducers { sum_view() = init; Kokkos::Sum< Scalar > reducer_view( sum_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar sum_view_scalar = sum_view(); ASSERT_EQ( sum_view_scalar, reference_sum ); @@ -365,6 +366,7 @@ struct TestReducers { prod_view() = init; Kokkos::Prod< Scalar > reducer_view( prod_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar prod_view_scalar = prod_view(); ASSERT_EQ( prod_view_scalar, reference_prod ); @@ -412,6 +414,7 @@ struct TestReducers { min_view() = init; Kokkos::Min< Scalar > reducer_view( min_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar min_view_scalar = min_view(); ASSERT_EQ( min_view_scalar, reference_min ); @@ -459,6 +462,7 @@ struct TestReducers { max_view() = init; Kokkos::Max< Scalar > reducer_view( max_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar max_view_scalar = max_view(); ASSERT_EQ( max_view_scalar, reference_max ); @@ -517,6 +521,7 @@ struct TestReducers { Kokkos::View< value_type, Kokkos::HostSpace > min_view( "View" ); Kokkos::MinLoc< Scalar, int > reducer_view( min_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); value_type min_view_scalar = min_view(); ASSERT_EQ( min_view_scalar.val, reference_min ); @@ -577,6 +582,7 @@ struct TestReducers { Kokkos::View< value_type, Kokkos::HostSpace > max_view( "View" ); Kokkos::MaxLoc< Scalar, int > reducer_view( max_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); value_type max_view_scalar = max_view(); ASSERT_EQ( max_view_scalar.val, reference_max ); @@ -687,6 +693,7 @@ struct TestReducers { Kokkos::View< value_type, Kokkos::HostSpace > minmax_view( "View" ); Kokkos::MinMaxLoc< Scalar, int > reducer_view( minmax_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); value_type minmax_view_scalar = minmax_view(); ASSERT_EQ( minmax_view_scalar.min_val, reference_min ); @@ -740,6 +747,7 @@ struct TestReducers { band_view() = init; Kokkos::BAnd< Scalar > reducer_view( band_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar band_view_scalar = band_view(); ASSERT_EQ( band_view_scalar, reference_band ); @@ -786,6 +794,7 @@ struct TestReducers { bor_view() = init; Kokkos::BOr< Scalar > reducer_view( bor_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar bor_view_scalar = bor_view(); ASSERT_EQ( bor_view_scalar, reference_bor ); @@ -832,6 +841,7 @@ struct TestReducers { land_view() = init; Kokkos::LAnd< Scalar > reducer_view( land_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar land_view_scalar = land_view(); ASSERT_EQ( land_view_scalar, reference_land ); @@ -878,6 +888,7 @@ struct TestReducers { lor_view() = init; Kokkos::LOr< Scalar > reducer_view( lor_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar lor_view_scalar = lor_view(); ASSERT_EQ( lor_view_scalar, reference_lor ); diff --git a/lib/kokkos/core/unit_test/TestScan.hpp b/lib/kokkos/core/unit_test/TestScan.hpp index e021ed09f5..eaebb254a7 100644 --- a/lib/kokkos/core/unit_test/TestScan.hpp +++ b/lib/kokkos/core/unit_test/TestScan.hpp @@ -96,6 +96,7 @@ struct TestScan { long long int total = 0; Kokkos::parallel_scan( N, *this, total ); + run_check( size_t( ( N+1 )*N/2 ), size_t( total ) ); check_error(); } @@ -109,6 +110,8 @@ struct TestScan { errors = errors_a; Kokkos::parallel_scan( exec_policy( Start , N ) , *this ); + Kokkos::fence(); + check_error(); } @@ -138,7 +141,7 @@ TEST_F( TEST_CATEGORY, scan ) TestScan< TEST_EXECSPACE >( 0 ); TestScan< TEST_EXECSPACE >( 100000 ); TestScan< TEST_EXECSPACE >( 10000000 ); - TEST_EXECSPACE::fence(); + TEST_EXECSPACE().fence(); } @@ -153,7 +156,7 @@ TEST_F( TEST_CATEGORY, scan ) TestScanFunctor( 1000000 ); TestScanFunctor( 10000000 ); - TEST_EXECSPACE::fence(); + TEST_EXECSPACE().fence(); }*/ diff --git a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp index 1a942b89c8..c475fe55dc 100644 --- a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp +++ b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp @@ -107,6 +107,8 @@ void test_shared_alloc() ASSERT_EQ( r[i], RecordMemS::get_record( r[i]->data() ) ); }); + Kokkos::fence(); + #ifdef KOKKOS_DEBUG // Sanity check for the whole set of allocation records to which this record belongs. RecordBase::is_sane( r[0] ); @@ -120,6 +122,8 @@ void test_shared_alloc() #endif } }); + + Kokkos::fence(); } { @@ -145,6 +149,8 @@ void test_shared_alloc() ASSERT_EQ( r[i], RecordMemS::get_record( r[i]->data() ) ); }); + Kokkos::fence(); + #ifdef KOKKOS_DEBUG RecordBase::is_sane( r[0] ); #endif @@ -157,6 +163,8 @@ void test_shared_alloc() } }); + Kokkos::fence(); + ASSERT_EQ( destroy_count, int( N ) ); } @@ -196,12 +204,14 @@ void test_shared_alloc() ASSERT_EQ( track.use_count(), 1 ); } - Kokkos::parallel_for( range, [=] ( size_t i ) { + Kokkos::parallel_for( range, [=] ( size_t ) { Tracker local_tracker; local_tracker.assign_allocated_record_to_uninitialized( rec ); ASSERT_GT( rec->use_count(), 1 ); }); + Kokkos::fence(); + ASSERT_EQ( rec->use_count(), 1 ); ASSERT_EQ( track.use_count(), 1 ); diff --git a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp index ac32a01fb8..361e8da9e1 100644 --- a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp +++ b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp @@ -47,11 +47,15 @@ #include #if defined( KOKKOS_ENABLE_TASKDAG ) #include +#include #include #include #include +//============================================================================== +// {{{1 + namespace TestTaskScheduler { namespace { @@ -72,29 +76,30 @@ long eval_fib( long n ) } -template< typename Space > +template< typename Scheduler > struct TestFib { - typedef Kokkos::TaskScheduler< Space > sched_type; - typedef Kokkos::Future< long, Space > future_type; - typedef long value_type; + using sched_type = Scheduler; + using future_type = Kokkos::BasicFuture< long, Scheduler >; + using value_type = long; - sched_type sched; future_type fib_m1; future_type fib_m2; const value_type n; KOKKOS_INLINE_FUNCTION - TestFib( const sched_type & arg_sched, const value_type arg_n ) - : sched( arg_sched ), fib_m1(), fib_m2(), n( arg_n ) {} + TestFib( const value_type arg_n ) + : fib_m1(), fib_m2(), n( arg_n ) {} KOKKOS_INLINE_FUNCTION - void operator()( typename sched_type::member_type &, value_type & result ) + void operator()( typename sched_type::member_type & member, value_type & result ) { #if 0 printf( "\nTestFib(%ld) %d %d\n", n, int( !fib_m1.is_null() ), int( !fib_m2.is_null() ) ); #endif + auto& sched = member.scheduler(); + if ( n < 2 ) { result = n; } @@ -107,13 +112,13 @@ struct TestFib // path to completion. fib_m2 = Kokkos::task_spawn( Kokkos::TaskSingle( sched, Kokkos::TaskPriority::High ) - , TestFib( sched, n - 2 ) ); + , TestFib( n - 2 ) ); fib_m1 = Kokkos::task_spawn( Kokkos::TaskSingle( sched ) - , TestFib( sched, n - 1 ) ); + , TestFib( n - 1 ) ); - Kokkos::Future< Space > dep[] = { fib_m1, fib_m2 }; - Kokkos::Future< Space > fib_all = Kokkos::when_all( dep, 2 ); + Kokkos::BasicFuture dep[] = { fib_m1, fib_m2 }; + Kokkos::BasicFuture fib_all = sched.when_all( dep, 2 ); if ( !fib_m2.is_null() && !fib_m1.is_null() && !fib_all.is_null() ) { // High priority to retire this branch. @@ -123,9 +128,9 @@ struct TestFib #if 1 printf( "TestFib(%ld) insufficient memory alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" , n - , sched.allocation_capacity() - , sched.allocated_task_count_max() - , sched.allocated_task_count_accum() + , 0 //sched.allocation_capacity() + , 0 //sched.allocated_task_count_max() + , 0l //sched.allocated_task_count_accum() ); #endif @@ -149,12 +154,18 @@ struct TestFib , std::min(size_t(MaxBlockSize),MemoryCapacity) , std::min(size_t(SuperBlockSize),MemoryCapacity) ); - future_type f = Kokkos::host_spawn( Kokkos::TaskSingle( root_sched ) - , TestFib( root_sched, i ) ); + { + future_type f = Kokkos::host_spawn( Kokkos::TaskSingle( root_sched ) + , TestFib( i ) ); + + Kokkos::wait( root_sched ); + + ASSERT_EQ( eval_fib( i ), f.get() ); + } + + ASSERT_EQ(root_sched.queue().allocation_count(), 0); - Kokkos::wait( root_sched ); - ASSERT_EQ( eval_fib( i ), f.get() ); #if 0 fprintf( stdout, "\nTestFib::run(%d) spawn_size(%d) when_all_size(%d) alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" @@ -172,32 +183,36 @@ struct TestFib } // namespace TestTaskScheduler +// end TestFib }}}1 +//============================================================================== + //---------------------------------------------------------------------------- +//============================================================================== +// {{{1 + namespace TestTaskScheduler { -template< class Space > +template< class Scheduler > struct TestTaskDependence { - typedef Kokkos::TaskScheduler< Space > sched_type; - typedef Kokkos::Future< Space > future_type; - typedef Kokkos::View< long, Space > accum_type; + typedef Scheduler sched_type; + typedef Kokkos::BasicFuture< void, Scheduler > future_type; + typedef Kokkos::View< long, typename sched_type::execution_space > accum_type; typedef void value_type; - sched_type m_sched; accum_type m_accum; long m_count; KOKKOS_INLINE_FUNCTION TestTaskDependence( long n - , const sched_type & arg_sched , const accum_type & arg_accum ) - : m_sched( arg_sched ) - , m_accum( arg_accum ) + : m_accum( arg_accum ) , m_count( n ) {} KOKKOS_INLINE_FUNCTION - void operator()( typename sched_type::member_type & ) + void operator()( typename sched_type::member_type & member ) { + auto& sched = member.scheduler(); enum { CHUNK = 8 }; const int n = CHUNK < m_count ? CHUNK : m_count; @@ -206,14 +221,14 @@ struct TestTaskDependence { const int increment = ( m_count + n - 1 ) / n; future_type f = - m_sched.when_all( n , [this,increment]( int i ) { + sched.when_all( n , [this,&member,increment]( int i ) { const long inc = increment ; const long begin = i * inc ; const long count = begin + inc < m_count ? inc : m_count - begin ; return Kokkos::task_spawn - ( Kokkos::TaskSingle( m_sched ) - , TestTaskDependence( count, m_sched, m_accum ) ); + ( Kokkos::TaskSingle( member.scheduler() ) + , TestTaskDependence( count, m_accum ) ); }); m_count = 0; @@ -244,7 +259,7 @@ struct TestTaskDependence { typename accum_type::HostMirror host_accum = Kokkos::create_mirror_view( accum ); - Kokkos::host_spawn( Kokkos::TaskSingle( sched ), TestTaskDependence( n, sched, accum ) ); + Kokkos::host_spawn( Kokkos::TaskSingle( sched ), TestTaskDependence( n, accum ) ); Kokkos::wait( sched ); @@ -256,22 +271,25 @@ struct TestTaskDependence { } // namespace TestTaskScheduler +// end TestTaskDependence }}}1 +//============================================================================== + //---------------------------------------------------------------------------- namespace TestTaskScheduler { -template< class ExecSpace > +template< class Scheduler > struct TestTaskTeam { //enum { SPAN = 8 }; enum { SPAN = 33 }; //enum { SPAN = 1 }; typedef void value_type; - typedef Kokkos::TaskScheduler< ExecSpace > sched_type; - typedef Kokkos::Future< ExecSpace > future_type; + using sched_type = Scheduler; + using future_type = Kokkos::BasicFuture; + using ExecSpace = typename sched_type::execution_space; typedef Kokkos::View< long*, ExecSpace > view_type; - sched_type sched; future_type future; view_type parfor_result; @@ -281,14 +299,12 @@ struct TestTaskTeam { const long nvalue; KOKKOS_INLINE_FUNCTION - TestTaskTeam( const sched_type & arg_sched - , const view_type & arg_parfor_result + TestTaskTeam( const view_type & arg_parfor_result , const view_type & arg_parreduce_check , const view_type & arg_parscan_result , const view_type & arg_parscan_check , const long arg_nvalue ) - : sched( arg_sched ) - , future() + : future() , parfor_result( arg_parfor_result ) , parreduce_check( arg_parreduce_check ) , parscan_result( arg_parscan_result ) @@ -298,21 +314,22 @@ struct TestTaskTeam { KOKKOS_INLINE_FUNCTION void operator()( typename sched_type::member_type & member ) { + auto& sched = member.scheduler(); const long end = nvalue + 1; + // begin = max(end - SPAN, 0); const long begin = 0 < end - SPAN ? end - SPAN : 0; if ( 0 < begin && future.is_null() ) { if ( member.team_rank() == 0 ) { future = Kokkos::task_spawn( Kokkos::TaskTeam( sched ) - , TestTaskTeam( sched - , parfor_result + , TestTaskTeam( parfor_result , parreduce_check , parscan_result , parscan_check , begin - 1 ) ); - #ifndef __HCC_ACCELERATOR__ + #if !defined(__HCC_ACCELERATOR__) && !defined(__CUDA_ARCH__) assert( !future.is_null() ); #endif @@ -449,8 +466,7 @@ struct TestTaskTeam { host_parscan_check = Kokkos::create_mirror_view( root_parscan_check ); future_type f = Kokkos::host_spawn( Kokkos::TaskTeam( root_sched ) - , TestTaskTeam( root_sched - , root_parfor_result + , TestTaskTeam( root_parfor_result , root_parreduce_check , root_parscan_result , root_parscan_check @@ -492,27 +508,25 @@ struct TestTaskTeam { } }; -template< class ExecSpace > +template< class Scheduler > struct TestTaskTeamValue { enum { SPAN = 8 }; typedef long value_type; - typedef Kokkos::TaskScheduler< ExecSpace > sched_type; - typedef Kokkos::Future< value_type, ExecSpace > future_type; + using sched_type = Scheduler; + using future_type = Kokkos::BasicFuture< value_type, sched_type >; + using ExecSpace = typename sched_type::execution_space; typedef Kokkos::View< long*, ExecSpace > view_type; - sched_type sched; future_type future; view_type result; const long nvalue; KOKKOS_INLINE_FUNCTION - TestTaskTeamValue( const sched_type & arg_sched - , const view_type & arg_result + TestTaskTeamValue( const view_type & arg_result , const long arg_nvalue ) - : sched( arg_sched ) - , future() + : future() , result( arg_result ) , nvalue( arg_nvalue ) {} @@ -523,12 +537,16 @@ struct TestTaskTeamValue { const long end = nvalue + 1; const long begin = 0 < end - SPAN ? end - SPAN : 0; + auto& sched = member.scheduler(); + if ( 0 < begin && future.is_null() ) { if ( member.team_rank() == 0 ) { - future = sched.task_spawn( TestTaskTeamValue( sched, result, begin - 1 ) + future = sched.task_spawn( TestTaskTeamValue( result, begin - 1 ) , Kokkos::TaskTeam ); + #if !defined(__HCC_ACCELERATOR__) && !defined(__CUDA_ARCH__) assert( !future.is_null() ); + #endif sched.respawn( this , future ); } @@ -565,7 +583,7 @@ struct TestTaskTeamValue { typename view_type::HostMirror host_result = Kokkos::create_mirror_view( root_result ); - future_type fv = root_sched.host_spawn( TestTaskTeamValue( root_sched, root_result, n ) + future_type fv = root_sched.host_spawn( TestTaskTeamValue( root_result, n ) , Kokkos::TaskTeam ); Kokkos::wait( root_sched ); @@ -594,31 +612,30 @@ struct TestTaskTeamValue { namespace TestTaskScheduler { -template< class Space > +template< class Scheduler > struct TestTaskSpawnWithPool { - typedef Kokkos::TaskScheduler< Space > sched_type; - typedef Kokkos::Future< Space > future_type; + using sched_type = Scheduler; + using future_type = Kokkos::BasicFuture; typedef void value_type; + using Space = typename sched_type::execution_space; - sched_type m_sched ; int m_count ; Kokkos::MemoryPool m_pool ; KOKKOS_INLINE_FUNCTION - TestTaskSpawnWithPool( const sched_type & arg_sched - , const int & arg_count - , const Kokkos::MemoryPool & arg_pool - ) - : m_sched( arg_sched ) - , m_count( arg_count ) + TestTaskSpawnWithPool( + const int & arg_count, + const Kokkos::MemoryPool & arg_pool + ) + : m_count( arg_count ) , m_pool( arg_pool ) {} KOKKOS_INLINE_FUNCTION - void operator()( typename sched_type::member_type & ) + void operator()( typename sched_type::member_type & member ) { if ( m_count ) { - Kokkos::task_spawn( Kokkos::TaskSingle( m_sched ) , TestTaskSpawnWithPool( m_sched , m_count - 1, m_pool ) ); + Kokkos::task_spawn( Kokkos::TaskSingle( member.scheduler() ) , TestTaskSpawnWithPool( m_count - 1, m_pool ) ); } } @@ -639,7 +656,7 @@ struct TestTaskSpawnWithPool { using other_memory_space = typename Space::memory_space; Kokkos::MemoryPool pool(other_memory_space(), 10000, 100, 200, 1000); - auto f = Kokkos::host_spawn( Kokkos::TaskSingle( sched ), TestTaskSpawnWithPool( sched, 3, pool ) ); + auto f = Kokkos::host_spawn( Kokkos::TaskSingle( sched ), TestTaskSpawnWithPool( 3, pool ) ); Kokkos::wait( sched ); } @@ -647,36 +664,307 @@ struct TestTaskSpawnWithPool { } -namespace Test { +//---------------------------------------------------------------------------- -TEST_F( TEST_CATEGORY, task_fib ) -{ - const int N = 27 ; - for ( int i = 0; i < N; ++i ) { - TestTaskScheduler::TestFib< TEST_EXECSPACE >::run( i , ( i + 1 ) * ( i + 1 ) * 2000 ); +namespace TestTaskScheduler { + +template< class Scheduler > +struct TestTaskCtorsDevice { + using sched_type = Scheduler; + using future_type = Kokkos::BasicFuture; + using value_type = void; + using Space = typename sched_type::execution_space; + + int m_count; + + KOKKOS_INLINE_FUNCTION + TestTaskCtorsDevice(const int & arg_count) : m_count(arg_count) { } + + KOKKOS_INLINE_FUNCTION + void operator()(typename sched_type::member_type& member ) + { + // Note: Default construction on the device is not allowed + if(m_count == 4) { + Kokkos::task_spawn( + Kokkos::TaskSingle(member.scheduler()), + TestTaskCtorsDevice(m_count - 1) + ); + } + else if(m_count == 3) { + sched_type s = member.scheduler(); // move construct + s = member.scheduler(); // move assignment + Kokkos::task_spawn( + Kokkos::TaskSingle(s), + TestTaskCtorsDevice(m_count - 1) + ); + } + else if(m_count == 2) { + sched_type s3 = member.scheduler(); // move construct from member.scheduler(); + Kokkos::task_spawn( + Kokkos::TaskSingle(s3), + TestTaskCtorsDevice(m_count - 1) + ); + } + else if(m_count == 1) { + sched_type s = member.scheduler(); // move construct from member.scheduler(); + sched_type s2 = s; // copy construct from s + Kokkos::task_spawn( + Kokkos::TaskSingle(s2), + TestTaskCtorsDevice(m_count - 1) + ); + } } -} -TEST_F( TEST_CATEGORY, task_depend ) -{ - for ( int i = 0; i < 25; ++i ) { - TestTaskScheduler::TestTaskDependence< TEST_EXECSPACE >::run( i ); + static void run() + { + using memory_space = typename sched_type::memory_space; + + enum { MemoryCapacity = 16000 }; + enum { MinBlockSize = 64 }; + enum { MaxBlockSize = 1024 }; + enum { SuperBlockSize = 4096 }; + + sched_type sched( + memory_space(), MemoryCapacity, MinBlockSize, MaxBlockSize, SuperBlockSize + ); + + auto f = Kokkos::host_spawn( + Kokkos::TaskSingle(sched), + TestTaskCtorsDevice(4) + ); + + Kokkos::wait(sched); + + // TODO assertions and sanity checks + } -} - -TEST_F( TEST_CATEGORY, task_team ) -{ - TestTaskScheduler::TestTaskTeam< TEST_EXECSPACE >::run( 1000 ); - //TestTaskScheduler::TestTaskTeamValue< TEST_EXECSPACE >::run( 1000 ); // Put back after testing. -} - -TEST_F( TEST_CATEGORY, task_with_mempool ) -{ - TestTaskScheduler::TestTaskSpawnWithPool< TEST_EXECSPACE >::run(); -} +}; } +//---------------------------------------------------------------------------- + + +namespace TestTaskScheduler { + +template +struct TestMultipleDependence { + + using sched_type = Scheduler; + using future_bool = Kokkos::BasicFuture; + using future_int = Kokkos::BasicFuture; + using value_type = bool; + using execution_space = typename sched_type::execution_space; + + enum : int { NPerDepth = 6 }; + enum : int { NFanout = 3 }; + + // xlC doesn't like incomplete aggregate constructors, so we have do do this manually: + KOKKOS_INLINE_FUNCTION + TestMultipleDependence(int depth, int max_depth) + : m_depth(depth), + m_max_depth(max_depth), + m_dep() + { + // gcc 4.8 has an internal compile error when I give the initializer in the class, so I have do do it here + for(int i = 0; i < NPerDepth; ++i) { + m_result_futures[i] = future_bool(); + } + } + + // xlC doesn't like incomplete aggregate constructors, so we have do do this manually: + KOKKOS_INLINE_FUNCTION + TestMultipleDependence(int depth, int max_depth, future_int dep) + : m_depth(depth), + m_max_depth(max_depth), + m_dep(dep) + { + // gcc 4.8 has an internal compile error when I give the initializer in the class, so I have do do it here + for(int i = 0; i < NPerDepth; ++i) { + m_result_futures[i] = future_bool(); + } + } + + int m_depth; + int m_max_depth; + future_int m_dep; + future_bool m_result_futures[NPerDepth]; + + + struct TestCheckReady { + future_int m_dep; + using value_type = bool; + KOKKOS_INLINE_FUNCTION + void operator()(typename Scheduler::member_type&, bool& value) { + // if it was "transiently" ready, this could be false even if we made it a dependence of this task + value = m_dep.is_ready(); + return; + } + }; + + + struct TestComputeValue { + using value_type = int; + KOKKOS_INLINE_FUNCTION + void operator()(typename Scheduler::member_type&, int& result) { + double value = 0; + // keep this one busy for a while + for(int i = 0; i < 10000; ++i) { + value += i * i / 7.138 / value; + } + // Do something irrelevant + result = int(value) << 2; + return; + } + }; + + + KOKKOS_INLINE_FUNCTION + void operator()(typename sched_type::member_type & member, bool& value) + { + if(m_result_futures[0].is_null()) { + if (m_depth == 0) { + // Spawn one expensive task at the root + m_dep = Kokkos::task_spawn(Kokkos::TaskSingle(member.scheduler()), TestComputeValue{}); + } + + // Then check for it to be ready in a whole bunch of other tasks that race + int n_checkers = NPerDepth; + if(m_depth < m_max_depth) { + n_checkers -= NFanout; + for(int i = n_checkers; i < NPerDepth; ++i) { + m_result_futures[i] = Kokkos::task_spawn(Kokkos::TaskSingle(member.scheduler()), + TestMultipleDependence(m_depth + 1, m_max_depth, m_dep) + ); + } + } + + for(int i = 0; i < n_checkers; ++i) { + m_result_futures[i] = member.scheduler().spawn(Kokkos::TaskSingle(m_dep), TestCheckReady{m_dep}); + } + auto done = member.scheduler().when_all(m_result_futures, NPerDepth); + Kokkos::respawn(this, done); + + return; + } + else { + value = true; + for(int i = 0; i < NPerDepth; ++i) { + value = value && !m_result_futures[i].is_null(); + if(value) { + value = value && m_result_futures[i].get(); + } + } + return; + } + } + + static void run(int depth) + { + typedef typename sched_type::memory_space memory_space; + + enum { MemoryCapacity = 1 << 30 }; + enum { MinBlockSize = 64 }; + enum { MaxBlockSize = 1024 }; + enum { SuperBlockSize = 4096 }; + + sched_type sched( memory_space() + , MemoryCapacity + , MinBlockSize + , MaxBlockSize + , SuperBlockSize ); + + auto f = Kokkos::host_spawn( Kokkos::TaskSingle( sched ), TestMultipleDependence( 0, depth ) ); + + Kokkos::wait( sched ); + + ASSERT_TRUE( f.get() ); + + } +}; + +} + +//---------------------------------------------------------------------------- + +#define KOKKOS_PP_CAT_IMPL(x, y) x ## y +#define KOKKOS_TEST_WITH_SUFFIX(x, y) KOKKOS_PP_CAT_IMPL(x, y) + +#define TEST_SCHEDULER_SUFFIX _deprecated +#define TEST_SCHEDULER Kokkos::DeprecatedTaskScheduler +#include "TestTaskScheduler_single.hpp" +#undef TEST_SCHEDULER +#undef TEST_SCHEDULER_SUFFIX + +#define TEST_SCHEDULER_SUFFIX _deprecated_multiple +#define TEST_SCHEDULER Kokkos::DeprecatedTaskSchedulerMultiple +#include "TestTaskScheduler_single.hpp" +#undef TEST_SCHEDULER +#undef TEST_SCHEDULER_SUFFIX + + +#define TEST_SCHEDULER_SUFFIX _single +#define TEST_SCHEDULER Kokkos::TaskScheduler +#include "TestTaskScheduler_single.hpp" +#undef TEST_SCHEDULER +#undef TEST_SCHEDULER_SUFFIX + +#define TEST_SCHEDULER_SUFFIX _multiple +#define TEST_SCHEDULER Kokkos::TaskSchedulerMultiple +#include "TestTaskScheduler_single.hpp" +#undef TEST_SCHEDULER +#undef TEST_SCHEDULER_SUFFIX + + +#define TEST_SCHEDULER_SUFFIX _chase_lev +#define TEST_SCHEDULER Kokkos::ChaseLevTaskScheduler +#include "TestTaskScheduler_single.hpp" +#undef TEST_SCHEDULER +#undef TEST_SCHEDULER_SUFFIX + +#if 0 +#define TEST_SCHEDULER_SUFFIX _fixed_mempool +#define TEST_SCHEDULER \ + Kokkos::SimpleTaskScheduler< \ + TEST_EXECSPACE, \ + Kokkos::Impl::SingleTaskQueue< \ + TEST_EXECSPACE, \ + Kokkos::Impl::default_tasking_memory_space_for_execution_space_t, \ + Kokkos::Impl::TaskQueueTraitsLockBased, \ + Kokkos::Impl::FixedBlockSizeMemoryPool< \ + Kokkos::Device>, \ + 128, \ + 16 \ + > \ + > \ + > +#include "TestTaskScheduler_single.hpp" +#undef TEST_SCHEDULER +#undef TEST_SCHEDULER_SUFFIX + +#define TEST_SCHEDULER_SUFFIX _fixed_mempool_multiple +#define TEST_SCHEDULER \ + Kokkos::SimpleTaskScheduler< \ + TEST_EXECSPACE, \ + Kokkos::Impl::MultipleTaskQueue< \ + TEST_EXECSPACE, \ + Kokkos::Impl::default_tasking_memory_space_for_execution_space_t, \ + Kokkos::Impl::TaskQueueTraitsLockBased, \ + Kokkos::Impl::FixedBlockSizeMemoryPool< \ + Kokkos::Device>, \ + 128, \ + 16 \ + > \ + > \ + > +#include "TestTaskScheduler_single.hpp" +#undef TEST_SCHEDULER +#undef TEST_SCHEDULER_SUFFIX +#endif + +#undef KOKKOS_TEST_WITH_SUFFIX +#undef KOKKOS_PP_CAT_IMPL + #endif // #if defined( KOKKOS_ENABLE_TASKDAG ) #endif // #ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP diff --git a/lib/kokkos/core/unit_test/TestTaskScheduler_single.hpp b/lib/kokkos/core/unit_test/TestTaskScheduler_single.hpp new file mode 100644 index 0000000000..6ac9a6d740 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestTaskScheduler_single.hpp @@ -0,0 +1,92 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace Test { + +TEST_F( TEST_CATEGORY, KOKKOS_TEST_WITH_SUFFIX(task_fib, TEST_SCHEDULER_SUFFIX) ) +{ + const int N = 27 ; + for ( int i = 0; i < N; ++i ) { + TestTaskScheduler::TestFib< TEST_SCHEDULER >::run( i , ( i + 1 ) * ( i + 1 ) * 64000 ); + } +} + +TEST_F( TEST_CATEGORY, KOKKOS_TEST_WITH_SUFFIX(task_depend, TEST_SCHEDULER_SUFFIX) ) +{ + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestTaskDependence< TEST_SCHEDULER >::run( i ); + } +} + +TEST_F( TEST_CATEGORY, KOKKOS_TEST_WITH_SUFFIX(task_team, TEST_SCHEDULER_SUFFIX) ) +{ + TestTaskScheduler::TestTaskTeam< TEST_SCHEDULER >::run( 1000 ); + //TestTaskScheduler::TestTaskTeamValue< TEST_EXECSPACE >::run( 1000 ); // Put back after testing. +} + +TEST_F( TEST_CATEGORY, KOKKOS_TEST_WITH_SUFFIX(task_with_mempool, TEST_SCHEDULER_SUFFIX) ) +{ + TestTaskScheduler::TestTaskSpawnWithPool::run(); +} + +TEST_F( TEST_CATEGORY, KOKKOS_TEST_WITH_SUFFIX(task_multiple_depend, TEST_SCHEDULER_SUFFIX) ) +{ + for ( int i = 2; i < 6; ++i ) { + TestTaskScheduler::TestMultipleDependence::run( i ); + } +} + +TEST_F( TEST_CATEGORY, KOKKOS_TEST_WITH_SUFFIX(task_scheduler_ctors, TEST_SCHEDULER_SUFFIX) ) +{ + TEST_SCHEDULER sched; + TEST_SCHEDULER sched2 = sched; + sched = sched2; +} + +TEST_F( TEST_CATEGORY, KOKKOS_TEST_WITH_SUFFIX(task_scheduer_ctors_device, TEST_SCHEDULER_SUFFIX) ) +{ + TestTaskScheduler::TestTaskCtorsDevice::run(); +} + + +} // end namespace Test \ No newline at end of file diff --git a/lib/kokkos/core/unit_test/TestTeam.hpp b/lib/kokkos/core/unit_test/TestTeam.hpp index 487a4d581c..5f325eb905 100644 --- a/lib/kokkos/core/unit_test/TestTeam.hpp +++ b/lib/kokkos/core/unit_test/TestTeam.hpp @@ -72,6 +72,7 @@ struct TestTeamPolicy { const int tid = member.team_rank() + member.team_size() * member.league_rank(); m_flags( member.team_rank(), member.league_rank() ) = tid; + static_assert((std::is_same::value),"TeamMember::execution_space is not the same as TeamPolicy<>::execution_space"); } KOKKOS_INLINE_FUNCTION @@ -265,7 +266,7 @@ public: Kokkos::parallel_reduce( team_exec, functor_type( nwork ), tmp ); } - execution_space::fence(); + execution_space().fence(); for ( unsigned i = 0; i < Repeat; ++i ) { for ( unsigned j = 0; j < Count; ++j ) { @@ -391,7 +392,7 @@ public: Kokkos::deep_copy( functor.accum, total ); Kokkos::parallel_reduce( team_exec, functor, result_type( & error ) ); - DeviceType::fence(); + DeviceType().fence(); Kokkos::deep_copy( accum, functor.accum ); Kokkos::deep_copy( total, functor.total ); @@ -400,7 +401,7 @@ public: ASSERT_EQ( total, accum ); } - execution_space::fence(); + execution_space().fence(); } }; @@ -495,6 +496,7 @@ struct TestSharedTeam { typename Functor::value_type error_count = 0; Kokkos::parallel_reduce( team_exec, Functor(), result_type( & error_count ) ); + Kokkos::fence(); ASSERT_EQ( error_count, 0 ); } @@ -569,6 +571,8 @@ struct TestLambdaSharedTeam { } }, result_type( & error_count ) ); + Kokkos::fence(); + ASSERT_EQ( error_count, 0 ); } }; @@ -679,6 +683,7 @@ struct TestScratchTeam { Kokkos::parallel_reduce( team_exec.set_scratch_size( 1, Kokkos::PerTeam( team_scratch_size ), Kokkos::PerThread( thread_scratch_size ) ), Functor(), result_type( & error_count ) ); + Kokkos::fence(); ASSERT_EQ( error_count, 0 ); } }; @@ -822,7 +827,6 @@ struct ClassNoShmemSizeFunction { Kokkos::TeamPolicy< TagReduce, ExecSpace, ScheduleType > policy( 10, team_size, 16 ); Kokkos::parallel_reduce( policy.set_scratch_size( 0, Kokkos::PerTeam( per_team0 ), Kokkos::PerThread( per_thread0 ) ).set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), Kokkos::PerThread( per_thread1 ) ), *this, error ); - Kokkos::fence(); ASSERT_EQ( error, 0 ); } @@ -877,7 +881,6 @@ struct ClassWithShmemSizeFunction { Kokkos::parallel_reduce( policy.set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), Kokkos::PerThread( per_thread1 ) ), *this, error ); - Kokkos::fence(); ASSERT_EQ( error, 0 ); } @@ -929,7 +932,6 @@ void test_team_mulit_level_scratch_test_lambda() { count += test_team_mulit_level_scratch_loop_body< ExecSpace >( team ); }, error ); ASSERT_EQ( error, 0 ); - Kokkos::fence(); #endif #endif } diff --git a/lib/kokkos/core/unit_test/TestTeamVector.hpp b/lib/kokkos/core/unit_test/TestTeamVector.hpp index 498d156db3..45433012f9 100644 --- a/lib/kokkos/core/unit_test/TestTeamVector.hpp +++ b/lib/kokkos/core/unit_test/TestTeamVector.hpp @@ -290,17 +290,23 @@ struct functor_team_reduce { functor_team_reduce( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} typedef typename ExecutionSpace::scratch_memory_space shmem_space; - typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; - unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); } + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_scalar_t; + unsigned team_shmem_size( int team_size ) const { return shared_scalar_t::shmem_size(team_size*13); } KOKKOS_INLINE_FUNCTION void operator()( typename policy_type::member_type team ) const { Scalar value = Scalar(); + shared_scalar_t shared_value(team.team_scratch(0),1); Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) { val += i - team.league_rank() + team.league_size() + team.team_size(); }, value ); + + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, shared_value(0) ); team.team_barrier(); @@ -314,11 +320,20 @@ struct functor_team_reduce { if ( test != value ) { if ( team.league_rank() == 0 ) { - printf( "FAILED team_parallel_reduce %i %i %f %f %lu\n", + printf( "FAILED team_parallel_reduce %i %i %lf %lf %lu\n", team.league_rank(), team.team_rank(), static_cast( test ), static_cast( value ), sizeof( Scalar ) ); } + flag() = 1; + } + if ( test != shared_value(0) ) { + if ( team.league_rank() == 0 ) { + printf( "FAILED team_parallel_reduce with shared result %i %i %lf %lf %lu\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( shared_value(0) ), sizeof( Scalar ) ); + } + flag() = 1; } }); @@ -335,12 +350,13 @@ struct functor_team_reduce_reducer { functor_team_reduce_reducer( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} typedef typename ExecutionSpace::scratch_memory_space shmem_space; - typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; - unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); } + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_scalar_t; + unsigned team_shmem_size( int team_size ) const { return shared_scalar_t::shmem_size(team_size*13); } KOKKOS_INLINE_FUNCTION void operator()( typename policy_type::member_type team ) const { Scalar value = 0; + shared_scalar_t shared_value(team.team_scratch(0),1); Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) { @@ -348,6 +364,13 @@ struct functor_team_reduce_reducer { }, Kokkos::Sum(value) ); + + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, + Kokkos::Sum(shared_value(0)) + ); team.team_barrier(); @@ -360,12 +383,19 @@ struct functor_team_reduce_reducer { } if ( test != value ) { - printf( "FAILED team_vector_parallel_reduce_reducer %i %i %f %f\n", + printf( "FAILED team_vector_parallel_reduce_reducer %i %i %lf %lf\n", team.league_rank(), team.team_rank(), static_cast( test ), static_cast( value ) ); flag() = 1; } + if ( test != shared_value(0) ) { + printf( "FAILED team_vector_parallel_reduce_reducer shared value %i %i %lf %lf\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( shared_value(0) ) ); + + flag() = 1; + } }); } }; @@ -823,7 +853,6 @@ namespace Test { // ( modified from kokkos-tutorials/GTC2016/Exercises/ThreeLevelPar ) #if ( ! defined( KOKKOS_ENABLE_CUDA ) ) || (defined( KOKKOS_ENABLE_CUDA_LAMBDA ) && (8000 <= CUDA_VERSION)) - template< typename ScalarType, class DeviceType > class TestTripleNestedReduce { @@ -843,6 +872,14 @@ public: if( team_size > size_type(DeviceType::execution_space::concurrency())) team_size = size_type(DeviceType::execution_space::concurrency()); +#ifdef KOKKOS_ENABLE_HPX + team_size = 1; + if (!std::is_same::value) + { + team_size = 1; + } +#endif + //typedef Kokkos::LayoutLeft Layout; typedef Kokkos::LayoutRight Layout; @@ -962,6 +999,8 @@ TEST_F( TEST_CATEGORY, triple_nested_parallelism ) } #endif TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 16, 16 ); + TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 16, 33 ); + TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 16, 19 ); #ifdef KOKKOS_ENABLE_ROCM // ROCm doesn't support team sizes not powers of two if (!std::is_same::value) #endif diff --git a/lib/kokkos/core/unit_test/TestTeamVectorRange.hpp b/lib/kokkos/core/unit_test/TestTeamVectorRange.hpp new file mode 100644 index 0000000000..86c8dab3ff --- /dev/null +++ b/lib/kokkos/core/unit_test/TestTeamVectorRange.hpp @@ -0,0 +1,464 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include +#include +#include +#include +#include + +namespace TestTeamVectorRange { + +struct my_complex { + double re, im; + int dummy; + + KOKKOS_INLINE_FUNCTION + my_complex() { + re = 0.0; + im = 0.0; + dummy = 0; + } + + KOKKOS_INLINE_FUNCTION + my_complex( const my_complex & src ) { + re = src.re; + im = src.im; + dummy = src.dummy; + } + + KOKKOS_INLINE_FUNCTION + my_complex & operator=( const my_complex & src ) { + re = src.re; + im = src.im; + dummy = src.dummy; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + my_complex & operator=( const volatile my_complex & src ) { + re = src.re; + im = src.im; + dummy = src.dummy; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + volatile my_complex & operator=( const my_complex & src ) volatile { + re = src.re; + im = src.im; + dummy = src.dummy; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + volatile my_complex & operator=( const volatile my_complex & src ) volatile { + re = src.re; + im = src.im; + dummy = src.dummy; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + my_complex( const volatile my_complex & src ) { + re = src.re; + im = src.im; + dummy = src.dummy; + } + + KOKKOS_INLINE_FUNCTION + my_complex( const double & val ) { + re = val; + im = 0.0; + dummy = 0; + } + + KOKKOS_INLINE_FUNCTION + my_complex & operator+=( const my_complex & src ) { + re += src.re; + im += src.im; + dummy += src.dummy; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator+=( const volatile my_complex & src ) volatile { + re += src.re; + im += src.im; + dummy += src.dummy; + } + + KOKKOS_INLINE_FUNCTION + my_complex operator +( const my_complex & src ) { + my_complex tmp = *this; + tmp.re += src.re; + tmp.im += src.im; + tmp.dummy += src.dummy; + return tmp; + } + + KOKKOS_INLINE_FUNCTION + my_complex operator+( const volatile my_complex & src ) volatile { + my_complex tmp = *this; + tmp.re += src.re; + tmp.im += src.im; + tmp.dummy += src.dummy; + return tmp; + } + + KOKKOS_INLINE_FUNCTION + my_complex & operator*=( const my_complex & src ) { + double re_tmp = re * src.re - im * src.im; + double im_tmp = re * src.im + im * src.re; + re = re_tmp; + im = im_tmp; + dummy *= src.dummy; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator*=( const volatile my_complex & src ) volatile { + double re_tmp = re * src.re - im * src.im; + double im_tmp = re * src.im + im * src.re; + re = re_tmp; + im = im_tmp; + dummy *= src.dummy; + } + + KOKKOS_INLINE_FUNCTION + bool operator==( const my_complex & src ) { + return ( re == src.re ) && ( im == src.im ) && ( dummy == src.dummy ); + } + + KOKKOS_INLINE_FUNCTION + bool operator!=( const my_complex & src ) { + return ( re != src.re ) || ( im != src.im ) || ( dummy != src.dummy ); + } + + KOKKOS_INLINE_FUNCTION + bool operator!=( const double & val ) { + return ( re != val ) || ( im != 0 ) || ( dummy != 0 ); + } + + KOKKOS_INLINE_FUNCTION + my_complex & operator=( const int & val ) { + re = val; + im = 0.0; + dummy = 0; + return *this; + } + + KOKKOS_INLINE_FUNCTION + my_complex & operator=( const double & val ) { + re = val; + im = 0.0; + dummy = 0; + return *this; + } + + KOKKOS_INLINE_FUNCTION + operator double() { + return re; + } +}; +} + +namespace Kokkos { +template<> +struct reduction_identity { + typedef reduction_identity t_red_ident; + KOKKOS_FORCEINLINE_FUNCTION static TestTeamVectorRange::my_complex sum() + {return TestTeamVectorRange::my_complex(t_red_ident::sum());} + KOKKOS_FORCEINLINE_FUNCTION static TestTeamVectorRange::my_complex prod() + {return TestTeamVectorRange::my_complex(t_red_ident::prod());} +}; +} + +namespace TestTeamVectorRange { + +template< typename Scalar, class ExecutionSpace > +struct functor_teamvector_for { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + + functor_teamvector_for( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; + unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(131); } + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { + typedef typename shmem_space::size_type size_type; + const size_type shmemSize = 131; + shared_int values = shared_int( team.team_shmem(), shmemSize ); + + if ( values.data() == nullptr || values.extent(0) < shmemSize ) { + printf( "FAILED to allocate shared memory of size %u\n", + static_cast( shmemSize ) ); + } + else { + // Initialize shared memory. + Kokkos::parallel_for( Kokkos::TeamVectorRange( team, 131 ), [&] ( int i ) { + values( i ) = 0; + }); + // Wait for all memory to be written. + team.team_barrier(); + + // Accumulate value into per thread shared memory. + // This is non blocking. + Kokkos::parallel_for( Kokkos::TeamVectorRange( team, 131 ), [&] ( int i ) + { + values( i ) += i - team.league_rank() + team.league_size() + team.team_size(); + }); + + // Wait for all memory to be written. + team.team_barrier(); + + // One thread per team executes the comparison. + Kokkos::single( Kokkos::PerTeam( team ), [&] () + { + Scalar test = 0; + Scalar value = 0; + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); + } + + for ( int i = 0; i < 131; ++i ) { + value += values( i ); + } + + if ( test != value ) { + printf ( "FAILED teamvector_parallel_for %i %i %f %f\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( value ) ); + flag() = 1; + } + }); + } + } +}; + +template< typename Scalar, class ExecutionSpace > +struct functor_teamvector_reduce { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + + functor_teamvector_reduce( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_scalar_t; + unsigned team_shmem_size( int team_size ) const { return shared_scalar_t::shmem_size(team_size*13); } + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { + Scalar value = Scalar(); + shared_scalar_t shared_value(team.team_scratch(0),1); + + Kokkos::parallel_reduce( Kokkos::TeamVectorRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, shared_value(0) ); + + team.team_barrier(); + Kokkos::parallel_reduce( Kokkos::TeamVectorRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, value ); + +// Kokkos::parallel_reduce( Kokkos::TeamVectorRange( team, 131 ), [&] ( int i, Scalar & val ) +// { +// val += i - team.league_rank() + team.league_size() + team.team_size(); +// }, shared_value(0) ); + + team.team_barrier(); + + Kokkos::single( Kokkos::PerTeam( team ), [&] () + { + Scalar test = 0; + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); + } + + if ( test != value ) { + if ( team.league_rank() == 0 ) { + printf( "FAILED teamvector_parallel_reduce %i %i %lf %lf %lu\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( value ), sizeof( Scalar ) ); + } + + flag() = 1; + } + if ( test != shared_value(0) ) { + if ( team.league_rank() == 0 ) { + printf( "FAILED teamvector_parallel_reduce with shared result %i %i %lf %lf %lu\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( shared_value(0) ), sizeof( Scalar ) ); + } + + flag() = 1; + } + }); + } +}; + +template< typename Scalar, class ExecutionSpace > +struct functor_teamvector_reduce_reducer { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + + functor_teamvector_reduce_reducer( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_scalar_t; + unsigned team_shmem_size( int team_size ) const { return shared_scalar_t::shmem_size(team_size*13); } + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { + Scalar value = 0; + shared_scalar_t shared_value(team.team_scratch(0),1); + + Kokkos::parallel_reduce( Kokkos::TeamVectorRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, + Kokkos::Sum(value) + ); + + Kokkos::parallel_reduce( Kokkos::TeamVectorRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, + Kokkos::Sum(shared_value(0)) + ); + + team.team_barrier(); + + Kokkos::single( Kokkos::PerTeam( team ), [&] () + { + Scalar test = 0; + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); + } + + if ( test != value ) { + printf( "FAILED teamvector_parallel_reduce_reducer %i %i %lf %lf\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( value ) ); + + flag() = 1; + } + if ( test != shared_value(0) ) { + printf( "FAILED teamvector_parallel_reduce_reducer shared value %i %i %lf %lf\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( shared_value(0) ) ); + + flag() = 1; + } + }); + } +}; + +template< typename Scalar, class ExecutionSpace > +bool test_scalar( int nteams, int team_size, int test ) { + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > d_flag( "flag" ); + typename Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace >::HostMirror h_flag( "h_flag" ); + h_flag() = 0; + Kokkos::deep_copy( d_flag, h_flag ); + + if ( test == 0 ) { + Kokkos::parallel_for( "Test::TeamVectorFor", Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_teamvector_for< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 1 ) { + Kokkos::parallel_for( "Test::TeamVectorReduce", Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_teamvector_reduce< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 2 ) { + Kokkos::parallel_for( "Test::TeamVectorReduceReducer", Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_teamvector_reduce_reducer< Scalar, ExecutionSpace >( d_flag ) ); + } + + Kokkos::deep_copy( h_flag, d_flag ); + + return ( h_flag() == 0 ); +} + +template< class ExecutionSpace > +bool Test( int test ) { + bool passed = true; + + int team_size = 33; + if( team_size > int(ExecutionSpace::concurrency())) + team_size = int(ExecutionSpace::concurrency()); + passed = passed && test_scalar< int, ExecutionSpace >( 317, team_size, test ); + passed = passed && test_scalar< long long int, ExecutionSpace >( 317, team_size, test ); + passed = passed && test_scalar< float, ExecutionSpace >( 317, team_size, test ); + passed = passed && test_scalar< double, ExecutionSpace >( 317, team_size, test ); + passed = passed && test_scalar< my_complex, ExecutionSpace >( 317, team_size, test ); + + return passed; +} + +} // namespace TestTeamVectorRange + +namespace Test { + +TEST_F( TEST_CATEGORY, team_teamvector_range ) +{ + ASSERT_TRUE( ( TestTeamVectorRange::Test< TEST_EXECSPACE >( 0 ) ) ); + ASSERT_TRUE( ( TestTeamVectorRange::Test< TEST_EXECSPACE >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVectorRange::Test< TEST_EXECSPACE >( 2 ) ) ); +} +} diff --git a/lib/kokkos/core/unit_test/TestTile.hpp b/lib/kokkos/core/unit_test/TestTile.hpp index 704c7f9940..a58755dc9b 100644 --- a/lib/kokkos/core/unit_test/TestTile.hpp +++ b/lib/kokkos/core/unit_test/TestTile.hpp @@ -42,6 +42,9 @@ #ifndef TEST_TILE_HPP #define TEST_TILE_HPP +//======================================================================== +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + #include #include @@ -166,4 +169,8 @@ TEST_F( TEST_CATEGORY, tile_layout ) } } + +#endif // KOKKOS_ENABLE_DEPRECATED_CODE +//===================================================================== + #endif //TEST_TILE_HPP diff --git a/lib/kokkos/core/unit_test/TestViewAPI.hpp b/lib/kokkos/core/unit_test/TestViewAPI.hpp index 2ebd48cd61..e332bebff0 100644 --- a/lib/kokkos/core/unit_test/TestViewAPI.hpp +++ b/lib/kokkos/core/unit_test/TestViewAPI.hpp @@ -827,6 +827,48 @@ struct TestViewMirror ASSERT_EQ( a_org(5), a_h3(5) ); } + template< class MemoryTraits, class Space > + struct CopyUnInit { + typedef typename Kokkos::Impl::MirrorViewType::view_type mirror_view_type; + + mirror_view_type a_d; + + KOKKOS_INLINE_FUNCTION + CopyUnInit( mirror_view_type & a_d_ ) : a_d(a_d_) { + } + + KOKKOS_INLINE_FUNCTION + void operator() (const typename Space::size_type i) const { + a_d(i) = (double)(10-i); + } + + }; + + template< class MemoryTraits > + void static test_mirror_no_initialize() { + Kokkos::View< double*, Layout, Kokkos::HostSpace > a_org( "A", 10 ); + Kokkos::View< double*, Layout, Kokkos::HostSpace, MemoryTraits > a_h = a_org; + + for (int i = 0; i < 10; i++) + { + a_h(i) = (double)i; + } + auto a_d = Kokkos::create_mirror_view( DeviceType(), a_h, Kokkos::WithoutInitializing ); + + int equal_ptr_h_d = (a_h.data() == a_d.data()) ? 1 : 0; + constexpr int is_same_memspace = std::is_same< Kokkos::HostSpace, typename DeviceType::memory_space >::value ? 1 : 0; + + ASSERT_EQ( equal_ptr_h_d, is_same_memspace); + + Kokkos::parallel_for( Kokkos::RangePolicy< typename DeviceType::execution_space >( 0, int(10)), CopyUnInit< MemoryTraits, DeviceType >(a_d)); + + Kokkos::deep_copy( a_h, a_d ); + + for (int i = 0; i < 10; i++) + { + ASSERT_EQ(a_h(i), (double)(10-i)); + } + } void static testit() { test_mirror< Kokkos::MemoryTraits<0> >(); @@ -835,6 +877,8 @@ struct TestViewMirror test_mirror_view< Kokkos::MemoryTraits >(); test_mirror_copy< Kokkos::MemoryTraits<0> >(); test_mirror_copy< Kokkos::MemoryTraits >(); + test_mirror_no_initialize< Kokkos::MemoryTraits<0> >(); + test_mirror_no_initialize< Kokkos::MemoryTraits >(); } }; @@ -865,7 +909,7 @@ public: } static void run_test_view_operator_a() { - {TestViewOperator< T, device > f; Kokkos::parallel_for(int(N0),f);} + {TestViewOperator< T, device > f; Kokkos::parallel_for(int(N0),f); Kokkos::fence();} #ifndef KOKKOS_ENABLE_OPENMPTARGET TestViewOperator_LeftAndRight< int[2][3][4][2][3][4], device >f6; f6.testit(); TestViewOperator_LeftAndRight< int[2][3][4][2][3], device >f5; f5.testit(); diff --git a/lib/kokkos/core/unit_test/TestViewAPI_e.hpp b/lib/kokkos/core/unit_test/TestViewAPI_e.hpp index efb34a64cc..76815dc112 100644 --- a/lib/kokkos/core/unit_test/TestViewAPI_e.hpp +++ b/lib/kokkos/core/unit_test/TestViewAPI_e.hpp @@ -194,6 +194,7 @@ inline void test_anonymous_space() { d_anon_dyn_view(j) += 42; } }); + Kokkos::fence(); #endif } @@ -201,4 +202,45 @@ TEST_F( TEST_CATEGORY, anonymous_space ) { test_anonymous_space(); } + +template +struct TestViewOverloadResolution { + // Overload based on value_type and rank + static int foo(Kokkos::View a) { + return 1; + } + static int foo(Kokkos::View a) { + return 2; + } + static int foo(Kokkos::View a) { + return 3; + } + + // Overload based on compile time dimensions + static int bar(Kokkos::View a) { + return 4; + } + static int bar(Kokkos::View a) { + return 5; + } + + static void test_function_overload() { + Kokkos::View a("A",10,3); + int data_type_1 = foo(a); + int data_type_3 = foo(Kokkos::View(a)); + Kokkos::View b("B",10,3,4); + int data_type_2 = foo(b); + Kokkos::View c(a); + int static_extent = bar(c); + ASSERT_EQ(1,data_type_1); + ASSERT_EQ(3,data_type_2); + ASSERT_EQ(1,data_type_3); + ASSERT_EQ(4,static_extent); + } +}; + +TEST_F( TEST_CATEGORY, view_overload_resolution ) +{ + TestViewOverloadResolution::test_function_overload(); +} } diff --git a/lib/kokkos/core/unit_test/TestViewMapping_a.hpp b/lib/kokkos/core/unit_test/TestViewMapping_a.hpp index 03d5e501b9..69247902cd 100644 --- a/lib/kokkos/core/unit_test/TestViewMapping_a.hpp +++ b/lib/kokkos/core/unit_test/TestViewMapping_a.hpp @@ -1012,12 +1012,14 @@ void test_view_mapping() ASSERT_EQ( a.use_count(), 1 ); ASSERT_EQ( b.use_count(), 0 ); -#if !defined( KOKKOS_ENABLE_CUDA_LAMBDA ) && !defined( KOKKOS_ENABLE_ROCM ) +// TODO: a.use_count() and x.use_count() are 0 with the asynchronous HPX backend. Why? +#if !defined( KOKKOS_ENABLE_CUDA_LAMBDA ) && !defined( KOKKOS_ENABLE_ROCM ) && \ + !(defined( KOKKOS_ENABLE_HPX ) && defined( KOKKOS_ENABLE_HPX_ASYNC_DISPATCH )) // Cannot launch host lambda when CUDA lambda is enabled. typedef typename Kokkos::Impl::HostMirror< Space >::Space::execution_space host_exec_space; - Kokkos::parallel_for( Kokkos::RangePolicy< host_exec_space >( 0, 10 ), KOKKOS_LAMBDA ( int i ) { + Kokkos::parallel_for( Kokkos::RangePolicy< host_exec_space >( 0, 10 ), KOKKOS_LAMBDA ( int ) { // 'a' is captured by copy, and the capture mechanism converts 'a' to an // unmanaged copy. When the parallel dispatch accepts a move for the // lambda, this count should become 1. diff --git a/lib/kokkos/core/unit_test/TestViewMapping_b.hpp b/lib/kokkos/core/unit_test/TestViewMapping_b.hpp index 7c7807f60d..36fc0461a4 100644 --- a/lib/kokkos/core/unit_test/TestViewMapping_b.hpp +++ b/lib/kokkos/core/unit_test/TestViewMapping_b.hpp @@ -173,12 +173,12 @@ void test_view_mapping_class_value() { typedef typename Space::execution_space ExecSpace; - ExecSpace::fence(); + ExecSpace().fence(); { Kokkos::View< MappingClassValueType, ExecSpace > a( "a" ); - ExecSpace::fence(); + ExecSpace().fence(); } - ExecSpace::fence(); + ExecSpace().fence(); } TEST_F( TEST_CATEGORY , view_mapping_class_value ) diff --git a/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp b/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp index 0c2d22e013..62bd582871 100644 --- a/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp +++ b/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp @@ -201,6 +201,7 @@ struct TestViewMappingSubview long error_count = -1; Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, 1 ), *this, error_count ); + ASSERT_EQ( error_count, 0 ); } }; diff --git a/lib/kokkos/core/unit_test/TestViewSubview.hpp b/lib/kokkos/core/unit_test/TestViewSubview.hpp index 207fbb148d..bffc77181f 100644 --- a/lib/kokkos/core/unit_test/TestViewSubview.hpp +++ b/lib/kokkos/core/unit_test/TestViewSubview.hpp @@ -48,6 +48,86 @@ #include #include #include +#include + +// TODO @refactoring move this to somewhere common + +//------------------------------------------------------------------------------ + +template +struct _kokkos____________________static_test_failure_____; + +template +struct static_predicate_message {}; + +//------------------------------------------------------------------------------ + +template class, class...> +struct static_assert_predicate_true_impl; + +template