From 113b8b4be23bdbcfde5031144110a26d178f29a0 Mon Sep 17 00:00:00 2001 From: "Dan S. Bolintineanu" Date: Thu, 30 May 2019 14:21:11 -0600 Subject: [PATCH 001/117] Fixes two bugs in pair granular: 1. User-set cutoffs did not work 2. Restarts not working --- src/GRANULAR/pair_granular.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/GRANULAR/pair_granular.cpp b/src/GRANULAR/pair_granular.cpp index 903ed303b0..913f6f8b59 100644 --- a/src/GRANULAR/pair_granular.cpp +++ b/src/GRANULAR/pair_granular.cpp @@ -890,6 +890,7 @@ void PairGranular::coeff(int narg, char **arg) if (iarg + 1 >= narg) error->all(FLERR, "Illegal pair_coeff command, not enough parameters"); cutoff_one = force->numeric(FLERR,arg[iarg+1]); + iarg += 2; } else error->all(FLERR, "Illegal pair coeff command"); } @@ -1234,7 +1235,7 @@ void PairGranular::write_restart(FILE *fp) fwrite(&tangential_coeffs[i][j],sizeof(double),3,fp); fwrite(&roll_coeffs[i][j],sizeof(double),3,fp); fwrite(&twist_coeffs[i][j],sizeof(double),3,fp); - fwrite(&cut[i][j],sizeof(double),1,fp); + fwrite(&cutoff_type[i][j],sizeof(double),1,fp); } } } @@ -1264,7 +1265,7 @@ void PairGranular::read_restart(FILE *fp) fread(&tangential_coeffs[i][j],sizeof(double),3,fp); fread(&roll_coeffs[i][j],sizeof(double),3,fp); fread(&twist_coeffs[i][j],sizeof(double),3,fp); - fread(&cut[i][j],sizeof(double),1,fp); + fread(&cutoff_type[i][j],sizeof(double),1,fp); } MPI_Bcast(&normal_model[i][j],1,MPI_INT,0,world); MPI_Bcast(&damping_model[i][j],1,MPI_INT,0,world); @@ -1275,7 +1276,7 @@ void PairGranular::read_restart(FILE *fp) MPI_Bcast(&tangential_coeffs[i][j],3,MPI_DOUBLE,0,world); MPI_Bcast(&roll_coeffs[i][j],3,MPI_DOUBLE,0,world); MPI_Bcast(&twist_coeffs[i][j],3,MPI_DOUBLE,0,world); - MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&cutoff_type[i][j],1,MPI_DOUBLE,0,world); } } } From fd93c27dcb2ddb01f86c03b2ea13e344e2e29585 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Jun 2019 07:35:30 -0400 
Subject: [PATCH 002/117] make valgrind happy by fully initializing line buffer for thermo output --- src/thermo.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/thermo.cpp b/src/thermo.cpp index 3e777edf82..f4ab7b0779 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -113,9 +113,11 @@ Thermo::Thermo(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) if (strcmp(style,"one") == 0) { line = new char[256+6*64]; + memset(line,0,256+6*64); strcpy(line,ONE); } else if (strcmp(style,"multi") == 0) { line = new char[256+12*64]; + memset(line,0,256+12*64); strcpy(line,MULTI); lineflag = MULTILINE; From 4c0cd0a019d4dbd0e8367cb02b3530ad97ace525 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Jun 2019 07:36:03 -0400 Subject: [PATCH 003/117] remove memory leak by freeing custom MPI data types --- src/USER-LB/fix_lb_fluid.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/USER-LB/fix_lb_fluid.cpp b/src/USER-LB/fix_lb_fluid.cpp index 6f1dfc9982..9e6b9c72f1 100644 --- a/src/USER-LB/fix_lb_fluid.cpp +++ b/src/USER-LB/fix_lb_fluid.cpp @@ -547,6 +547,18 @@ FixLbFluid::~FixLbFluid() } else { delete [] NodeArea; } + MPI_Type_free(&passxf); + MPI_Type_free(&passyf); + MPI_Type_free(&passzf); + MPI_Type_free(&passxu); + MPI_Type_free(&passyu); + MPI_Type_free(&passzf); + MPI_Type_free(&passxrho); + MPI_Type_free(&passyrho); + MPI_Type_free(&passzrho); + MPI_Type_free(&passxtemp); + MPI_Type_free(&passytemp); + MPI_Type_free(&passztemp); } int FixLbFluid::setmask() From 92615bda3aa3e02f525648fb155684a433cc8a8e Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Jun 2019 07:38:11 -0400 Subject: [PATCH 004/117] update kolmogorov/crespi/full example input for recent change in REBO --- examples/USER/misc/kolmogorov_crespi_full/CH.rebo | 1 + examples/USER/misc/kolmogorov_crespi_full/in.bilayer-graphene | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 120000 examples/USER/misc/kolmogorov_crespi_full/CH.rebo diff --git 
a/examples/USER/misc/kolmogorov_crespi_full/CH.rebo b/examples/USER/misc/kolmogorov_crespi_full/CH.rebo new file mode 120000 index 0000000000..c5a6a40100 --- /dev/null +++ b/examples/USER/misc/kolmogorov_crespi_full/CH.rebo @@ -0,0 +1 @@ +../../../../potentials/CH.rebo \ No newline at end of file diff --git a/examples/USER/misc/kolmogorov_crespi_full/in.bilayer-graphene b/examples/USER/misc/kolmogorov_crespi_full/in.bilayer-graphene index 63ac92cb05..c3e59337de 100644 --- a/examples/USER/misc/kolmogorov_crespi_full/in.bilayer-graphene +++ b/examples/USER/misc/kolmogorov_crespi_full/in.bilayer-graphene @@ -18,7 +18,7 @@ group adsorbate type 2 ######################## Potential defition ######################## pair_style hybrid/overlay rebo kolmogorov/crespi/full 16.0 #################################################################### -pair_coeff * * rebo CH.airebo NULL C # chemical +pair_coeff * * rebo CH.rebo NULL C # chemical pair_coeff * * kolmogorov/crespi/full CC.KC-full C C # long range #################################################################### # Neighbor update settings From b1458ceebfe3c15fb827b20e9b81c179b0875b8f Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Jun 2019 07:42:18 -0400 Subject: [PATCH 005/117] fix typo --- src/USER-LB/fix_lb_fluid.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/USER-LB/fix_lb_fluid.cpp b/src/USER-LB/fix_lb_fluid.cpp index 9e6b9c72f1..31c54aee76 100644 --- a/src/USER-LB/fix_lb_fluid.cpp +++ b/src/USER-LB/fix_lb_fluid.cpp @@ -552,7 +552,7 @@ FixLbFluid::~FixLbFluid() MPI_Type_free(&passzf); MPI_Type_free(&passxu); MPI_Type_free(&passyu); - MPI_Type_free(&passzf); + MPI_Type_free(&passzu); MPI_Type_free(&passxrho); MPI_Type_free(&passyrho); MPI_Type_free(&passzrho); From 5fc3081a55eb0555a3bab32b11ea178f04616645 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Jun 2019 14:23:30 -0400 Subject: [PATCH 006/117] make building tools (msi2lmp, chain.x) an option, which is off 
by default --- cmake/CMakeLists.txt | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 8f7ac9a6e7..67561c536b 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -106,6 +106,8 @@ if(BUILD_LIB) endif() endif() +option(BUILD_TOOLS "Build and install LAMMPS tools (msi2lmp, binary2txt, chain)" OFF) + if(NOT BUILD_EXE AND NOT BUILD_LIB) message(FATAL_ERROR "You need to at least enable one of two following options: BUILD_LIB or BUILD_EXE") endif() @@ -518,6 +520,18 @@ if(BUILD_EXE) install(TARGETS lmp DESTINATION ${CMAKE_INSTALL_BINDIR}) install(FILES ${LAMMPS_DOC_DIR}/lammps.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1 RENAME ${LAMMPS_BINARY}.1) +if(BUILD_TOOLS) + add_executable(binary2txt ${LAMMPS_TOOLS_DIR}/binary2txt.cpp) + install(TARGETS binary2txt DESTINATION ${CMAKE_INSTALL_BINDIR}) + + # ninja-build currently does not support fortran. thus we skip building this tool + if(NOT CMAKE_GENERATOR STREQUAL "Ninja") + message(STATUS "Skipping building 'chain.x' with Ninja build tool due to lack of Fortran support") + enable_language(Fortran) + add_executable(chain.x ${LAMMPS_TOOLS_DIR}/chain.f) + target_link_libraries(chain.x ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}) + endif() + enable_language(C) get_filename_component(MSI2LMP_SOURCE_DIR ${LAMMPS_TOOLS_DIR}/msi2lmp/src ABSOLUTE) file(GLOB MSI2LMP_SOURCES ${MSI2LMP_SOURCE_DIR}/[^.]*.c) @@ -525,7 +539,6 @@ if(BUILD_EXE) target_link_libraries(msi2lmp m) install(TARGETS msi2lmp DESTINATION ${CMAKE_INSTALL_BINDIR}) install(FILES ${LAMMPS_DOC_DIR}/msi2lmp.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) - endif() include(Documentation) From f4f4a7c85013b037947e9617e237f5a67c757273 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Jun 2019 14:24:51 -0400 Subject: [PATCH 007/117] adapt for Ninja build tool, which cannot handle sub-builds and fortran so builds of downloaded libraries are not supported right now --- 
cmake/CMakeLists.txt | 4 ++++ cmake/Modules/Packages/KIM.cmake | 3 +++ cmake/Modules/Packages/LATTE.cmake | 3 +++ cmake/Modules/Packages/USER-PLUMED.cmake | 3 +++ cmake/Modules/Packages/VORONOI.cmake | 3 +++ 5 files changed, 16 insertions(+) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 67561c536b..7f212ac48e 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -224,6 +224,9 @@ if(PKG_MSCG OR PKG_USER-ATC OR PKG_USER-AWPMD OR PKG_USER-PLUMED OR PKG_USER-QUI find_package(LAPACK) find_package(BLAS) if(NOT LAPACK_FOUND OR NOT BLAS_FOUND) + if(CMAKE_GENERATOR STREQUAL "Ninja") + status(FATAL_ERROR "Cannot build internal linear algebra library with Ninja build tool due to lack for Fortran support") + endif() enable_language(Fortran) file(GLOB LAPACK_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/[^.]*.[fF]) add_library(linalg STATIC ${LAPACK_SOURCES}) @@ -519,6 +522,7 @@ if(BUILD_EXE) set_target_properties(lmp PROPERTIES OUTPUT_NAME ${LAMMPS_BINARY}) install(TARGETS lmp DESTINATION ${CMAKE_INSTALL_BINDIR}) install(FILES ${LAMMPS_DOC_DIR}/lammps.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1 RENAME ${LAMMPS_BINARY}.1) +endif() if(BUILD_TOOLS) add_executable(binary2txt ${LAMMPS_TOOLS_DIR}/binary2txt.cpp) diff --git a/cmake/Modules/Packages/KIM.cmake b/cmake/Modules/Packages/KIM.cmake index 21ebd0f8e0..8815f73881 100644 --- a/cmake/Modules/Packages/KIM.cmake +++ b/cmake/Modules/Packages/KIM.cmake @@ -13,6 +13,9 @@ if(PKG_KIM) endif() option(DOWNLOAD_KIM "Download KIM-API from OpenKIM instead of using an already installed one" ${DOWNLOAD_KIM_DEFAULT}) if(DOWNLOAD_KIM) + if(CMAKE_GENERATOR STREQUAL "Ninja") + message(FATAL_ERROR "Cannot build downloaded KIM-API library with Ninja build tool") + endif() message(STATUS "KIM-API download requested - we will build our own") enable_language(C) enable_language(Fortran) diff --git a/cmake/Modules/Packages/LATTE.cmake b/cmake/Modules/Packages/LATTE.cmake index a709561562..de7116780b 100644 --- 
a/cmake/Modules/Packages/LATTE.cmake +++ b/cmake/Modules/Packages/LATTE.cmake @@ -11,6 +11,9 @@ if(PKG_LATTE) if (CMAKE_VERSION VERSION_LESS "3.7") # due to SOURCE_SUBDIR message(FATAL_ERROR "For downlading LATTE you need at least cmake-3.7") endif() + if(CMAKE_GENERATOR STREQUAL "Ninja") + message(FATAL_ERROR "Cannot build downloaded LATTE library with Ninja build tool") + endif() message(STATUS "LATTE download requested - we will build our own") include(ExternalProject) ExternalProject_Add(latte_build diff --git a/cmake/Modules/Packages/USER-PLUMED.cmake b/cmake/Modules/Packages/USER-PLUMED.cmake index 422527dd06..500558fc72 100644 --- a/cmake/Modules/Packages/USER-PLUMED.cmake +++ b/cmake/Modules/Packages/USER-PLUMED.cmake @@ -17,6 +17,9 @@ if(PKG_USER-PLUMED) option(DOWNLOAD_PLUMED "Download Plumed package instead of using an already installed one" ${DOWNLOAD_PLUMED_DEFAULT}) if(DOWNLOAD_PLUMED) + if(CMAKE_GENERATOR STREQUAL "Ninja") + message(FATAL_ERROR "Cannot build downloaded Plumed library with Ninja build tool") + endif() if(BUILD_MPI) set(PLUMED_CONFIG_MPI "--enable-mpi") set(PLUMED_CONFIG_CC ${CMAKE_MPI_C_COMPILER}) diff --git a/cmake/Modules/Packages/VORONOI.cmake b/cmake/Modules/Packages/VORONOI.cmake index df4551b6e7..5ce974a7ae 100644 --- a/cmake/Modules/Packages/VORONOI.cmake +++ b/cmake/Modules/Packages/VORONOI.cmake @@ -7,6 +7,9 @@ if(PKG_VORONOI) endif() option(DOWNLOAD_VORO "Download and compile the Voro++ library instead of using an already installed one" ${DOWNLOAD_VORO_DEFAULT}) if(DOWNLOAD_VORO) + if(CMAKE_GENERATOR STREQUAL "Ninja") + message(FATAL_ERROR "Cannot build downloaded Voro++ library with Ninja build tool") + endif() message(STATUS "Voro++ download requested - we will build our own") include(ExternalProject) From e9666f585f1924142e2c044c53d765806d35076d Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Jun 2019 15:46:54 -0400 Subject: [PATCH 008/117] update log files for kolmogorov/crespi/full potential --- ...++.1 => 
log.5Jun19.bilayer-graphene.g++.1} | 51 ++++++++++--------- ...++.4 => log.5Jun19.bilayer-graphene.g++.4} | 51 ++++++++++--------- 2 files changed, 54 insertions(+), 48 deletions(-) rename examples/USER/misc/kolmogorov_crespi_full/{log.16Mar18.bilayer-graphene.g++.1 => log.5Jun19.bilayer-graphene.g++.1} (59%) rename examples/USER/misc/kolmogorov_crespi_full/{log.16Mar18.bilayer-graphene.g++.4 => log.5Jun19.bilayer-graphene.g++.4} (59%) diff --git a/examples/USER/misc/kolmogorov_crespi_full/log.16Mar18.bilayer-graphene.g++.1 b/examples/USER/misc/kolmogorov_crespi_full/log.5Jun19.bilayer-graphene.g++.1 similarity index 59% rename from examples/USER/misc/kolmogorov_crespi_full/log.16Mar18.bilayer-graphene.g++.1 rename to examples/USER/misc/kolmogorov_crespi_full/log.5Jun19.bilayer-graphene.g++.1 index a06b3effdd..c74f9956a2 100644 --- a/examples/USER/misc/kolmogorov_crespi_full/log.16Mar18.bilayer-graphene.g++.1 +++ b/examples/USER/misc/kolmogorov_crespi_full/log.5Jun19.bilayer-graphene.g++.1 @@ -1,4 +1,5 @@ -LAMMPS (8 Mar 2018) +LAMMPS (5 Jun 2019) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. 
(src/comm.cpp:88) using 1 OpenMP thread(s) per MPI task # Initialization units metal @@ -21,6 +22,8 @@ read_data Bi_gr_AB_stack_2L_noH_300K.data 0 = max # of 1-3 neighbors 0 = max # of 1-4 neighbors 1 = max # of special neighbors + special bonds CPU = 0.000353813 secs + read_data CPU = 0.0043292 secs mass 1 12.0107 # carbon mass (g/mole) | membrane mass 2 12.0107 # carbon mass (g/mole) | adsorbate # Separate atom groups @@ -32,8 +35,8 @@ group adsorbate type 2 ######################## Potential defition ######################## pair_style hybrid/overlay rebo kolmogorov/crespi/full 16.0 #################################################################### -pair_coeff * * rebo CH.airebo NULL C # chemical -Reading potential file CH.airebo with DATE: 2011-10-25 +pair_coeff * * rebo CH.rebo NULL C # chemical +Reading potential file CH.rebo with DATE: 2018-7-3 pair_coeff * * kolmogorov/crespi/full CC.KC-full C C # long range #################################################################### # Neighbor update settings @@ -92,32 +95,32 @@ Neighbor list info ... 
bin: standard Per MPI rank memory allocation (min/avg/max) = 16.96 | 16.96 | 16.96 Mbytes Step TotEng PotEng KinEng v_REBO v_KC Temp v_adsxcom v_adsycom v_adszcom v_adsvxcom v_adsvycom v_adsvzcom - 0 -5025.3867722725 -5040.0767391239 14.6899668514 -5011.2636297759 -28.8131093480 83.6251135127 22.0155657205 20.2812150219 3.4623630945 0.0282287195 0.0535565745 0.2193320108 - 100 -5025.3962433293 -5041.3829775585 15.9867342292 -5012.5109377234 -28.8720398351 91.0071804888 22.0181858078 20.2867731676 3.4456714402 0.0241525932 0.0573807336 -0.5235069014 - 200 -5025.3942568861 -5041.9638220670 16.5695651809 -5012.7804299195 -29.1833921475 94.3250439654 22.0203529515 20.2926376511 3.3740502908 0.0186420748 0.0595018114 -0.7867265577 - 300 -5025.3919463074 -5040.9705419367 15.5785956293 -5012.0510295102 -28.9195124265 88.6837826830 22.0218424095 20.2984380400 3.3199036613 0.0106250874 0.0544668352 -0.1513745908 - 400 -5025.3965376948 -5041.6929964127 16.2964587179 -5012.6418090677 -29.0511873450 92.7703393702 22.0224243957 20.3034636122 3.3515794172 0.0006844935 0.0458598502 0.6967704496 - 500 -5025.4050172900 -5042.1712310053 16.7662137153 -5013.1850218645 -28.9862091408 95.4444989087 22.0220673443 20.3074634962 3.4286173278 -0.0078273439 0.0340764532 0.6845095066 - 600 -5025.3985715734 -5041.2158947893 15.8173232159 -5012.4875319345 -28.7283628548 90.0427797270 22.0209262700 20.3103065099 3.4653840648 -0.0141442608 0.0229602847 0.0009001093 - 700 -5025.3997561572 -5041.6276721306 16.2279159734 -5012.7093581188 -28.9183140118 92.3801482386 22.0191651506 20.3120184840 3.4291788224 -0.0208485646 0.0104216414 -0.6668311564 - 800 -5025.3967603736 -5042.3401685987 16.9434082251 -5013.3044877099 -29.0356808888 96.4532085367 22.0167259920 20.3122737443 3.3535033285 -0.0279747378 -0.0060833621 -0.7003492925 - 900 -5025.3984542801 -5042.2820667481 16.8836124680 -5013.4066841442 -28.8753826039 96.1128111061 22.0136711877 20.3107854823 3.3206430872 -0.0331979094 -0.0237440547 
0.1335648638 - 1000 -5025.3988185618 -5041.9160822433 16.5172636815 -5012.8147737982 -29.1013084450 94.0273088606 22.0102627032 20.3075977018 3.3736867454 -0.0340065996 -0.0390649991 0.7872380119 -Loop time of 156.142 on 1 procs for 1000 steps with 1360 atoms + 0 -5025.3867727863 -5040.0767396377 14.6899668514 -5011.2636302897 -28.8131093480 83.6251135127 22.0155657205 20.2812150219 3.4623630945 0.0282287195 0.0535565745 0.2193320108 + 100 -5025.3962438431 -5041.3829780735 15.9867342304 -5012.5109382383 -28.8720398352 91.0071804956 22.0181858078 20.2867731676 3.4456714402 0.0241525932 0.0573807336 -0.5235069015 + 200 -5025.3942574000 -5041.9638225847 16.5695651847 -5012.7804304371 -29.1833921476 94.3250439874 22.0203529515 20.2926376511 3.3740502908 0.0186420748 0.0595018114 -0.7867265578 + 300 -5025.3919468212 -5040.9705424499 15.5785956286 -5012.0510300232 -28.9195124266 88.6837826792 22.0218424095 20.2984380400 3.3199036613 0.0106250874 0.0544668352 -0.1513745907 + 400 -5025.3965382086 -5041.6929969192 16.2964587107 -5012.6418095739 -29.0511873454 92.7703393292 22.0224243957 20.3034636122 3.3515794172 0.0006844935 0.0458598502 0.6967704497 + 500 -5025.4050178038 -5042.1712315208 16.7662137170 -5013.1850223792 -28.9862091417 95.4444989189 22.0220673443 20.3074634962 3.4286173278 -0.0078273439 0.0340764532 0.6845095066 + 600 -5025.3985720873 -5041.2158953052 15.8173232179 -5012.4875324499 -28.7283628553 90.0427797386 22.0209262700 20.3103065099 3.4653840648 -0.0141442608 0.0229602847 0.0009001092 + 700 -5025.3997566711 -5041.6276726420 16.2279159709 -5012.7093586298 -28.9183140122 92.3801482242 22.0191651506 20.3120184840 3.4291788224 -0.0208485646 0.0104216414 -0.6668311565 + 800 -5025.3967608874 -5042.3401691104 16.9434082230 -5013.3044882226 -29.0356808878 96.4532085250 22.0167259920 20.3122737443 3.3535033285 -0.0279747378 -0.0060833621 -0.7003492926 + 900 -5025.3984547937 -5042.2820672614 16.8836124676 -5013.4066846579 -28.8753826035 96.1128111040 
22.0136711877 20.3107854823 3.3206430872 -0.0331979094 -0.0237440547 0.1335648640 + 1000 -5025.3988190757 -5041.9160827657 16.5172636900 -5012.8147743212 -29.1013084444 94.0273089090 22.0102627032 20.3075977018 3.3736867454 -0.0340065996 -0.0390649991 0.7872380119 +Loop time of 103.724 on 1 procs for 1000 steps with 1360 atoms -Performance: 0.553 ns/day, 43.373 hours/ns, 6.404 timesteps/s -99.6% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 0.833 ns/day, 28.812 hours/ns, 9.641 timesteps/s +99.9% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 155.99 | 155.99 | 155.99 | 0.0 | 99.90 -Bond | 0.00075769 | 0.00075769 | 0.00075769 | 0.0 | 0.00 +Pair | 103.59 | 103.59 | 103.59 | 0.0 | 99.87 +Bond | 0.00022388 | 0.00022388 | 0.00022388 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.084217 | 0.084217 | 0.084217 | 0.0 | 0.05 -Output | 0.0016122 | 0.0016122 | 0.0016122 | 0.0 | 0.00 -Modify | 0.034797 | 0.034797 | 0.034797 | 0.0 | 0.02 -Other | | 0.02838 | | | 0.02 +Comm | 0.082476 | 0.082476 | 0.082476 | 0.0 | 0.08 +Output | 0.0010884 | 0.0010884 | 0.0010884 | 0.0 | 0.00 +Modify | 0.032938 | 0.032938 | 0.032938 | 0.0 | 0.03 +Other | | 0.01749 | | | 0.02 Nlocal: 1360 ave 1360 max 1360 min Histogram: 1 0 0 0 0 0 0 0 0 0 @@ -133,4 +136,4 @@ Ave neighs/atom = 195.004 Ave special neighs/atom = 0 Neighbor list builds = 0 Dangerous builds = 0 -Total wall time: 0:02:36 +Total wall time: 0:01:43 diff --git a/examples/USER/misc/kolmogorov_crespi_full/log.16Mar18.bilayer-graphene.g++.4 b/examples/USER/misc/kolmogorov_crespi_full/log.5Jun19.bilayer-graphene.g++.4 similarity index 59% rename from examples/USER/misc/kolmogorov_crespi_full/log.16Mar18.bilayer-graphene.g++.4 rename to examples/USER/misc/kolmogorov_crespi_full/log.5Jun19.bilayer-graphene.g++.4 index 58322f9ce0..b90ee7ee2e 100644 --- 
a/examples/USER/misc/kolmogorov_crespi_full/log.16Mar18.bilayer-graphene.g++.4 +++ b/examples/USER/misc/kolmogorov_crespi_full/log.5Jun19.bilayer-graphene.g++.4 @@ -1,4 +1,5 @@ -LAMMPS (8 Mar 2018) +LAMMPS (5 Jun 2019) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:88) using 1 OpenMP thread(s) per MPI task # Initialization units metal @@ -21,6 +22,8 @@ read_data Bi_gr_AB_stack_2L_noH_300K.data 0 = max # of 1-3 neighbors 0 = max # of 1-4 neighbors 1 = max # of special neighbors + special bonds CPU = 0.000187874 secs + read_data CPU = 0.00234103 secs mass 1 12.0107 # carbon mass (g/mole) | membrane mass 2 12.0107 # carbon mass (g/mole) | adsorbate # Separate atom groups @@ -32,8 +35,8 @@ group adsorbate type 2 ######################## Potential defition ######################## pair_style hybrid/overlay rebo kolmogorov/crespi/full 16.0 #################################################################### -pair_coeff * * rebo CH.airebo NULL C # chemical -Reading potential file CH.airebo with DATE: 2011-10-25 +pair_coeff * * rebo CH.rebo NULL C # chemical +Reading potential file CH.rebo with DATE: 2018-7-3 pair_coeff * * kolmogorov/crespi/full CC.KC-full C C # long range #################################################################### # Neighbor update settings @@ -92,32 +95,32 @@ Neighbor list info ... 
bin: standard Per MPI rank memory allocation (min/avg/max) = 11.13 | 11.13 | 11.13 Mbytes Step TotEng PotEng KinEng v_REBO v_KC Temp v_adsxcom v_adsycom v_adszcom v_adsvxcom v_adsvycom v_adsvzcom - 0 -5025.3867722725 -5040.0767391239 14.6899668514 -5011.2636297759 -28.8131093480 83.6251135127 22.0155657205 20.2812150219 3.4623630945 0.0282287195 0.0535565745 0.2193320108 - 100 -5025.3962433293 -5041.3829775585 15.9867342292 -5012.5109377234 -28.8720398351 91.0071804888 22.0181858078 20.2867731676 3.4456714402 0.0241525932 0.0573807336 -0.5235069014 - 200 -5025.3942568861 -5041.9638220670 16.5695651809 -5012.7804299195 -29.1833921475 94.3250439654 22.0203529515 20.2926376511 3.3740502908 0.0186420748 0.0595018114 -0.7867265577 - 300 -5025.3919463074 -5040.9705419367 15.5785956293 -5012.0510295103 -28.9195124265 88.6837826830 22.0218424095 20.2984380400 3.3199036613 0.0106250874 0.0544668352 -0.1513745908 - 400 -5025.3965376948 -5041.6929964127 16.2964587179 -5012.6418090677 -29.0511873450 92.7703393702 22.0224243957 20.3034636122 3.3515794172 0.0006844935 0.0458598502 0.6967704496 - 500 -5025.4050172900 -5042.1712310053 16.7662137153 -5013.1850218645 -28.9862091408 95.4444989088 22.0220673443 20.3074634962 3.4286173278 -0.0078273439 0.0340764532 0.6845095066 - 600 -5025.3985715734 -5041.2158947893 15.8173232159 -5012.4875319345 -28.7283628548 90.0427797270 22.0209262700 20.3103065099 3.4653840648 -0.0141442608 0.0229602847 0.0009001093 - 700 -5025.3997561572 -5041.6276721306 16.2279159734 -5012.7093581188 -28.9183140118 92.3801482386 22.0191651506 20.3120184840 3.4291788224 -0.0208485646 0.0104216414 -0.6668311564 - 800 -5025.3967603736 -5042.3401685987 16.9434082251 -5013.3044877099 -29.0356808888 96.4532085367 22.0167259920 20.3122737443 3.3535033285 -0.0279747378 -0.0060833621 -0.7003492925 - 900 -5025.3984542801 -5042.2820667481 16.8836124680 -5013.4066841442 -28.8753826039 96.1128111061 22.0136711877 20.3107854823 3.3206430872 -0.0331979094 -0.0237440547 
0.1335648638 - 1000 -5025.3988185618 -5041.9160822433 16.5172636815 -5012.8147737983 -29.1013084450 94.0273088606 22.0102627032 20.3075977018 3.3736867454 -0.0340065996 -0.0390649991 0.7872380119 -Loop time of 42.5422 on 4 procs for 1000 steps with 1360 atoms + 0 -5025.3867727863 -5040.0767396377 14.6899668514 -5011.2636302897 -28.8131093480 83.6251135127 22.0155657205 20.2812150219 3.4623630945 0.0282287195 0.0535565745 0.2193320108 + 100 -5025.3962438431 -5041.3829780735 15.9867342304 -5012.5109382383 -28.8720398352 91.0071804956 22.0181858078 20.2867731676 3.4456714402 0.0241525932 0.0573807336 -0.5235069015 + 200 -5025.3942574000 -5041.9638225847 16.5695651847 -5012.7804304371 -29.1833921476 94.3250439874 22.0203529515 20.2926376511 3.3740502908 0.0186420748 0.0595018114 -0.7867265578 + 300 -5025.3919468212 -5040.9705424499 15.5785956286 -5012.0510300232 -28.9195124266 88.6837826792 22.0218424095 20.2984380400 3.3199036613 0.0106250874 0.0544668352 -0.1513745907 + 400 -5025.3965382086 -5041.6929969192 16.2964587107 -5012.6418095739 -29.0511873454 92.7703393291 22.0224243957 20.3034636122 3.3515794172 0.0006844935 0.0458598502 0.6967704497 + 500 -5025.4050178038 -5042.1712315208 16.7662137170 -5013.1850223792 -28.9862091417 95.4444989189 22.0220673443 20.3074634962 3.4286173278 -0.0078273439 0.0340764532 0.6845095066 + 600 -5025.3985720873 -5041.2158953052 15.8173232179 -5012.4875324499 -28.7283628553 90.0427797386 22.0209262700 20.3103065099 3.4653840648 -0.0141442608 0.0229602847 0.0009001092 + 700 -5025.3997566711 -5041.6276726420 16.2279159709 -5012.7093586298 -28.9183140122 92.3801482242 22.0191651506 20.3120184840 3.4291788224 -0.0208485646 0.0104216414 -0.6668311565 + 800 -5025.3967608874 -5042.3401691104 16.9434082230 -5013.3044882226 -29.0356808878 96.4532085250 22.0167259920 20.3122737443 3.3535033285 -0.0279747378 -0.0060833621 -0.7003492926 + 900 -5025.3984547938 -5042.2820672614 16.8836124676 -5013.4066846579 -28.8753826035 96.1128111040 
22.0136711877 20.3107854823 3.3206430872 -0.0331979094 -0.0237440547 0.1335648640 + 1000 -5025.3988190757 -5041.9160827657 16.5172636900 -5012.8147743212 -29.1013084444 94.0273089090 22.0102627032 20.3075977018 3.3736867454 -0.0340065996 -0.0390649991 0.7872380119 +Loop time of 33.7338 on 4 procs for 1000 steps with 1360 atoms -Performance: 2.031 ns/day, 11.817 hours/ns, 23.506 timesteps/s -98.9% CPU use with 4 MPI tasks x 1 OpenMP threads +Performance: 2.561 ns/day, 9.370 hours/ns, 29.644 timesteps/s +94.1% CPU use with 4 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 39.928 | 40.992 | 42.377 | 15.8 | 96.36 -Bond | 0.0003643 | 0.00043392 | 0.00048113 | 0.0 | 0.00 +Pair | 30.833 | 31.356 | 32.18 | 9.1 | 92.95 +Bond | 0.00026059 | 0.00029182 | 0.00031185 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.12253 | 1.5076 | 2.5698 | 82.1 | 3.54 -Output | 0.0012577 | 0.0013637 | 0.0016453 | 0.4 | 0.00 -Modify | 0.010833 | 0.012247 | 0.013317 | 0.9 | 0.03 -Other | | 0.02864 | | | 0.07 +Comm | 1.443 | 2.2722 | 2.8091 | 34.3 | 6.74 +Output | 0.00068855 | 0.00095087 | 0.0017185 | 0.0 | 0.00 +Modify | 0.010187 | 0.011709 | 0.015284 | 1.9 | 0.03 +Other | | 0.09241 | | | 0.27 Nlocal: 340 ave 344 max 334 min Histogram: 1 0 0 0 0 0 1 0 1 1 @@ -133,4 +136,4 @@ Ave neighs/atom = 195.004 Ave special neighs/atom = 0 Neighbor list builds = 0 Dangerous builds = 0 -Total wall time: 0:00:42 +Total wall time: 0:00:33 From 995b6b31a240a3f1315c4641764727c53858afdd Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 17 Jun 2019 16:05:54 -0400 Subject: [PATCH 009/117] flag a couple more packages that contain sub-builds of downloaded code --- cmake/Modules/Packages/MSCG.cmake | 3 +++ cmake/Modules/Packages/USER-SCAFACOS.cmake | 3 +++ 2 files changed, 6 insertions(+) diff --git a/cmake/Modules/Packages/MSCG.cmake b/cmake/Modules/Packages/MSCG.cmake 
index e8744bc192..b442580583 100644 --- a/cmake/Modules/Packages/MSCG.cmake +++ b/cmake/Modules/Packages/MSCG.cmake @@ -11,6 +11,9 @@ if(PKG_MSCG) if (CMAKE_VERSION VERSION_LESS "3.7") # due to SOURCE_SUBDIR message(FATAL_ERROR "For downlading MSCG you need at least cmake-3.7") endif() + if(CMAKE_GENERATOR STREQUAL "Ninja") + message(FATAL_ERROR "Cannot build downloaded MSCG library with Ninja build tool") + endif() include(ExternalProject) if(NOT LAPACK_FOUND) set(EXTRA_MSCG_OPTS "-DLAPACK_LIBRARIES=${CMAKE_CURRENT_BINARY_DIR}/liblinalg.a") diff --git a/cmake/Modules/Packages/USER-SCAFACOS.cmake b/cmake/Modules/Packages/USER-SCAFACOS.cmake index adb002081f..475f2585c8 100644 --- a/cmake/Modules/Packages/USER-SCAFACOS.cmake +++ b/cmake/Modules/Packages/USER-SCAFACOS.cmake @@ -13,6 +13,9 @@ if(PKG_USER-SCAFACOS) endif() option(DOWNLOAD_SCAFACOS "Download ScaFaCoS library instead of using an already installed one" ${DOWNLOAD_SCAFACOS_DEFAULT}) if(DOWNLOAD_SCAFACOS) + if(CMAKE_GENERATOR STREQUAL "Ninja") + message(FATAL_ERROR "Cannot build downloaded ScaFaCoS library with Ninja build tool") + endif() message(STATUS "ScaFaCoS download requested - we will build our own") include(ExternalProject) ExternalProject_Add(scafacos_build From 81cdce9b04fe4d50fe2d568baed3afccb40da2d1 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 18 Jun 2019 00:14:48 -0400 Subject: [PATCH 010/117] flag and document that USER-TALLY computes are not compatible with dynamic groups --- doc/src/compute_tally.txt | 2 ++ src/USER-TALLY/compute_force_tally.cpp | 1 + src/USER-TALLY/compute_heat_flux_tally.cpp | 1 + src/USER-TALLY/compute_pe_mol_tally.cpp | 1 + src/USER-TALLY/compute_pe_tally.cpp | 1 + src/USER-TALLY/compute_stress_tally.cpp | 1 + 6 files changed, 7 insertions(+) diff --git a/doc/src/compute_tally.txt b/doc/src/compute_tally.txt index 6401be54e9..125eba1302 100644 --- a/doc/src/compute_tally.txt +++ b/doc/src/compute_tally.txt @@ -88,6 +88,8 @@ potentials only include the pair 
potential portion of the EAM interaction when used by this compute, not the embedding term. Also bonded or Kspace interactions do not contribute to this compute. +The computes in this package are not compatible with dynamic groups. + [Related commands:] {compute group/group}_compute_group_group.html, {compute diff --git a/src/USER-TALLY/compute_force_tally.cpp b/src/USER-TALLY/compute_force_tally.cpp index 0ec1d332a4..ba155db586 100644 --- a/src/USER-TALLY/compute_force_tally.cpp +++ b/src/USER-TALLY/compute_force_tally.cpp @@ -41,6 +41,7 @@ ComputeForceTally::ComputeForceTally(LAMMPS *lmp, int narg, char **arg) : vector_flag = 0; peratom_flag = 1; timeflag = 1; + dynamic_group_allow = 0; comm_reverse = size_peratom_cols = 3; extscalar = 1; diff --git a/src/USER-TALLY/compute_heat_flux_tally.cpp b/src/USER-TALLY/compute_heat_flux_tally.cpp index f8db92a730..4aff25d952 100644 --- a/src/USER-TALLY/compute_heat_flux_tally.cpp +++ b/src/USER-TALLY/compute_heat_flux_tally.cpp @@ -38,6 +38,7 @@ ComputeHeatFluxTally::ComputeHeatFluxTally(LAMMPS *lmp, int narg, char **arg) : vector_flag = 1; timeflag = 1; + dynamic_group_allow = 0; comm_reverse = 7; extvector = 1; diff --git a/src/USER-TALLY/compute_pe_mol_tally.cpp b/src/USER-TALLY/compute_pe_mol_tally.cpp index 264ddca270..08b3ae4d73 100644 --- a/src/USER-TALLY/compute_pe_mol_tally.cpp +++ b/src/USER-TALLY/compute_pe_mol_tally.cpp @@ -39,6 +39,7 @@ ComputePEMolTally::ComputePEMolTally(LAMMPS *lmp, int narg, char **arg) : vector_flag = 1; size_vector = 4; timeflag = 1; + dynamic_group_allow = 0; extvector = 1; peflag = 1; // we need Pair::ev_tally() to be run diff --git a/src/USER-TALLY/compute_pe_tally.cpp b/src/USER-TALLY/compute_pe_tally.cpp index 3031915ebe..7b920d903d 100644 --- a/src/USER-TALLY/compute_pe_tally.cpp +++ b/src/USER-TALLY/compute_pe_tally.cpp @@ -40,6 +40,7 @@ ComputePETally::ComputePETally(LAMMPS *lmp, int narg, char **arg) : vector_flag = 0; peratom_flag = 1; timeflag = 1; + dynamic_group_allow = 0; 
comm_reverse = size_peratom_cols = 2; extscalar = 1; diff --git a/src/USER-TALLY/compute_stress_tally.cpp b/src/USER-TALLY/compute_stress_tally.cpp index 8ed40ae8e2..f61f498f43 100644 --- a/src/USER-TALLY/compute_stress_tally.cpp +++ b/src/USER-TALLY/compute_stress_tally.cpp @@ -41,6 +41,7 @@ ComputeStressTally::ComputeStressTally(LAMMPS *lmp, int narg, char **arg) : vector_flag = 0; peratom_flag = 1; timeflag = 1; + dynamic_group_allow = 0; comm_reverse = size_peratom_cols = 6; extscalar = 0; From 961dcfc2619d8d3b5ebc3a4e9b424e776d239e81 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 18 Jun 2019 11:50:06 -0400 Subject: [PATCH 011/117] mention alternate build environment generators for cmake --- doc/src/Build_cmake.txt | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/doc/src/Build_cmake.txt b/doc/src/Build_cmake.txt index 265c16e3d4..a16ba995a8 100644 --- a/doc/src/Build_cmake.txt +++ b/doc/src/Build_cmake.txt @@ -32,10 +32,18 @@ cmake \[options ...\] ../cmake # configuration with (command-line) cmake make # compilation :pre The cmake command will detect available features, enable selected -packages and options, and will generate the build environment. The make -command will then compile and link LAMMPS, producing (by default) an -executable called "lmp" and a library called "liblammps.a" in the -"build" folder. +packages and options, and will generate the build environment. By default +this build environment will be created for "Unix Makefiles" on most +platforms and particularly on Linux. However, alternate build tools +(e.g. Ninja) and support files for Integrated Development Environments +(IDE) like Eclipse, CodeBlocks, or Kate can be generated, too. This is +selected via the "-G" command line flag. 
For the rest of the documentation +we will assume that the build environment is generated for makefiles +and thus the make command will be used to compile and link LAMMPS as +indicated above, producing (by default) an executable called "lmp" and +a library called "liblammps.a" in the "build" folder. When generating +a build environment for the "Ninja" build tool, the build command would +be "ninja" instead of "make". If your machine has multiple CPU cores (most do these days), using a command like "make -jN" (with N being the number of available local From d98c105d3433655fc4f0e6aeb0085f71e3f49668 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 18 Jun 2019 11:52:09 -0400 Subject: [PATCH 012/117] step version string to 18 June 2019 --- doc/lammps.1 | 2 +- doc/src/Manual.txt | 4 ++-- src/version.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/lammps.1 b/doc/lammps.1 index f4a801779a..7ba31bfd06 100644 --- a/doc/lammps.1 +++ b/doc/lammps.1 @@ -1,4 +1,4 @@ -.TH LAMMPS "5 June 2019" "2019-06-05" +.TH LAMMPS "18 June 2019" "2019-06-18" .SH NAME .B LAMMPS \- Molecular Dynamics Simulator. 
diff --git a/doc/src/Manual.txt b/doc/src/Manual.txt index 2fa9623f36..ba6ab8aac7 100644 --- a/doc/src/Manual.txt +++ b/doc/src/Manual.txt @@ -1,7 +1,7 @@ LAMMPS Users Manual - + @@ -21,7 +21,7 @@ :line LAMMPS Documentation :c,h1 -5 Jun 2019 version :c,h2 +18 Jun 2019 version :c,h2 "What is a LAMMPS version?"_Manual_version.html diff --git a/src/version.h b/src/version.h index 06ee8ab8f4..c2f6fcaf92 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define LAMMPS_VERSION "5 Jun 2019" +#define LAMMPS_VERSION "18 Jun 2019" From 98fbaef406c77e138733419e54a7834ce6fcf89f Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 18 Jun 2019 15:00:27 -0400 Subject: [PATCH 013/117] workaround for ICE issue with gcc 4.8.x --- src/lmptype.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lmptype.h b/src/lmptype.h index 20d29880ed..12fa6cc4fb 100644 --- a/src/lmptype.h +++ b/src/lmptype.h @@ -211,7 +211,7 @@ typedef int bigint; #elif defined(__INTEL_COMPILER) # define _noopt #elif defined(__GNUC__) -# if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 4)) +# if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 9)) # define _noopt __attribute__((optimize("O0","no-var-tracking-assignments"))) # else # define _noopt __attribute__((optimize("O0"))) From f8f8e441b90d818830121a669e96261beed16fc2 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 19 Jun 2019 07:06:54 -0400 Subject: [PATCH 014/117] add missing cmake package module --- cmake/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 7f212ac48e..bde36aa896 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -303,6 +303,7 @@ include(Packages/USER-QUIP) include(Packages/USER-QMMM) include(Packages/USER-VTK) include(Packages/KIM) +include(Packages/LATTE) include(Packages/MESSAGE) include(Packages/MSCG) include(Packages/COMPRESS) From 599ef7816180f783f8b10864380c3b7bc090f7ef Mon Sep 17 00:00:00 2001 
From: Axel Kohlmeyer Date: Wed, 19 Jun 2019 17:13:06 -0400 Subject: [PATCH 015/117] put dump_modify after the list of all dump variant doc pages --- doc/src/Commands_all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/Commands_all.txt b/doc/src/Commands_all.txt index 58ca148555..80f91d5291 100644 --- a/doc/src/Commands_all.txt +++ b/doc/src/Commands_all.txt @@ -50,11 +50,11 @@ An alphabetic list of all general LAMMPS commands. "dump"_dump.html, "dump adios"_dump_adios.html, "dump image"_dump_image.html, -"dump_modify"_dump_modify.html, "dump movie"_dump_image.html, "dump netcdf"_dump_netcdf.html, "dump netcdf/mpiio"_dump_netcdf.html, "dump vtk"_dump_vtk.html, +"dump_modify"_dump_modify.html, "dynamical_matrix"_dynamical_matrix.html, "echo"_echo.html, "fix"_fix.html, From f54ad0966849752c1636bf89ec090ddb9c7a9a7a Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 19 Jun 2019 17:53:40 -0400 Subject: [PATCH 016/117] recreate missing LaTeX files for image-only equations --- doc/src/Eqs/angle_class2_p6.tex | 15 +++++++++++++++ doc/src/Eqs/angle_cosine_buck6d.tex | 15 +++++++++++++++ doc/src/Eqs/improper_inversion_harmonic.tex | 15 +++++++++++++++ doc/src/Eqs/pair_agni.tex | 18 ++++++++++++++++++ .../Eqs/{pair_buck6d.txt => pair_buck6d.tex} | 1 + doc/src/Eqs/pair_coul_gauss.tex | 15 +++++++++++++++ 6 files changed, 79 insertions(+) create mode 100644 doc/src/Eqs/angle_class2_p6.tex create mode 100644 doc/src/Eqs/angle_cosine_buck6d.tex create mode 100644 doc/src/Eqs/improper_inversion_harmonic.tex create mode 100644 doc/src/Eqs/pair_agni.tex rename doc/src/Eqs/{pair_buck6d.txt => pair_buck6d.tex} (91%) create mode 100644 doc/src/Eqs/pair_coul_gauss.tex diff --git a/doc/src/Eqs/angle_class2_p6.tex b/doc/src/Eqs/angle_class2_p6.tex new file mode 100644 index 0000000000..37fd87e9ec --- /dev/null +++ b/doc/src/Eqs/angle_class2_p6.tex @@ -0,0 +1,15 @@ +\documentclass[12pt]{article} + +\pagestyle{empty} +\begin{document} + +$$ + E_{a} = 
K_2\left(\theta - \theta_0\right)^2 + K_3\left(\theta - \theta_0\right)^3 + K_4\left(\theta - \theta_0\right)^4 + K_5\left(\theta - \theta_0\right)^5 + K_6\left(\theta - \theta_0\right)^6 +$$ + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff --git a/doc/src/Eqs/angle_cosine_buck6d.tex b/doc/src/Eqs/angle_cosine_buck6d.tex new file mode 100644 index 0000000000..49be2fc8c2 --- /dev/null +++ b/doc/src/Eqs/angle_cosine_buck6d.tex @@ -0,0 +1,15 @@ +\documentclass[12pt]{article} + +\pagestyle{empty} +\begin{document} + +$$ + E = K \left[ 1 + \cos(n\theta - \theta_0)\right] +$$ + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff --git a/doc/src/Eqs/improper_inversion_harmonic.tex b/doc/src/Eqs/improper_inversion_harmonic.tex new file mode 100644 index 0000000000..a1607a1149 --- /dev/null +++ b/doc/src/Eqs/improper_inversion_harmonic.tex @@ -0,0 +1,15 @@ +\documentclass[12pt]{article} + +\pagestyle{empty} +\begin{document} + +$$ + E = K \left(\theta - \theta_0\right)^2 +$$ + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff --git a/doc/src/Eqs/pair_agni.tex b/doc/src/Eqs/pair_agni.tex new file mode 100644 index 0000000000..b9aa7882fc --- /dev/null +++ b/doc/src/Eqs/pair_agni.tex @@ -0,0 +1,18 @@ +\documentclass[12pt]{article} + +\pagestyle{empty} +\begin{document} + +\begin{eqnarray*} + F_i^u & = & \sum_t^{N_t}\alpha_t \cdot \exp\left[-\frac{\left(d_{i,t}^u\right)^2}{2l^2}\right] \\ + d_{i,t}^u & = & \left|\left| V_i^u(\eta) - V_t^u(\eta) \right|\right| \\ + V_i^u(\eta) & = & \sum_{j \neq i}\frac{r^u_{ij}}{r_{ij}} \cdot e^{-\left(\frac{r_{ij}}{\eta} \right)^2} \cdot f_d\left(r_{ij}\right) \\ + f_d\left(r_{ij}\right) & = & \frac{1}{2} \left[\cos\left(\frac{\pi r_{ij}}{R_c}\right) + 1 \right] +\end{eqnarray*} + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: diff --git a/doc/src/Eqs/pair_buck6d.txt 
b/doc/src/Eqs/pair_buck6d.tex similarity index 91% rename from doc/src/Eqs/pair_buck6d.txt rename to doc/src/Eqs/pair_buck6d.tex index 4888444d8c..903c0685be 100644 --- a/doc/src/Eqs/pair_buck6d.txt +++ b/doc/src/Eqs/pair_buck6d.tex @@ -1,6 +1,7 @@ \documentclass[12pt]{article} \begin{document} +\pagestyle{empty} \begin{eqnarray*} E = A e^{-\kappa r} - \frac{C}{r^6} \cdot \frac{1}{1 + D r^{14}} \qquad r < r_c \\ diff --git a/doc/src/Eqs/pair_coul_gauss.tex b/doc/src/Eqs/pair_coul_gauss.tex new file mode 100644 index 0000000000..1eb9c05a6f --- /dev/null +++ b/doc/src/Eqs/pair_coul_gauss.tex @@ -0,0 +1,15 @@ +\documentclass[12pt]{article} + +\pagestyle{empty} +\begin{document} + +$$ + E = \frac{C_{q_i q_j}}{\epsilon r_{ij}}\,\, \textrm{erf}\left(\alpha_{ij} r_{ij}\right)\quad\quad\quad r < r_c +$$ + +\end{document} + +%%% Local Variables: +%%% mode: latex +%%% TeX-master: t +%%% End: From c9fc83ef6fefbbb5dd8b94de535fe820750b1dca Mon Sep 17 00:00:00 2001 From: Anne Gunn Date: Thu, 20 Jun 2019 08:16:20 -0600 Subject: [PATCH 017/117] Move two non-equation images from Eqs folder to JPG folder --- doc/src/{Eqs => JPG}/dreiding_hbond.jpg | Bin doc/src/{Eqs => JPG}/umbrella.jpg | Bin doc/src/improper_fourier.txt | 2 +- doc/src/improper_inversion_harmonic.txt | 2 +- doc/src/improper_umbrella.txt | 2 +- doc/src/pair_hbond_dreiding.txt | 2 +- 6 files changed, 4 insertions(+), 4 deletions(-) rename doc/src/{Eqs => JPG}/dreiding_hbond.jpg (100%) rename doc/src/{Eqs => JPG}/umbrella.jpg (100%) diff --git a/doc/src/Eqs/dreiding_hbond.jpg b/doc/src/JPG/dreiding_hbond.jpg similarity index 100% rename from doc/src/Eqs/dreiding_hbond.jpg rename to doc/src/JPG/dreiding_hbond.jpg diff --git a/doc/src/Eqs/umbrella.jpg b/doc/src/JPG/umbrella.jpg similarity index 100% rename from doc/src/Eqs/umbrella.jpg rename to doc/src/JPG/umbrella.jpg diff --git a/doc/src/improper_fourier.txt b/doc/src/improper_fourier.txt index 8b2021dccd..1b569b3894 100644 --- a/doc/src/improper_fourier.txt +++ 
b/doc/src/improper_fourier.txt @@ -27,7 +27,7 @@ The {fourier} improper style uses the following potential: where K is the force constant and omega is the angle between the IL axis and the IJK plane: -:c,image(Eqs/umbrella.jpg) +:c,image(JPG/umbrella.jpg) If all parameter (see bellow) is not zero, the all the three possible angles will taken in account. diff --git a/doc/src/improper_inversion_harmonic.txt b/doc/src/improper_inversion_harmonic.txt index 857eaecc5f..bf114daeb0 100644 --- a/doc/src/improper_inversion_harmonic.txt +++ b/doc/src/improper_inversion_harmonic.txt @@ -28,7 +28,7 @@ where K is the force constant and omega is the angle evaluated for all three axis-plane combinations centered around the atom I. For the IL axis and the IJK plane omega looks as follows: -:c,image(Eqs/umbrella.jpg) +:c,image(JPG/umbrella.jpg) Note that the {inversion/harmonic} angle term evaluation differs to the "improper_umbrella"_improper_umbrella.html due to the cyclic diff --git a/doc/src/improper_umbrella.txt b/doc/src/improper_umbrella.txt index 6c29ec7ac5..9fe6ac07e1 100644 --- a/doc/src/improper_umbrella.txt +++ b/doc/src/improper_umbrella.txt @@ -29,7 +29,7 @@ commonly referred to as a classic inversion and used in the where K is the force constant and omega is the angle between the IL axis and the IJK plane: -:c,image(Eqs/umbrella.jpg) +:c,image(JPG/umbrella.jpg) If omega0 = 0 the potential term has a minimum for the planar structure. 
Otherwise it has two minima at +/- omega0, with a barrier diff --git a/doc/src/pair_hbond_dreiding.txt b/doc/src/pair_hbond_dreiding.txt index 9dd0bed87f..ec470f601f 100644 --- a/doc/src/pair_hbond_dreiding.txt +++ b/doc/src/pair_hbond_dreiding.txt @@ -46,7 +46,7 @@ Here, {r} is the radial distance between the donor (D) and acceptor (A) atoms and {theta} is the bond angle between the acceptor, the hydrogen (H) and the donor atoms: -:c,image(Eqs/dreiding_hbond.jpg) +:c,image(JPG/dreiding_hbond.jpg) These 3-body interactions can be defined for pairs of acceptor and donor atoms, based on atom types. For each donor/acceptor atom pair, From bc224bc66ed6e9fdf5a90ea7c2120600c31cbb18 Mon Sep 17 00:00:00 2001 From: Anne Gunn Date: Thu, 20 Jun 2019 09:15:40 -0600 Subject: [PATCH 018/117] Eliminate a doc build warning. pair_spin_dipole.txt was recently edited. Much content was removed and it no longer contained any internal reference links. But it still had a link anchor at the bottom. This was generating a build warning. I've removed the unused link anchor and an unneeded line separator. --- doc/src/pair_spin_dipole.txt | 6 ------ 1 file changed, 6 deletions(-) diff --git a/doc/src/pair_spin_dipole.txt b/doc/src/pair_spin_dipole.txt index 2f27f91d08..0d6471e07f 100644 --- a/doc/src/pair_spin_dipole.txt +++ b/doc/src/pair_spin_dipole.txt @@ -81,9 +81,3 @@ currently supported. "fix nve/spin"_fix_nve_spin.html [Default:] none - -:line - -:link(Allen2) -[(Allen)] Allen and Tildesley, Computer Simulation of Liquids, -Clarendon Press, Oxford, 1987. 
From 17bfed3590524da8ba71900ad8719e0fb81744d5 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 20 Jun 2019 11:39:41 -0400 Subject: [PATCH 019/117] move non-LaTeX images from doc/src/Eqs folder to doc/src/JPG this also updates links in doc sources referencing them --- doc/src/{Eqs => JPG}/dreiding_hbond.jpg | Bin doc/src/{Eqs => JPG}/umbrella.jpg | Bin doc/src/improper_fourier.txt | 2 +- doc/src/improper_inversion_harmonic.txt | 2 +- doc/src/improper_umbrella.txt | 2 +- doc/src/pair_hbond_dreiding.txt | 2 +- 6 files changed, 4 insertions(+), 4 deletions(-) rename doc/src/{Eqs => JPG}/dreiding_hbond.jpg (100%) rename doc/src/{Eqs => JPG}/umbrella.jpg (100%) diff --git a/doc/src/Eqs/dreiding_hbond.jpg b/doc/src/JPG/dreiding_hbond.jpg similarity index 100% rename from doc/src/Eqs/dreiding_hbond.jpg rename to doc/src/JPG/dreiding_hbond.jpg diff --git a/doc/src/Eqs/umbrella.jpg b/doc/src/JPG/umbrella.jpg similarity index 100% rename from doc/src/Eqs/umbrella.jpg rename to doc/src/JPG/umbrella.jpg diff --git a/doc/src/improper_fourier.txt b/doc/src/improper_fourier.txt index 8b2021dccd..1b569b3894 100644 --- a/doc/src/improper_fourier.txt +++ b/doc/src/improper_fourier.txt @@ -27,7 +27,7 @@ The {fourier} improper style uses the following potential: where K is the force constant and omega is the angle between the IL axis and the IJK plane: -:c,image(Eqs/umbrella.jpg) +:c,image(JPG/umbrella.jpg) If all parameter (see bellow) is not zero, the all the three possible angles will taken in account. diff --git a/doc/src/improper_inversion_harmonic.txt b/doc/src/improper_inversion_harmonic.txt index 857eaecc5f..bf114daeb0 100644 --- a/doc/src/improper_inversion_harmonic.txt +++ b/doc/src/improper_inversion_harmonic.txt @@ -28,7 +28,7 @@ where K is the force constant and omega is the angle evaluated for all three axis-plane combinations centered around the atom I. 
For the IL axis and the IJK plane omega looks as follows: -:c,image(Eqs/umbrella.jpg) +:c,image(JPG/umbrella.jpg) Note that the {inversion/harmonic} angle term evaluation differs to the "improper_umbrella"_improper_umbrella.html due to the cyclic diff --git a/doc/src/improper_umbrella.txt b/doc/src/improper_umbrella.txt index 6c29ec7ac5..9fe6ac07e1 100644 --- a/doc/src/improper_umbrella.txt +++ b/doc/src/improper_umbrella.txt @@ -29,7 +29,7 @@ commonly referred to as a classic inversion and used in the where K is the force constant and omega is the angle between the IL axis and the IJK plane: -:c,image(Eqs/umbrella.jpg) +:c,image(JPG/umbrella.jpg) If omega0 = 0 the potential term has a minimum for the planar structure. Otherwise it has two minima at +/- omega0, with a barrier diff --git a/doc/src/pair_hbond_dreiding.txt b/doc/src/pair_hbond_dreiding.txt index 9dd0bed87f..ec470f601f 100644 --- a/doc/src/pair_hbond_dreiding.txt +++ b/doc/src/pair_hbond_dreiding.txt @@ -46,7 +46,7 @@ Here, {r} is the radial distance between the donor (D) and acceptor (A) atoms and {theta} is the bond angle between the acceptor, the hydrogen (H) and the donor atoms: -:c,image(Eqs/dreiding_hbond.jpg) +:c,image(JPG/dreiding_hbond.jpg) These 3-body interactions can be defined for pairs of acceptor and donor atoms, based on atom types. 
For each donor/acceptor atom pair, From 6760866f447cbc3abe90d4ff7ec42c5192e48576 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 20 Jun 2019 11:49:53 -0400 Subject: [PATCH 020/117] integrate pair styles spin/dipole/cut and spin/dipole/long properly into docs --- doc/src/Commands_pair.txt | 2 ++ doc/src/Packages_details.txt | 2 ++ doc/src/lammps.book | 1 + doc/src/pair_spin_dipole.txt | 5 ----- doc/src/pair_style.txt | 2 ++ doc/src/pairs.txt | 1 + 6 files changed, 8 insertions(+), 5 deletions(-) diff --git a/doc/src/Commands_pair.txt b/doc/src/Commands_pair.txt index fea085b4ed..6077fad8ec 100644 --- a/doc/src/Commands_pair.txt +++ b/doc/src/Commands_pair.txt @@ -222,6 +222,8 @@ OPT. "sph/rhosum"_pair_sph_rhosum.html, "sph/taitwater"_pair_sph_taitwater.html, "sph/taitwater/morris"_pair_sph_taitwater_morris.html, +"spin/dipole/cut"_pair_spin_dipole.html, +"spin/dipole/long"_pair_spin_dipole.html, "spin/dmi"_pair_spin_dmi.html, "spin/exchange"_pair_spin_exchange.html, "spin/magelec"_pair_spin_magelec.html, diff --git a/doc/src/Packages_details.txt b/doc/src/Packages_details.txt index 1528adc420..bd5addda6f 100644 --- a/doc/src/Packages_details.txt +++ b/doc/src/Packages_details.txt @@ -911,6 +911,8 @@ the usual manner via MD. Various pair, fix, and compute styles. 
src/SPIN: filenames -> commands "Howto spins"_Howto_spins.html +"pair_style spin/dipole/cut"_pair_spin_dipole.html +"pair_style spin/dipole/long"_pair_spin_dipole.html "pair_style spin/dmi"_pair_spin_dmi.html "pair_style spin/exchange"_pair_spin_exchange.html "pair_style spin/magelec"_pair_spin_magelec.html diff --git a/doc/src/lammps.book b/doc/src/lammps.book index 2738c9b051..8abe9cffa1 100644 --- a/doc/src/lammps.book +++ b/doc/src/lammps.book @@ -647,6 +647,7 @@ pair_sph_lj.html pair_sph_rhosum.html pair_sph_taitwater.html pair_sph_taitwater_morris.html +pair_spin_dipole.html pair_spin_dmi.html pair_spin_exchange.html pair_spin_magelec.html diff --git a/doc/src/pair_spin_dipole.txt b/doc/src/pair_spin_dipole.txt index 2f27f91d08..01ac965be7 100644 --- a/doc/src/pair_spin_dipole.txt +++ b/doc/src/pair_spin_dipole.txt @@ -82,8 +82,3 @@ currently supported. [Default:] none -:line - -:link(Allen2) -[(Allen)] Allen and Tildesley, Computer Simulation of Liquids, -Clarendon Press, Oxford, 1987. diff --git a/doc/src/pair_style.txt b/doc/src/pair_style.txt index e305bc705d..8a35e5a467 100644 --- a/doc/src/pair_style.txt +++ b/doc/src/pair_style.txt @@ -284,6 +284,8 @@ accelerated styles exist. 
"sph/rhosum"_pair_sph_rhosum.html - "sph/taitwater"_pair_sph_taitwater.html - "sph/taitwater/morris"_pair_sph_taitwater_morris.html - +"spin/dipole/cut"_pair_spin_dipole.html - +"spin/dipole/long"_pair_spin_dipole.html - "spin/dmi"_pair_spin_dmi.html - "spin/exchange"_pair_spin_exchange.html - "spin/magelec"_pair_spin_magelec.html - diff --git a/doc/src/pairs.txt b/doc/src/pairs.txt index babdd2d1cc..2f63f18bad 100644 --- a/doc/src/pairs.txt +++ b/doc/src/pairs.txt @@ -105,6 +105,7 @@ Pair Styles :h1 pair_sph_rhosum pair_sph_taitwater pair_sph_taitwater_morris + pair_spin_dipole pair_spin_dmi pair_spin_exchange pair_spin_magelec From c460d05bc628de96374bdbcf9ae81dabb0c15ac2 Mon Sep 17 00:00:00 2001 From: Aidan Thompson Date: Fri, 21 Jun 2019 00:26:44 -0600 Subject: [PATCH 021/117] Added NULL initialization for snaptr --- src/SNAP/pair_snap.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp index 6eb05f85a4..f9ba8922a0 100644 --- a/src/SNAP/pair_snap.cpp +++ b/src/SNAP/pair_snap.cpp @@ -52,6 +52,7 @@ PairSNAP::PairSNAP(LAMMPS *lmp) : Pair(lmp) beta_max = 0; beta = NULL; bispectrum = NULL; + snaptr = NULL; } /* ---------------------------------------------------------------------- */ From 7a56a4be2435d2c15264da95abca26e42dae4d20 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 21 Jun 2019 14:18:18 -0400 Subject: [PATCH 022/117] add option to pair_modify to explicitly turn off F dot r --- doc/src/pair_modify.txt | 10 ++++++++-- src/pair.cpp | 4 ++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/doc/src/pair_modify.txt b/doc/src/pair_modify.txt index 4824a3d83e..c446aa29d0 100644 --- a/doc/src/pair_modify.txt +++ b/doc/src/pair_modify.txt @@ -13,7 +13,8 @@ pair_modify command :h3 pair_modify keyword values ... 
:pre one or more keyword/value pairs may be listed :ulb,l -keyword = {pair} or {shift} or {mix} or {table} or {table/disp} or {tabinner} or {tabinner/disp} or {tail} or {compute} :l +keyword = {pair} or {shift} or {mix} or {table} or {table/disp} or {tabinner} +or {tabinner/disp} or {tail} or {compute} or {nofdotr} :l {pair} values = sub-style N {special} which wt1 wt2 wt3 or sub-style N {compute/tally} flag sub-style = sub-style of "pair hybrid"_pair_hybrid.html @@ -33,7 +34,8 @@ keyword = {pair} or {shift} or {mix} or {table} or {table/disp} or {tabinner} or {tabinner/disp} value = cutoff cutoff = inner cutoff at which to begin table (distance units) {tail} value = {yes} or {no} - {compute} value = {yes} or {no} :pre + {compute} value = {yes} or {no} + {nofdotr} :pre :ule [Examples:] @@ -212,6 +214,10 @@ a pair style will not work, because the "kspace_style"_kspace_style.html command requires a Kspace-compatible pair style be defined. +The {nofdotr} keyword allows to disable an optimization that computes +the global stress tensor from the total forces and atom positions rather +than from summing forces between individual pairs of atoms. 
+ :line The {special} keyword allows to override the 1-2, 1-3, and 1-4 diff --git a/src/pair.cpp b/src/pair.cpp index 92b5d003a8..2b4863a54c 100644 --- a/src/pair.cpp +++ b/src/pair.cpp @@ -186,6 +186,10 @@ void Pair::modify_params(int narg, char **arg) else if (strcmp(arg[iarg+1],"no") == 0) compute_flag = 0; else error->all(FLERR,"Illegal pair_modify command"); iarg += 2; + } else if (strcmp(arg[iarg],"nofdotr") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal pair_modify command"); + no_virial_fdotr_compute = 1; + ++iarg; } else error->all(FLERR,"Illegal pair_modify command"); } } From f190647ab47cf930a84764b23f4fd6735ac1e1f6 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 21 Jun 2019 21:23:20 -0400 Subject: [PATCH 023/117] use snprintf() instead of sprintf() to avoid buffer overflows when copying style names --- src/atom.cpp | 8 ++++---- src/domain.cpp | 4 ++-- src/force.cpp | 28 ++++++++++++++-------------- src/input.cpp | 4 ++-- src/update.cpp | 8 ++++---- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/atom.cpp b/src/atom.cpp index a53f35d7b3..1f5d5a80c4 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -455,8 +455,8 @@ void Atom::create_avec(const char *style, int narg, char **arg, int trysuffix) if (sflag) { char estyle[256]; - if (sflag == 1) sprintf(estyle,"%s/%s",style,lmp->suffix); - else sprintf(estyle,"%s/%s",style,lmp->suffix2); + if (sflag == 1) snprintf(estyle,256,"%s/%s",style,lmp->suffix); + else snprintf(estyle,256,"%s/%s",style,lmp->suffix2); int n = strlen(estyle) + 1; atom_style = new char[n]; strcpy(atom_style,estyle); @@ -487,7 +487,7 @@ AtomVec *Atom::new_avec(const char *style, int trysuffix, int &sflag) if (lmp->suffix) { sflag = 1; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix); + snprintf(estyle,256,"%s/%s",style,lmp->suffix); if (avec_map->find(estyle) != avec_map->end()) { AtomVecCreator avec_creator = (*avec_map)[estyle]; return avec_creator(lmp); @@ -497,7 +497,7 @@ AtomVec 
*Atom::new_avec(const char *style, int trysuffix, int &sflag) if (lmp->suffix2) { sflag = 2; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix2); + snprintf(estyle,256,"%s/%s",style,lmp->suffix2); if (avec_map->find(estyle) != avec_map->end()) { AtomVecCreator avec_creator = (*avec_map)[estyle]; return avec_creator(lmp); diff --git a/src/domain.cpp b/src/domain.cpp index 2e7652a434..74d7560c31 100644 --- a/src/domain.cpp +++ b/src/domain.cpp @@ -1734,7 +1734,7 @@ void Domain::add_region(int narg, char **arg) if (lmp->suffix_enable) { if (lmp->suffix) { char estyle[256]; - sprintf(estyle,"%s/%s",arg[1],lmp->suffix); + snprintf(estyle,256,"%s/%s",arg[1],lmp->suffix); if (region_map->find(estyle) != region_map->end()) { RegionCreator region_creator = (*region_map)[estyle]; regions[nregion] = region_creator(lmp, narg, arg); @@ -1746,7 +1746,7 @@ void Domain::add_region(int narg, char **arg) if (lmp->suffix2) { char estyle[256]; - sprintf(estyle,"%s/%s",arg[1],lmp->suffix2); + snprintf(estyle,256,"%s/%s",arg[1],lmp->suffix2); if (region_map->find(estyle) != region_map->end()) { RegionCreator region_creator = (*region_map)[estyle]; regions[nregion] = region_creator(lmp, narg, arg); diff --git a/src/force.cpp b/src/force.cpp index ed27df1215..2648358932 100644 --- a/src/force.cpp +++ b/src/force.cpp @@ -238,7 +238,7 @@ Pair *Force::new_pair(const char *style, int trysuffix, int &sflag) if (lmp->suffix) { sflag = 1; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix); + snprintf(estyle,256,"%s/%s",style,lmp->suffix); if (pair_map->find(estyle) != pair_map->end()) { PairCreator pair_creator = (*pair_map)[estyle]; return pair_creator(lmp); @@ -247,7 +247,7 @@ Pair *Force::new_pair(const char *style, int trysuffix, int &sflag) if (lmp->suffix2) { sflag = 2; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix2); + snprintf(estyle,256,"%s/%s",style,lmp->suffix2); if (pair_map->find(estyle) != pair_map->end()) { PairCreator pair_creator = 
(*pair_map)[estyle]; return pair_creator(lmp); @@ -350,7 +350,7 @@ Bond *Force::new_bond(const char *style, int trysuffix, int &sflag) if (lmp->suffix) { sflag = 1; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix); + snprintf(estyle,256,"%s/%s",style,lmp->suffix); if (bond_map->find(estyle) != bond_map->end()) { BondCreator bond_creator = (*bond_map)[estyle]; return bond_creator(lmp); @@ -360,7 +360,7 @@ Bond *Force::new_bond(const char *style, int trysuffix, int &sflag) if (lmp->suffix2) { sflag = 2; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix2); + snprintf(estyle,256,"%s/%s",style,lmp->suffix2); if (bond_map->find(estyle) != bond_map->end()) { BondCreator bond_creator = (*bond_map)[estyle]; return bond_creator(lmp); @@ -429,7 +429,7 @@ Angle *Force::new_angle(const char *style, int trysuffix, int &sflag) if (lmp->suffix) { sflag = 1; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix); + snprintf(estyle,256,"%s/%s",style,lmp->suffix); if (angle_map->find(estyle) != angle_map->end()) { AngleCreator angle_creator = (*angle_map)[estyle]; return angle_creator(lmp); @@ -439,7 +439,7 @@ Angle *Force::new_angle(const char *style, int trysuffix, int &sflag) if (lmp->suffix2) { sflag = 2; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix); + snprintf(estyle,256,"%s/%s",style,lmp->suffix); if (angle_map->find(estyle) != angle_map->end()) { AngleCreator angle_creator = (*angle_map)[estyle]; return angle_creator(lmp); @@ -509,7 +509,7 @@ Dihedral *Force::new_dihedral(const char *style, int trysuffix, int &sflag) if (lmp->suffix) { sflag = 1; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix); + snprintf(estyle,256,"%s/%s",style,lmp->suffix); if (dihedral_map->find(estyle) != dihedral_map->end()) { DihedralCreator dihedral_creator = (*dihedral_map)[estyle]; return dihedral_creator(lmp); @@ -519,7 +519,7 @@ Dihedral *Force::new_dihedral(const char *style, int trysuffix, int &sflag) if (lmp->suffix2) { sflag = 2; 
char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix2); + snprintf(estyle,256,"%s/%s",style,lmp->suffix2); if (dihedral_map->find(estyle) != dihedral_map->end()) { DihedralCreator dihedral_creator = (*dihedral_map)[estyle]; return dihedral_creator(lmp); @@ -588,7 +588,7 @@ Improper *Force::new_improper(const char *style, int trysuffix, int &sflag) if (lmp->suffix) { sflag = 1; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix); + snprintf(estyle,256,"%s/%s",style,lmp->suffix); if (improper_map->find(estyle) != improper_map->end()) { ImproperCreator improper_creator = (*improper_map)[estyle]; return improper_creator(lmp); @@ -598,7 +598,7 @@ Improper *Force::new_improper(const char *style, int trysuffix, int &sflag) if (lmp->suffix2) { sflag = 2; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix2); + snprintf(estyle,256,"%s/%s",style,lmp->suffix2); if (improper_map->find(estyle) != improper_map->end()) { ImproperCreator improper_creator = (*improper_map)[estyle]; return improper_creator(lmp); @@ -671,7 +671,7 @@ KSpace *Force::new_kspace(const char *style, int trysuffix, int &sflag) if (lmp->suffix) { sflag = 1; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix); + snprintf(estyle,256,"%s/%s",style,lmp->suffix); if (kspace_map->find(estyle) != kspace_map->end()) { KSpaceCreator kspace_creator = (*kspace_map)[estyle]; return kspace_creator(lmp); @@ -681,7 +681,7 @@ KSpace *Force::new_kspace(const char *style, int trysuffix, int &sflag) if (lmp->suffix2) { sflag = 1; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix2); + snprintf(estyle,256,"%s/%s",style,lmp->suffix2); if (kspace_map->find(estyle) != kspace_map->end()) { KSpaceCreator kspace_creator = (*kspace_map)[estyle]; return kspace_creator(lmp); @@ -735,8 +735,8 @@ void Force::store_style(char *&str, const char *style, int sflag) { if (sflag) { char estyle[256]; - if (sflag == 1) sprintf(estyle,"%s/%s",style,lmp->suffix); - else 
sprintf(estyle,"%s/%s",style,lmp->suffix2); + if (sflag == 1) snprintf(estyle,256,"%s/%s",style,lmp->suffix); + else snprintf(estyle,256,"%s/%s",style,lmp->suffix2); int n = strlen(estyle) + 1; str = new char[n]; strcpy(str,estyle); diff --git a/src/input.cpp b/src/input.cpp index 0111cb5738..9e0ad78d9e 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -1818,11 +1818,11 @@ void Input::pair_style() if (!match && lmp->suffix_enable) { char estyle[256]; if (lmp->suffix) { - sprintf(estyle,"%s/%s",arg[0],lmp->suffix); + snprintf(estyle,256,"%s/%s",arg[0],lmp->suffix); if (strcmp(estyle,force->pair_style) == 0) match = 1; } if (lmp->suffix2) { - sprintf(estyle,"%s/%s",arg[0],lmp->suffix2); + snprintf(estyle,256,"%s/%s",arg[0],lmp->suffix2); if (strcmp(estyle,force->pair_style) == 0) match = 1; } } diff --git a/src/update.cpp b/src/update.cpp index 6f9c2c9a07..f5e706e354 100644 --- a/src/update.cpp +++ b/src/update.cpp @@ -316,8 +316,8 @@ void Update::create_integrate(int narg, char **arg, int trysuffix) if (sflag) { char estyle[256]; - if (sflag == 1) sprintf(estyle,"%s/%s",arg[0],lmp->suffix); - else sprintf(estyle,"%s/%s",arg[0],lmp->suffix2); + if (sflag == 1) snprintf(estyle,256,"%s/%s",arg[0],lmp->suffix); + else snprintf(estyle,256,"%s/%s",arg[0],lmp->suffix2); int n = strlen(estyle) + 1; integrate_style = new char[n]; strcpy(integrate_style,estyle); @@ -339,7 +339,7 @@ void Update::new_integrate(char *style, int narg, char **arg, if (lmp->suffix) { sflag = 1; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix); + snprintf(estyle,256,"%s/%s",style,lmp->suffix); if (integrate_map->find(estyle) != integrate_map->end()) { IntegrateCreator integrate_creator = (*integrate_map)[estyle]; integrate = integrate_creator(lmp, narg, arg); @@ -350,7 +350,7 @@ void Update::new_integrate(char *style, int narg, char **arg, if (lmp->suffix2) { sflag = 2; char estyle[256]; - sprintf(estyle,"%s/%s",style,lmp->suffix2); + snprintf(estyle,256,"%s/%s",style,lmp->suffix2); if 
(integrate_map->find(estyle) != integrate_map->end()) { IntegrateCreator integrate_creator = (*integrate_map)[estyle]; integrate = integrate_creator(lmp, narg, arg); From 58f9380c45bb92d35565d884b7ee31c6fb2a22b8 Mon Sep 17 00:00:00 2001 From: "Vishnu V. Krishnan" Date: Sat, 22 Jun 2019 19:46:21 +0530 Subject: [PATCH 024/117] Archlinux install documentation Link to #1495 --- doc/src/Install_linux.txt | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/doc/src/Install_linux.txt b/doc/src/Install_linux.txt index ec063e7a95..9aebd30c05 100644 --- a/doc/src/Install_linux.txt +++ b/doc/src/Install_linux.txt @@ -15,7 +15,8 @@ Binaries are available for different versions of Linux: "Pre-built Fedora Linux executables"_#fedora "Pre-built EPEL Linux executables (RHEL, CentOS)"_#epel "Pre-built OpenSuse Linux executables"_#opensuse -"Gentoo Linux executable"_#gentoo :all(b) +"Gentoo Linux executable"_#gentoo +"Arch Linux build-script"_#arch :all(b) :line @@ -168,3 +169,31 @@ for details. Thanks to Nicolas Bock and Christoph Junghans (LANL) for setting up this Gentoo capability. + +:line + +Arch Linux build-script :h4,link(arch) + +LAMMPS is available via Arch's unofficial Arch User Repository (AUR). + +There are three scripts available, named lammps, lammps-beta and lammps-git. +They respectively package the stable, patch and git releases. + +To install, you will need to have the git package installed. You may use +any of the above names in place of lammps. + +$ git clone https://aur.archlinux.org/lammps.git :pre +$ cd lammps :pre +$ makepkg -s :pre +# makepkg -i :pre + +To update, you may repeat the above, or change into the cloned directory, +and execute the following, after which, if there are any changes, you may +use makepkg as above. + +$ git pull :pre + +Alternatively, you may use an AUR helper to install these packages. 
+ +Note that the AUR provides build-scripts that download the source and +then build the package on your machine. From 3b6cc29f64a201f3a9083ed1a62e49e35ef3f46f Mon Sep 17 00:00:00 2001 From: Evangelos Voyiatzis Date: Sun, 23 Jun 2019 20:16:25 +0200 Subject: [PATCH 025/117] Implementation of inner/middle/outer compute methods for lj/class2/coul/long --- src/CLASS2/pair_lj_class2_coul_long.cpp | 412 +++++++++++++++++++++++- src/CLASS2/pair_lj_class2_coul_long.h | 21 +- 2 files changed, 416 insertions(+), 17 deletions(-) diff --git a/src/CLASS2/pair_lj_class2_coul_long.cpp b/src/CLASS2/pair_lj_class2_coul_long.cpp index 85fe0152e2..7bc67a5afa 100644 --- a/src/CLASS2/pair_lj_class2_coul_long.cpp +++ b/src/CLASS2/pair_lj_class2_coul_long.cpp @@ -2,12 +2,10 @@ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov - Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. - See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ @@ -20,8 +18,12 @@ #include "comm.h" #include "force.h" #include "kspace.h" +#include "update.h" +#include "integrate.h" +#include "respa.h" #include "neighbor.h" #include "neigh_list.h" +#include "neigh_request.h" #include "math_const.h" #include "memory.h" #include "error.h" @@ -42,6 +44,7 @@ using namespace MathConst; PairLJClass2CoulLong::PairLJClass2CoulLong(LAMMPS *lmp) : Pair(lmp) { ewaldflag = pppmflag = 1; + respa_enable = 1; writedata = 1; ftable = NULL; } @@ -196,6 +199,377 @@ void PairLJClass2CoulLong::compute(int eflag, int vflag) if (vflag_fdotr) virial_fdotr_compute(); } +/* ---------------------------------------------------------------------- */ + +void PairLJClass2CoulLong::compute_inner() +{ + int i,j,ii,jj,inum,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair; + double rsq,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double rsw; + int *ilist,*jlist,*numneigh,**firstneigh; + + double **x = atom->x; + double **f = atom->f; + double *q = atom->q; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + int newton_pair = force->newton_pair; + double qqrd2e = force->qqrd2e; + + inum = list->inum_inner; + ilist = list->ilist_inner; + numneigh = list->numneigh_inner; + firstneigh = list->firstneigh_inner; + + double cut_out_on = cut_respa[0]; + double cut_out_off = cut_respa[1]; + + double cut_out_diff = cut_out_off - cut_out_on; + double cut_out_on_sq = cut_out_on*cut_out_on; + double cut_out_off_sq = cut_out_off*cut_out_off; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul 
= special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cut_out_off_sq) { + r2inv = 1.0/rsq; + forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*forcecoul; + + jtype = type[j]; + if (rsq < cut_ljsq[itype][jtype]) { + rinv = sqrt(r2inv); + r3inv = r2inv*rinv; + r6inv = r3inv*r3inv; + forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]); + } else forcelj = 0.0; + + fpair = (forcecoul + factor_lj*forcelj) * r2inv; + if (rsq > cut_out_on_sq) { + rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; + fpair *= 1.0 + rsw*rsw*(2.0*rsw-3.0); + } + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (newton_pair || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +void PairLJClass2CoulLong::compute_middle() +{ + int i,j,ii,jj,inum,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fpair; + double rsq,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double rsw; + int *ilist,*jlist,*numneigh,**firstneigh; + + double **x = atom->x; + double **f = atom->f; + double *q = atom->q; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + int newton_pair = force->newton_pair; + double qqrd2e = force->qqrd2e; + + inum = list->inum_middle; + ilist = list->ilist_middle; + numneigh = list->numneigh_middle; + firstneigh = list->firstneigh_middle; + + double cut_in_off = cut_respa[0]; + double cut_in_on = cut_respa[1]; + double cut_out_on = cut_respa[2]; + double cut_out_off = cut_respa[3]; + + double cut_in_diff = cut_in_on - cut_in_off; + double cut_out_diff = cut_out_off - cut_out_on; + double cut_in_off_sq = cut_in_off*cut_in_off; + 
double cut_in_on_sq = cut_in_on*cut_in_on; + double cut_out_on_sq = cut_out_on*cut_out_on; + double cut_out_off_sq = cut_out_off*cut_out_off; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cut_out_off_sq && rsq > cut_in_off_sq) { + r2inv = 1.0/rsq; + forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*forcecoul; + + jtype = type[j]; + if (rsq < cut_ljsq[itype][jtype]) { + rinv = sqrt(r2inv); + r3inv = r2inv*rinv; + r6inv = r3inv*r3inv; + forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]); + } else forcelj = 0.0; + + fpair = (forcecoul + factor_lj*forcelj) * r2inv; + if (rsq < cut_in_on_sq) { + rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff; + fpair *= rsw*rsw*(3.0 - 2.0*rsw); + } + if (rsq > cut_out_on_sq) { + rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; + fpair *= 1.0 + rsw*rsw*(2.0*rsw - 3.0); + } + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (newton_pair || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +void PairLJClass2CoulLong::compute_outer(int eflag, int vflag) +{ + int i,j,ii,jj,inum,jnum,itype,jtype,itable; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double fraction,table; + double r,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + double rsw; + int *ilist,*jlist,*numneigh,**firstneigh; + double 
rsq; + + evdwl = ecoul = 0.0; + ev_init(eflag,vflag); + + double **x = atom->x; + double **f = atom->f; + double *q = atom->q; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + int newton_pair = force->newton_pair; + double qqrd2e = force->qqrd2e; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + double cut_in_off = cut_respa[2]; + double cut_in_on = cut_respa[3]; + + double cut_in_diff = cut_in_on - cut_in_off; + double cut_in_off_sq = cut_in_off*cut_in_off; + double cut_in_on_sq = cut_in_on*cut_in_on; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - 1.0); + if (rsq > cut_in_off_sq) { + if (rsq < cut_in_on_sq) { + rsw = (r - cut_in_off)/cut_in_diff; + forcecoul += prefactor*rsw*rsw*(3.0 - 2.0*rsw); + if (factor_coul < 1.0) + forcecoul -= + (1.0-factor_coul)*prefactor*rsw*rsw*(3.0 - 2.0*rsw); + } else { + forcecoul += prefactor; + if (factor_coul < 1.0) + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = 
rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype] && rsq > cut_in_off_sq) { + rinv = sqrt(r2inv); + r3inv = r2inv*rinv; + r6inv = r3inv*r3inv; + forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]); + if (rsq < cut_in_on_sq) { + rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff; + forcelj *= rsw*rsw*(3.0 - 2.0*rsw); + } + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (newton_pair || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (eflag) { + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + ecoul = prefactor*erfc; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ptable[itable] + fraction*dptable[itable]; + prefactor = qtmp*q[j] * table; + ecoul -= (1.0-factor_coul)*prefactor; + } + } + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + rinv = sqrt(r2inv); + r3inv = r2inv*rinv; + r6inv = r3inv*r3inv; + evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (vflag) { + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else { + table = vtable[itable] + fraction*dvtable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + 
table = ptable[itable] + fraction*dptable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + } else forcecoul = 0.0; + + if (rsq <= cut_in_off_sq) { + rinv = sqrt(r2inv); + r3inv = r2inv*rinv; + r6inv = r3inv*r3inv; + forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]); + } else if (rsq <= cut_in_on_sq) { + rinv = sqrt(r2inv); + r3inv = r2inv*rinv; + r6inv = r3inv*r3inv; + forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]); + } + fpair = (forcecoul + factor_lj*forcelj) * r2inv; + } + + if (evflag) ev_tally(i,j,nlocal,newton_pair, + evdwl,ecoul,fpair,delx,dely,delz); + } + } + } +} + /* ---------------------------------------------------------------------- allocate all arrays ------------------------------------------------------------------------- */ @@ -289,10 +663,33 @@ void PairLJClass2CoulLong::init_style() if (!atom->q_flag) error->all(FLERR, "Pair style lj/class2/coul/long requires atom attribute q"); - - neighbor->request(this,instance_me); + + // request regular or rRESPA neighbor list + + int irequest; + int respa = 0; + + if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { + if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; + if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + } + + irequest = neighbor->request(this,instance_me); + + if (respa >= 1) { + neighbor->requests[irequest]->respaouter = 1; + neighbor->requests[irequest]->respainner = 1; + } + if (respa == 2) neighbor->requests[irequest]->respamiddle = 1; cut_coulsq = cut_coul * cut_coul; + + // set rRESPA cutoffs + + if (strstr(update->integrate_style,"respa") && + ((Respa *) update->integrate)->level_inner >= 0) + cut_respa = ((Respa *) update->integrate)->cutoff; + else cut_respa = NULL; // insure use of KSpace long-range solver, set g_ewald @@ -301,7 +698,7 @@ void PairLJClass2CoulLong::init_style() g_ewald = force->kspace->g_ewald; // setup force tables - if (ncoultablebits) 
init_tables(cut_coul,NULL); + if (ncoultablebits) init_tables(cut_coul,cut_respa); } /* ---------------------------------------------------------------------- @@ -341,6 +738,11 @@ double PairLJClass2CoulLong::init_one(int i, int j) lj3[j][i] = lj3[i][j]; lj4[j][i] = lj4[i][j]; offset[j][i] = offset[i][j]; + + // check interior rRESPA cutoff + + if (cut_respa && MIN(cut_lj[i][j],cut_coul) < cut_respa[3]) + error->all(FLERR,"Pair cutoff < Respa interior cutoff"); // compute I,J contribution to long-range tail correction // count total # of atoms of type I and J via Allreduce diff --git a/src/CLASS2/pair_lj_class2_coul_long.h b/src/CLASS2/pair_lj_class2_coul_long.h index 202aaaaa43..447191ea1f 100644 --- a/src/CLASS2/pair_lj_class2_coul_long.h +++ b/src/CLASS2/pair_lj_class2_coul_long.h @@ -2,12 +2,10 @@ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov - Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. - See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ @@ -31,7 +29,7 @@ class PairLJClass2CoulLong : public Pair { virtual void compute(int, int); virtual void settings(int, char **); void coeff(int, char **); - virtual void init_style(); + void init_style(); virtual double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); @@ -40,6 +38,10 @@ class PairLJClass2CoulLong : public Pair { void write_data(FILE *); void write_data_all(FILE *); double single(int, int, int, int, double, double, double, double &); + + void compute_inner(); + void compute_middle(); + void compute_outer(int, int); void *extract(const char *, int &); protected: @@ -49,6 +51,7 @@ class PairLJClass2CoulLong : public Pair { double **epsilon,**sigma; double **lj1,**lj2,**lj3,**lj4,**offset; double g_ewald; + double *cut_respa; virtual void allocate(); }; @@ -59,23 +62,17 @@ class PairLJClass2CoulLong : public Pair { #endif /* ERROR/WARNING messages: - E: Illegal ... command - Self-explanatory. Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. - E: Incorrect args for pair coefficients - Self-explanatory. Check the input script or data file. - E: Pair style lj/class2/coul/long requires atom attribute q - The atom style defined does not have this attribute. - E: Pair style requires a KSpace style - No kspace style is defined. - +E: Pair cutoff < Respa interior cutoff +One or more pairwise cutoffs are too short to use with the specified +rRESPA cutoffs. 
*/ From f31faafeec77fafddacf3b716ca7fc7ae37b3bbe Mon Sep 17 00:00:00 2001 From: Evangelos Voyiatzis Date: Sun, 23 Jun 2019 20:18:39 +0200 Subject: [PATCH 026/117] modification in the doc file of lj/class2 style --- doc/src/pair_class2.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/src/pair_class2.txt b/doc/src/pair_class2.txt index 2d6b325fed..9e25560071 100644 --- a/doc/src/pair_class2.txt +++ b/doc/src/pair_class2.txt @@ -155,7 +155,7 @@ All of the lj/class2 pair styles write their information to "binary restart files"_restart.html, so pair_style and pair_coeff commands do not need to be specified in an input script that reads a restart file. -Only the {lj/class2} pair style support the use of the +Only the {lj/class2} and {lj/class2/coul/long} pair styles support the use of the {inner}, {middle}, and {outer} keywords of the "run_style respa"_run_style.html command, meaning the pairwise forces can be partitioned by distance at different levels of the rRESPA hierarchy. From 171d74f2f277896ee7e45a85d5aac775d0a06e3a Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 23 Jun 2019 16:43:54 -0400 Subject: [PATCH 027/117] remove class member name clashes. 
Pointers contains `infile` so we rename other uses to `inpfile` --- src/MISC/fix_orient_bcc.cpp | 16 ++++++------- src/MISC/fix_orient_fcc.cpp | 16 ++++++------- src/REPLICA/neb.cpp | 8 +++---- src/REPLICA/neb.h | 2 +- src/RIGID/fix_rigid.cpp | 42 +++++++++++++++++------------------ src/RIGID/fix_rigid.h | 4 ++-- src/RIGID/fix_rigid_small.cpp | 42 +++++++++++++++++------------------ src/RIGID/fix_rigid_small.h | 4 ++-- src/SPIN/neb_spin.cpp | 8 +++---- src/SPIN/neb_spin.h | 2 +- src/read_restart.cpp | 28 +++++++++++------------ 11 files changed, 86 insertions(+), 86 deletions(-) diff --git a/src/MISC/fix_orient_bcc.cpp b/src/MISC/fix_orient_bcc.cpp index c614577933..02cce5a014 100644 --- a/src/MISC/fix_orient_bcc.cpp +++ b/src/MISC/fix_orient_bcc.cpp @@ -115,25 +115,25 @@ FixOrientBCC::FixOrientBCC(LAMMPS *lmp, int narg, char **arg) : char *result; int count; - FILE *infile = fopen(xifilename,"r"); - if (infile == NULL) error->one(FLERR,"Fix orient/bcc file open failed"); + FILE *inpfile = fopen(xifilename,"r"); + if (inpfile == NULL) error->one(FLERR,"Fix orient/bcc file open failed"); for (int i = 0; i < 4; i++) { - result = fgets(line,IMGMAX,infile); + result = fgets(line,IMGMAX,inpfile); if (!result) error->one(FLERR,"Fix orient/bcc file read failed"); count = sscanf(line,"%lg %lg %lg",&Rxi[i][0],&Rxi[i][1],&Rxi[i][2]); if (count != 3) error->one(FLERR,"Fix orient/bcc file read failed"); } - fclose(infile); + fclose(inpfile); - infile = fopen(chifilename,"r"); - if (infile == NULL) error->one(FLERR,"Fix orient/bcc file open failed"); + inpfile = fopen(chifilename,"r"); + if (inpfile == NULL) error->one(FLERR,"Fix orient/bcc file open failed"); for (int i = 0; i < 4; i++) { - result = fgets(line,IMGMAX,infile); + result = fgets(line,IMGMAX,inpfile); if (!result) error->one(FLERR,"Fix orient/bcc file read failed"); count = sscanf(line,"%lg %lg %lg",&Rchi[i][0],&Rchi[i][1],&Rchi[i][2]); if (count != 3) error->one(FLERR,"Fix orient/bcc file read failed"); } - 
fclose(infile); + fclose(inpfile); } MPI_Bcast(&Rxi[0][0],18,MPI_DOUBLE,0,world); diff --git a/src/MISC/fix_orient_fcc.cpp b/src/MISC/fix_orient_fcc.cpp index 5b394adde7..fc827ceb8f 100644 --- a/src/MISC/fix_orient_fcc.cpp +++ b/src/MISC/fix_orient_fcc.cpp @@ -113,25 +113,25 @@ FixOrientFCC::FixOrientFCC(LAMMPS *lmp, int narg, char **arg) : char *result; int count; - FILE *infile = fopen(xifilename,"r"); - if (infile == NULL) error->one(FLERR,"Fix orient/fcc file open failed"); + FILE *inpfile = fopen(xifilename,"r"); + if (inpfile == NULL) error->one(FLERR,"Fix orient/fcc file open failed"); for (int i = 0; i < 6; i++) { - result = fgets(line,IMGMAX,infile); + result = fgets(line,IMGMAX,inpfile); if (!result) error->one(FLERR,"Fix orient/fcc file read failed"); count = sscanf(line,"%lg %lg %lg",&Rxi[i][0],&Rxi[i][1],&Rxi[i][2]); if (count != 3) error->one(FLERR,"Fix orient/fcc file read failed"); } - fclose(infile); + fclose(inpfile); - infile = fopen(chifilename,"r"); - if (infile == NULL) error->one(FLERR,"Fix orient/fcc file open failed"); + inpfile = fopen(chifilename,"r"); + if (inpfile == NULL) error->one(FLERR,"Fix orient/fcc file open failed"); for (int i = 0; i < 6; i++) { - result = fgets(line,IMGMAX,infile); + result = fgets(line,IMGMAX,inpfile); if (!result) error->one(FLERR,"Fix orient/fcc file read failed"); count = sscanf(line,"%lg %lg %lg",&Rchi[i][0],&Rchi[i][1],&Rchi[i][2]); if (count != 3) error->one(FLERR,"Fix orient/fcc file read failed"); } - fclose(infile); + fclose(inpfile); } MPI_Bcast(&Rxi[0][0],18,MPI_DOUBLE,0,world); diff --git a/src/REPLICA/neb.cpp b/src/REPLICA/neb.cpp index 3963379356..6b68c52dbb 100644 --- a/src/REPLICA/neb.cpp +++ b/src/REPLICA/neb.cpp @@ -146,12 +146,12 @@ void NEB::command(int narg, char **arg) if (strcmp(arg[5],"final") == 0) { if (narg != 7 && narg !=8) error->universe_all(FLERR,"Illegal NEB command"); - infile = arg[6]; - readfile(infile,0); + inpfile = arg[6]; + readfile(inpfile,0); } else if 
(strcmp(arg[5],"each") == 0) { if (narg != 7 && narg !=8) error->universe_all(FLERR,"Illegal NEB command"); - infile = arg[6]; - readfile(infile,1); + inpfile = arg[6]; + readfile(inpfile,1); } else if (strcmp(arg[5],"none") == 0) { if (narg != 6 && narg !=7) error->universe_all(FLERR,"Illegal NEB command"); } else error->universe_all(FLERR,"Illegal NEB command"); diff --git a/src/REPLICA/neb.h b/src/REPLICA/neb.h index 9453c3c43b..f585a0c8a7 100644 --- a/src/REPLICA/neb.h +++ b/src/REPLICA/neb.h @@ -47,7 +47,7 @@ class NEB : protected Pointers { double ftol; // force tolerance convergence criterion int n1steps, n2steps; // number of steps in stage 1 and 2 int nevery; // output interval - char *infile; // name of file containing final state + char *inpfile; // name of file containing final state class FixNEB *fneb; int numall; // per-replica dimension of array all diff --git a/src/RIGID/fix_rigid.cpp b/src/RIGID/fix_rigid.cpp index 8fa43b89ce..abb669bc02 100644 --- a/src/RIGID/fix_rigid.cpp +++ b/src/RIGID/fix_rigid.cpp @@ -59,7 +59,7 @@ enum{ISO,ANISO,TRICLINIC}; FixRigid::FixRigid(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), step_respa(NULL), - infile(NULL), nrigid(NULL), mol2body(NULL), body2mol(NULL), + inpfile(NULL), nrigid(NULL), mol2body(NULL), body2mol(NULL), body(NULL), displace(NULL), masstotal(NULL), xcm(NULL), vcm(NULL), fcm(NULL), inertia(NULL), ex_space(NULL), ey_space(NULL), ez_space(NULL), angmom(NULL), omega(NULL), @@ -327,7 +327,7 @@ FixRigid::FixRigid(LAMMPS *lmp, int narg, char **arg) : t_iter = 1; t_order = 3; p_chain = 10; - infile = NULL; + inpfile = NULL; pcouple = NONE; pstyle = ANISO; @@ -546,12 +546,12 @@ FixRigid::FixRigid(LAMMPS *lmp, int narg, char **arg) : p_chain = force->inumeric(FLERR,arg[iarg+1]); iarg += 2; - } else if (strcmp(arg[iarg],"infile") == 0) { + } else if (strcmp(arg[iarg],"inpfile") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal fix rigid command"); - delete [] infile; + delete [] inpfile; int n = 
strlen(arg[iarg+1]) + 1; - infile = new char[n]; - strcpy(infile,arg[iarg+1]); + inpfile = new char[n]; + strcpy(inpfile,arg[iarg+1]); restart_file = 1; reinitflag = 0; iarg += 2; @@ -649,7 +649,7 @@ FixRigid::~FixRigid() atom->delete_callback(id,0); delete random; - delete [] infile; + delete [] inpfile; memory->destroy(mol2body); memory->destroy(body2mol); @@ -760,14 +760,14 @@ void FixRigid::init() // setup rigid bodies, using current atom info. if reinitflag is not set, // do the initialization only once, b/c properties may not be re-computable // especially if overlapping particles. - // do not do dynamic init if read body properties from infile. - // this is b/c the infile defines the static and dynamic properties and may + // do not do dynamic init if read body properties from inpfile. + // this is b/c the inpfile defines the static and dynamic properties and may // not be computable if contain overlapping particles. - // setup_bodies_static() reads infile itself + // setup_bodies_static() reads inpfile itself if (reinitflag || !setupflag) { setup_bodies_static(); - if (!infile) setup_bodies_dynamic(); + if (!inpfile) setup_bodies_dynamic(); setupflag = 1; } @@ -1640,7 +1640,7 @@ void FixRigid::set_v() sets extended flags, masstotal, center-of-mass sets Cartesian and diagonalized inertia tensor sets body image flags - may read some properties from infile + may read some properties from inpfile ------------------------------------------------------------------------- */ void FixRigid::setup_bodies_static() @@ -1791,7 +1791,7 @@ void FixRigid::setup_bodies_static() xcm[ibody][2] = all[ibody][2]/masstotal[ibody]; } - // set vcm, angmom = 0.0 in case infile is used + // set vcm, angmom = 0.0 in case inpfile is used // and doesn't overwrite all body's values // since setup_bodies_dynamic() will not be called @@ -1810,7 +1810,7 @@ void FixRigid::setup_bodies_static() // inbody[i] = 0/1 if Ith rigid body is initialized by file int *inbody; - if (infile) { + if 
(inpfile) { memory->create(inbody,nbody,"rigid:inbody"); for (ibody = 0; ibody < nbody; ibody++) inbody[ibody] = 0; readfile(0,masstotal,xcm,vcm,angmom,imagebody,inbody); @@ -1918,7 +1918,7 @@ void FixRigid::setup_bodies_static() // overwrite Cartesian inertia tensor with file values - if (infile) readfile(1,NULL,all,NULL,NULL,NULL,inbody); + if (inpfile) readfile(1,NULL,all,NULL,NULL,NULL,inbody); // diagonalize inertia tensor for each body via Jacobi rotations // inertia = 3 eigenvalues = principal moments of inertia @@ -2116,11 +2116,11 @@ void FixRigid::setup_bodies_static() MPI_Allreduce(sum[0],all[0],6*nbody,MPI_DOUBLE,MPI_SUM,world); // error check that re-computed moments of inertia match diagonalized ones - // do not do test for bodies with params read from infile + // do not do test for bodies with params read from inpfile double norm; for (ibody = 0; ibody < nbody; ibody++) { - if (infile && inbody[ibody]) continue; + if (inpfile && inbody[ibody]) continue; if (inertia[ibody][0] == 0.0) { if (fabs(all[ibody][0]) > TOLERANCE) error->all(FLERR,"Fix rigid: Bad principal moments"); @@ -2149,7 +2149,7 @@ void FixRigid::setup_bodies_static() error->all(FLERR,"Fix rigid: Bad principal moments"); } - if (infile) memory->destroy(inbody); + if (inpfile) memory->destroy(inbody); } /* ---------------------------------------------------------------------- @@ -2268,10 +2268,10 @@ void FixRigid::readfile(int which, double *vec, char line[MAXLINE]; if (me == 0) { - fp = fopen(infile,"r"); + fp = fopen(inpfile,"r"); if (fp == NULL) { char str[128]; - snprintf(str,128,"Cannot open fix rigid infile %s",infile); + snprintf(str,128,"Cannot open fix rigid inpfile %s",inpfile); error->one(FLERR,str); } @@ -2371,7 +2371,7 @@ void FixRigid::readfile(int which, double *vec, /* ---------------------------------------------------------------------- write out restart info for mass, COM, inertia tensor, image flags to file - identical format to infile option, so info can be read in 
when restarting + identical format to inpfile option, so info can be read in when restarting only proc 0 writes list of global bodies to file ------------------------------------------------------------------------- */ diff --git a/src/RIGID/fix_rigid.h b/src/RIGID/fix_rigid.h index 507e4c7553..d9d7b07ce8 100644 --- a/src/RIGID/fix_rigid.h +++ b/src/RIGID/fix_rigid.h @@ -67,7 +67,7 @@ class FixRigid : public Fix { int triclinic; double MINUSPI,TWOPI; - char *infile; // file to read rigid body attributes from + char *inpfile; // file to read rigid body attributes from int rstyle; // SINGLE,MOLECULE,GROUP int setupflag; // 1 if body properties are setup, else 0 int earlyflag; // 1 if forces/torques computed at post_force() @@ -261,7 +261,7 @@ E: Fix rigid: Bad principal moments The principal moments of inertia computed for a rigid body are not within the required tolerances. -E: Cannot open fix rigid infile %s +E: Cannot open fix rigid inpfile %s The specified file cannot be opened. Check that the path and name are correct. 
diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index 54fb83f0aa..dedd71c98d 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -70,7 +70,7 @@ enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF}; FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), step_respa(NULL), - infile(NULL), body(NULL), bodyown(NULL), bodytag(NULL), atom2body(NULL), + inpfile(NULL), body(NULL), bodyown(NULL), bodytag(NULL), atom2body(NULL), xcmimage(NULL), displace(NULL), eflags(NULL), orient(NULL), dorient(NULL), avec_ellipsoid(NULL), avec_line(NULL), avec_tri(NULL), counts(NULL), itensor(NULL), mass_body(NULL), langextra(NULL), random(NULL), @@ -191,7 +191,7 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : int seed; langflag = 0; - infile = NULL; + inpfile = NULL; onemols = NULL; reinitflag = 1; @@ -232,12 +232,12 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : if (seed <= 0) error->all(FLERR,"Illegal fix rigid/small command"); iarg += 5; - } else if (strcmp(arg[iarg],"infile") == 0) { + } else if (strcmp(arg[iarg],"inpfile") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal fix rigid/small command"); - delete [] infile; + delete [] inpfile; int n = strlen(arg[iarg+1]) + 1; - infile = new char[n]; - strcpy(infile,arg[iarg+1]); + inpfile = new char[n]; + strcpy(inpfile,arg[iarg+1]); restart_file = 1; reinitflag = 0; iarg += 2; @@ -546,7 +546,7 @@ FixRigidSmall::~FixRigidSmall() memory->destroy(dorient); delete random; - delete [] infile; + delete [] inpfile; memory->destroy(langextra); memory->destroy(mass_body); @@ -622,10 +622,10 @@ void FixRigidSmall::init() if reinitflag is not set, do the initialization only once, b/c properties may not be re-computable especially if overlapping particles or bodies are inserted from mol template. - do not do dynamic init if read body properties from infile. 
this - is b/c the infile defines the static and dynamic properties and may not + do not do dynamic init if read body properties from inpfile. this + is b/c the inpfile defines the static and dynamic properties and may not be computable if contain overlapping particles setup_bodies_static() - reads infile itself. + reads inpfile itself. cannot do this until now, b/c requires comm->setup() to have setup stencil invoke pre_neighbor() to insure body xcmimage flags are reset needed if Verlet::setup::pbc() has remapped/migrated atoms for 2nd run @@ -638,7 +638,7 @@ void FixRigidSmall::setup_pre_neighbor() setup_bodies_static(); else pre_neighbor(); - if ((reinitflag || !setupflag) && !infile) + if ((reinitflag || !setupflag) && !inpfile) setup_bodies_dynamic(); setupflag = 1; @@ -1775,7 +1775,7 @@ int FixRigidSmall::rendezvous_body(int n, char *inbuf, sets extended flags, masstotal, center-of-mass sets Cartesian and diagonalized inertia tensor sets body image flags - may read some properties from infile + may read some properties from inpfile ------------------------------------------------------------------------- */ void FixRigidSmall::setup_bodies_static() @@ -1932,7 +1932,7 @@ void FixRigidSmall::setup_bodies_static() xcm[2] /= body[ibody].mass; } - // set vcm, angmom = 0.0 in case infile is used + // set vcm, angmom = 0.0 in case inpfile is used // and doesn't overwrite all body's values // since setup_bodies_dynamic() will not be called @@ -1955,7 +1955,7 @@ void FixRigidSmall::setup_bodies_static() // inbody[i] = 0/1 if Ith rigid body is initialized by file int *inbody; - if (infile) { + if (inpfile) { memory->create(inbody,nlocal_body,"rigid/small:inbody"); for (ibody = 0; ibody < nlocal_body; ibody++) inbody[ibody] = 0; readfile(0,NULL,inbody); @@ -2058,7 +2058,7 @@ void FixRigidSmall::setup_bodies_static() // overwrite Cartesian inertia tensor with file values - if (infile) readfile(1,itensor,inbody); + if (inpfile) readfile(1,itensor,inbody); // diagonalize 
inertia tensor for each body via Jacobi rotations // inertia = 3 eigenvalues = principal moments of inertia @@ -2257,11 +2257,11 @@ void FixRigidSmall::setup_bodies_static() comm->reverse_comm_fix(this,6); // error check that re-computed moments of inertia match diagonalized ones - // do not do test for bodies with params read from infile + // do not do test for bodies with params read from inpfile double norm; for (ibody = 0; ibody < nlocal_body; ibody++) { - if (infile && inbody[ibody]) continue; + if (inpfile && inbody[ibody]) continue; inertia = body[ibody].inertia; if (inertia[0] == 0.0) { @@ -2295,7 +2295,7 @@ void FixRigidSmall::setup_bodies_static() // clean up memory->destroy(itensor); - if (infile) memory->destroy(inbody); + if (inpfile) memory->destroy(inbody); } /* ---------------------------------------------------------------------- @@ -2437,10 +2437,10 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody) // open file and read header if (me == 0) { - fp = fopen(infile,"r"); + fp = fopen(inpfile,"r"); if (fp == NULL) { char str[128]; - snprintf(str,128,"Cannot open fix rigid/small infile %s",infile); + snprintf(str,128,"Cannot open fix rigid/small inpfile %s",inpfile); error->one(FLERR,str); } @@ -2538,7 +2538,7 @@ void FixRigidSmall::readfile(int which, double **array, int *inbody) /* ---------------------------------------------------------------------- write out restart info for mass, COM, inertia tensor to file - identical format to infile option, so info can be read in when restarting + identical format to inpfile option, so info can be read in when restarting each proc contributes info for rigid bodies it owns ------------------------------------------------------------------------- */ diff --git a/src/RIGID/fix_rigid_small.h b/src/RIGID/fix_rigid_small.h index b5a3d5208d..6dae443d1c 100644 --- a/src/RIGID/fix_rigid_small.h +++ b/src/RIGID/fix_rigid_small.h @@ -74,7 +74,7 @@ class FixRigidSmall : public Fix { int triclinic; 
double MINUSPI,TWOPI; - char *infile; // file to read rigid body attributes from + char *inpfile; // file to read rigid body attributes from int setupflag; // 1 if body properties are setup, else 0 int earlyflag; // 1 if forces/torques are computed at post_force() int commflag; // various modes of forward/reverse comm @@ -318,7 +318,7 @@ E: Fix rigid: Bad principal moments The principal moments of inertia computed for a rigid body are not within the required tolerances. -E: Cannot open fix rigid/small infile %s +E: Cannot open fix rigid/small inpfile %s The specified file cannot be opened. Check that the path and name are correct. diff --git a/src/SPIN/neb_spin.cpp b/src/SPIN/neb_spin.cpp index 126cfb09e3..46a0541488 100644 --- a/src/SPIN/neb_spin.cpp +++ b/src/SPIN/neb_spin.cpp @@ -139,12 +139,12 @@ void NEBSpin::command(int narg, char **arg) if (strcmp(arg[5],"final") == 0) { if (narg != 7 && narg !=8) error->universe_all(FLERR,"Illegal NEBSpin command"); - infile = arg[6]; - readfile(infile,0); + inpfile = arg[6]; + readfile(inpfile,0); } else if (strcmp(arg[5],"each") == 0) { if (narg != 7 && narg !=8) error->universe_all(FLERR,"Illegal NEBSpin command"); - infile = arg[6]; - readfile(infile,1); + inpfile = arg[6]; + readfile(inpfile,1); } else if (strcmp(arg[5],"none") == 0) { if (narg != 6 && narg !=7) error->universe_all(FLERR,"Illegal NEBSpin command"); } else error->universe_all(FLERR,"Illegal NEBSpin command"); diff --git a/src/SPIN/neb_spin.h b/src/SPIN/neb_spin.h index 5acd034e95..c128eaffa4 100644 --- a/src/SPIN/neb_spin.h +++ b/src/SPIN/neb_spin.h @@ -46,7 +46,7 @@ class NEBSpin : protected Pointers { double ttol; // torque tolerance convergence criterion int n1steps, n2steps; // number of steps in stage 1 and 2 int nevery; // output interval - char *infile; // name of file containing final state + char *inpfile; // name of file containing final state class FixNEBSpin *fneb; int numall; // per-replica dimension of array all diff --git 
a/src/read_restart.cpp b/src/read_restart.cpp index 5aa4622a67..6e156da837 100644 --- a/src/read_restart.cpp +++ b/src/read_restart.cpp @@ -580,32 +580,32 @@ void ReadRestart::command(int narg, char **arg) } /* ---------------------------------------------------------------------- - infile contains a "*" - search for all files which match the infile pattern + inpfile contains a "*" + search for all files which match the inpfile pattern replace "*" with latest timestep value to create outfile name search dir referenced by initial pathname of file - if infile also contains "%", use "base" when searching directory + if inpfile also contains "%", use "base" when searching directory only called by proc 0 ------------------------------------------------------------------------- */ -void ReadRestart::file_search(char *infile, char *outfile) +void ReadRestart::file_search(char *inpfile, char *outfile) { char *ptr; - // separate infile into dir + filename + // separate inpfile into dir + filename - char *dirname = new char[strlen(infile) + 1]; - char *filename = new char[strlen(infile) + 1]; + char *dirname = new char[strlen(inpfile) + 1]; + char *filename = new char[strlen(inpfile) + 1]; - if (strchr(infile,'/')) { - ptr = strrchr(infile,'/'); + if (strchr(inpfile,'/')) { + ptr = strrchr(inpfile,'/'); *ptr = '\0'; - strcpy(dirname,infile); + strcpy(dirname,inpfile); strcpy(filename,ptr+1); *ptr = '/'; } else { strcpy(dirname,"./"); - strcpy(filename,infile); + strcpy(filename,inpfile); } // if filename contains "%" replace "%" with "base" @@ -651,11 +651,11 @@ void ReadRestart::file_search(char *infile, char *outfile) if (maxnum < 0) error->one(FLERR,"Found no restart file matching pattern"); // create outfile with maxint substituted for "*" - // use original infile, not pattern, since need to retain "%" in filename + // use original inpfile, not pattern, since need to retain "%" in filename - ptr = strchr(infile,'*'); + ptr = strchr(inpfile,'*'); *ptr = '\0'; - 
sprintf(outfile,"%s" BIGINT_FORMAT "%s",infile,maxnum,ptr+1); + sprintf(outfile,"%s" BIGINT_FORMAT "%s",inpfile,maxnum,ptr+1); *ptr = '*'; // clean up From 3e93881e44977907fcf321e427416f765aca8b34 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 24 Jun 2019 05:28:54 -0400 Subject: [PATCH 028/117] store compute_tally array in restart and read it back. fix memory leak. --- src/pair_hybrid.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/pair_hybrid.cpp b/src/pair_hybrid.cpp index 4dcd249ac6..eacd437118 100644 --- a/src/pair_hybrid.cpp +++ b/src/pair_hybrid.cpp @@ -254,6 +254,7 @@ void PairHybrid::settings(int narg, char **arg) delete[] multiple; delete[] special_lj; delete[] special_coul; + delete[] compute_tally; } if (allocated) { @@ -273,7 +274,6 @@ void PairHybrid::settings(int narg, char **arg) special_lj = new double*[narg]; special_coul = new double*[narg]; - compute_tally = new int[narg]; // allocate each sub-style @@ -649,6 +649,8 @@ void PairHybrid::write_restart(FILE *fp) // each sub-style writes its settings, but no coeff info + fwrite(compute_tally,sizeof(int),nstyles,fp); + int n; for (int m = 0; m < nstyles; m++) { n = strlen(keywords[m]) + 1; @@ -682,6 +684,7 @@ void PairHybrid::read_restart(FILE *fp) delete[] multiple; delete[] special_lj; delete[] special_coul; + delete[] compute_tally; styles = new Pair*[nstyles]; keywords = new char*[nstyles]; @@ -689,10 +692,14 @@ void PairHybrid::read_restart(FILE *fp) special_lj = new double*[nstyles]; special_coul = new double*[nstyles]; + compute_tally = new int[nstyles]; // each sub-style is created via new_pair() // each reads its settings, but no coeff info + if (me == 0) fread(compute_tally,sizeof(int),nstyles,fp); + MPI_Bcast(compute_tally,nstyles,MPI_INT,0,world); + int n,dummy; for (int m = 0; m < nstyles; m++) { if (me == 0) fread(&n,sizeof(int),1,fp); From 2159a8c44a45330f04283b860a6decc04db800a6 Mon Sep 17 00:00:00 2001 From: "Vishnu V. 
Krishnan" Date: Mon, 24 Jun 2019 16:14:29 +0530 Subject: [PATCH 029/117] Arch specific words --- doc/utils/sphinx-config/false_positives.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt index a023c5b821..9a46983539 100644 --- a/doc/utils/sphinx-config/false_positives.txt +++ b/doc/utils/sphinx-config/false_positives.txt @@ -101,6 +101,7 @@ api Appl Apu arccos +Archlinux arcsin arg args @@ -1527,6 +1528,7 @@ Makefile makefiles Makefiles makelist +makepkg Makse malloc Malolepsza From 8b49cac86a4dc1f9b484e33a744e7e41aa9055ef Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 24 Jun 2019 09:09:22 -0400 Subject: [PATCH 030/117] correct dimensionality of cut_coul property in Pair::extract() --- src/CLASS2/pair_lj_class2_coul_cut.cpp | 3 +-- src/MISC/pair_nm_cut_coul_cut.cpp | 3 +-- src/pair_coul_cut.cpp | 1 + src/pair_lj_cut_coul_cut.cpp | 3 +-- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/CLASS2/pair_lj_class2_coul_cut.cpp b/src/CLASS2/pair_lj_class2_coul_cut.cpp index dafa83c8fd..8dc038b8fc 100644 --- a/src/CLASS2/pair_lj_class2_coul_cut.cpp +++ b/src/CLASS2/pair_lj_class2_coul_cut.cpp @@ -473,9 +473,8 @@ double PairLJClass2CoulCut::single(int i, int j, int itype, int jtype, void *PairLJClass2CoulCut::extract(const char *str, int &dim) { - dim = 0; - if (strcmp(str,"cut_coul") == 0) return (void *) &cut_coul; dim = 2; + if (strcmp(str,"cut_coul") == 0) return (void *) &cut_coul; if (strcmp(str,"epsilon") == 0) return (void *) epsilon; if (strcmp(str,"sigma") == 0) return (void *) sigma; return NULL; diff --git a/src/MISC/pair_nm_cut_coul_cut.cpp b/src/MISC/pair_nm_cut_coul_cut.cpp index 6a09d579b7..a8428ab980 100644 --- a/src/MISC/pair_nm_cut_coul_cut.cpp +++ b/src/MISC/pair_nm_cut_coul_cut.cpp @@ -487,9 +487,8 @@ double PairNMCutCoulCut::single(int i, int j, int itype, int jtype, void *PairNMCutCoulCut::extract(const char *str, int &dim) { - dim 
= 0; - if (strcmp(str,"cut_coul") == 0) return (void *) &cut_coul; dim = 2; + if (strcmp(str,"cut_coul") == 0) return (void *) &cut_coul; if (strcmp(str,"e0") == 0) return (void *) e0; if (strcmp(str,"r0") == 0) return (void *) r0; if (strcmp(str,"nn") == 0) return (void *) nn; diff --git a/src/pair_coul_cut.cpp b/src/pair_coul_cut.cpp index e7c0e0aabb..196e064219 100644 --- a/src/pair_coul_cut.cpp +++ b/src/pair_coul_cut.cpp @@ -303,6 +303,7 @@ double PairCoulCut::single(int i, int j, int /*itype*/, int /*jtype*/, void *PairCoulCut::extract(const char *str, int &dim) { dim = 2; + if (strcmp(str,"cut_coul") == 0) return (void *) &cut; if (strcmp(str,"scale") == 0) return (void *) scale; return NULL; } diff --git a/src/pair_lj_cut_coul_cut.cpp b/src/pair_lj_cut_coul_cut.cpp index 6f2ba75309..38a6e2c431 100644 --- a/src/pair_lj_cut_coul_cut.cpp +++ b/src/pair_lj_cut_coul_cut.cpp @@ -461,9 +461,8 @@ double PairLJCutCoulCut::single(int i, int j, int itype, int jtype, void *PairLJCutCoulCut::extract(const char *str, int &dim) { - dim = 0; - if (strcmp(str,"cut_coul") == 0) return (void *) &cut_coul; dim = 2; + if (strcmp(str,"cut_coul") == 0) return (void *) &cut_coul; if (strcmp(str,"epsilon") == 0) return (void *) epsilon; if (strcmp(str,"sigma") == 0) return (void *) sigma; return NULL; From a36d2573cffef6e5cfe3b5b46d581a955941e821 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 24 Jun 2019 09:10:00 -0400 Subject: [PATCH 031/117] make certain, we are not mixing cutoff and long-range coulomb when looking cutoffs --- src/pair_hybrid.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/pair_hybrid.cpp b/src/pair_hybrid.cpp index eacd437118..9aff8a387a 100644 --- a/src/pair_hybrid.cpp +++ b/src/pair_hybrid.cpp @@ -945,17 +945,24 @@ void *PairHybrid::extract(const char *str, int &dim) void *cutptr = NULL; void *ptr; double cutvalue = 0.0; + int couldim = -1; for (int m = 0; m < nstyles; m++) { ptr = styles[m]->extract(str,dim); 
if (ptr && strcmp(str,"cut_coul") == 0) { + if (cutptr && dim != couldim) + error->all(FLERR, + "Coulomb styles of pair hybrid sub-styles do not match"); double *p_newvalue = (double *) ptr; double newvalue = *p_newvalue; - if (cutptr && newvalue != cutvalue) + if (cutptr && (newvalue != cutvalue)) error->all(FLERR, "Coulomb cutoffs of pair hybrid sub-styles do not match"); - cutptr = ptr; - cutvalue = newvalue; + if (dim == 0) { + cutptr = ptr; + cutvalue = newvalue; + } + couldim = dim; } else if (ptr) return ptr; } From 826a14f54ab8bef2472ef16876706ee2f95897e5 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 24 Jun 2019 14:16:00 -0400 Subject: [PATCH 032/117] add workaround for handline include files # Conflicts: # src/input.h --- src/input.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/input.cpp b/src/input.cpp index 0111cb5738..32b6b697b1 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -78,6 +78,7 @@ Input::Input(LAMMPS *lmp, int argc, char **argv) : Pointers(lmp) echo_screen = 0; echo_log = 1; + eof_return = 0; label_active = 0; labelstr = NULL; @@ -206,6 +207,7 @@ void Input::file() MPI_Bcast(&n,1,MPI_INT,0,world); if (n == 0) { if (label_active) error->all(FLERR,"Label wasn't found in input script"); + if (eof_return) break; if (me == 0) { if (infile != stdin) { fclose(infile); @@ -1057,6 +1059,11 @@ void Input::include() error->one(FLERR,str); } infiles[nfile++] = infile; + eof_return = 1; + file(); + eof_return = 0; + nfile--; + infile = infiles[nfile-1]; } } From 7f26862f9841ebaab34961288eca4a5ba1187e9c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 24 Jun 2019 14:16:51 -0400 Subject: [PATCH 033/117] simplify nested include file handling # Conflicts: # src/input.h --- src/input.cpp | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/src/input.cpp b/src/input.cpp index 32b6b697b1..e5fbec6d5a 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -78,7 +78,6 @@ Input::Input(LAMMPS *lmp, int 
argc, char **argv) : Pointers(lmp) echo_screen = 0; echo_log = 1; - eof_return = 0; label_active = 0; labelstr = NULL; @@ -207,18 +206,7 @@ void Input::file() MPI_Bcast(&n,1,MPI_INT,0,world); if (n == 0) { if (label_active) error->all(FLERR,"Label wasn't found in input script"); - if (eof_return) break; - if (me == 0) { - if (infile != stdin) { - fclose(infile); - infile = NULL; - } - nfile--; - } - MPI_Bcast(&nfile,1,MPI_INT,0,world); - if (nfile == 0) break; - if (me == 0) infile = infiles[nfile-1]; - continue; + break; } if (n > maxline) reallocate(line,maxline,n); @@ -1059,9 +1047,8 @@ void Input::include() error->one(FLERR,str); } infiles[nfile++] = infile; - eof_return = 1; file(); - eof_return = 0; + fclose(infile); nfile--; infile = infiles[nfile-1]; } From 13237155c8f3d01be9a7b9375672d56bd2e787d5 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 24 Jun 2019 14:24:54 -0400 Subject: [PATCH 034/117] make processing of Input::file(const char* filename) more like processing an include file --- src/input.cpp | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/input.cpp b/src/input.cpp index e5fbec6d5a..644446710f 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -240,8 +240,8 @@ void Input::file() } /* ---------------------------------------------------------------------- - process all input from filename - called from library interface + process all input from file at filename + mostly called from library interface ------------------------------------------------------------------------- */ void Input::file(const char *filename) @@ -251,21 +251,30 @@ void Input::file(const char *filename) // call to file() will close filename and decrement nfile if (me == 0) { - if (nfile > 1) - error->one(FLERR,"Invalid use of library file() function"); + if (nfile == maxfile) { + maxfile++; + infiles = (FILE **) + memory->srealloc(infiles,maxfile*sizeof(FILE *),"input:infiles"); + } - if (infile && infile != stdin) 
fclose(infile); infile = fopen(filename,"r"); if (infile == NULL) { char str[128]; snprintf(str,128,"Cannot open input script %s",filename); error->one(FLERR,str); } - infiles[0] = infile; - nfile = 1; + infiles[nfile++] = infile; } + // process contents of file + file(); + + if (me == 0) { + fclose(infile); + nfile--; + infile = infiles[nfile-1]; + } } /* ---------------------------------------------------------------------- @@ -1047,7 +1056,13 @@ void Input::include() error->one(FLERR,str); } infiles[nfile++] = infile; - file(); + } + + // process contents of file + + file(); + + if (me == 0) { fclose(infile); nfile--; infile = infiles[nfile-1]; From 423eebda25f381c0cdd6a9e17815204c3081811c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 25 Jun 2019 05:57:48 -0400 Subject: [PATCH 035/117] restore header --- src/CLASS2/pair_lj_class2_coul_long.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/CLASS2/pair_lj_class2_coul_long.cpp b/src/CLASS2/pair_lj_class2_coul_long.cpp index 7bc67a5afa..c92c7b78f1 100644 --- a/src/CLASS2/pair_lj_class2_coul_long.cpp +++ b/src/CLASS2/pair_lj_class2_coul_long.cpp @@ -2,10 +2,12 @@ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov + Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. + See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ From 50e76ff7a1369efdb1c6238d0c937d802db10f6f Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 25 Jun 2019 05:59:29 -0400 Subject: [PATCH 036/117] restore virtual keyword and empty lines in comments --- src/CLASS2/pair_lj_class2_coul_long.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/CLASS2/pair_lj_class2_coul_long.h b/src/CLASS2/pair_lj_class2_coul_long.h index 447191ea1f..50d7092541 100644 --- a/src/CLASS2/pair_lj_class2_coul_long.h +++ b/src/CLASS2/pair_lj_class2_coul_long.h @@ -2,10 +2,12 @@ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov + Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. + See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ @@ -29,7 +31,7 @@ class PairLJClass2CoulLong : public Pair { virtual void compute(int, int); virtual void settings(int, char **); void coeff(int, char **); - void init_style(); + virtual void init_style(); virtual double init_one(int, int); void write_restart(FILE *); void read_restart(FILE *); @@ -62,17 +64,28 @@ class PairLJClass2CoulLong : public Pair { #endif /* ERROR/WARNING messages: + E: Illegal ... command + Self-explanatory. Check the input script syntax and compare to the documentation for the command. You can use -echo screen as a command-line option when running LAMMPS to see the offending line. + E: Incorrect args for pair coefficients + Self-explanatory. Check the input script or data file. + E: Pair style lj/class2/coul/long requires atom attribute q + The atom style defined does not have this attribute. 
+ E: Pair style requires a KSpace style + No kspace style is defined. + E: Pair cutoff < Respa interior cutoff + One or more pairwise cutoffs are too short to use with the specified rRESPA cutoffs. + */ From e08146c31fa887300dbba54ee01e3f69782440e5 Mon Sep 17 00:00:00 2001 From: "Dan S. Bolintineanu" Date: Tue, 25 Jun 2019 12:06:18 -0600 Subject: [PATCH 037/117] Allow twisting torque to be used without rolling torque in pair granular --- src/GRANULAR/pair_granular.cpp | 39 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/GRANULAR/pair_granular.cpp b/src/GRANULAR/pair_granular.cpp index 913f6f8b59..77b1981786 100644 --- a/src/GRANULAR/pair_granular.cpp +++ b/src/GRANULAR/pair_granular.cpp @@ -475,16 +475,12 @@ void PairGranular::compute(int eflag, int vflag) fs3 = -Ft*vtr3; } - //**************************************** - // rolling resistance - //**************************************** - - if (roll_model[itype][jtype] != ROLL_NONE) { + if (roll_model[itype][jtype] != ROLL_NONE || + twist_model[itype][jtype] != TWIST_NONE){ relrot1 = omega[i][0] - omega[j][0]; relrot2 = omega[i][1] - omega[j][1]; relrot3 = omega[i][2] - omega[j][2]; - - // rolling velocity, + // rolling velocity, // see eq. 
31 of Wang et al, Particuology v 23, p 49 (2015) // this is different from the Marshall papers, // which use the Bagi/Kuhn formulation @@ -492,7 +488,12 @@ void PairGranular::compute(int eflag, int vflag) // - 0.5*((radj-radi)/radsum)*vtr1; // - 0.5*((radj-radi)/radsum)*vtr2; // - 0.5*((radj-radi)/radsum)*vtr3; + } + //**************************************** + // rolling resistance + //**************************************** + if (roll_model[itype][jtype] != ROLL_NONE) { vrl1 = Reff*(relrot2*nz - relrot3*ny); vrl2 = Reff*(relrot3*nx - relrot1*nz); vrl3 = Reff*(relrot1*ny - relrot2*nx); @@ -1231,10 +1232,10 @@ void PairGranular::write_restart(FILE *fp) fwrite(&tangential_model[i][j],sizeof(int),1,fp); fwrite(&roll_model[i][j],sizeof(int),1,fp); fwrite(&twist_model[i][j],sizeof(int),1,fp); - fwrite(&normal_coeffs[i][j],sizeof(double),4,fp); - fwrite(&tangential_coeffs[i][j],sizeof(double),3,fp); - fwrite(&roll_coeffs[i][j],sizeof(double),3,fp); - fwrite(&twist_coeffs[i][j],sizeof(double),3,fp); + fwrite(normal_coeffs[i][j],sizeof(double),4,fp); + fwrite(tangential_coeffs[i][j],sizeof(double),3,fp); + fwrite(roll_coeffs[i][j],sizeof(double),3,fp); + fwrite(twist_coeffs[i][j],sizeof(double),3,fp); fwrite(&cutoff_type[i][j],sizeof(double),1,fp); } } @@ -1261,10 +1262,10 @@ void PairGranular::read_restart(FILE *fp) fread(&tangential_model[i][j],sizeof(int),1,fp); fread(&roll_model[i][j],sizeof(int),1,fp); fread(&twist_model[i][j],sizeof(int),1,fp); - fread(&normal_coeffs[i][j],sizeof(double),4,fp); - fread(&tangential_coeffs[i][j],sizeof(double),3,fp); - fread(&roll_coeffs[i][j],sizeof(double),3,fp); - fread(&twist_coeffs[i][j],sizeof(double),3,fp); + fread(normal_coeffs[i][j],sizeof(double),4,fp); + fread(tangential_coeffs[i][j],sizeof(double),3,fp); + fread(roll_coeffs[i][j],sizeof(double),3,fp); + fread(twist_coeffs[i][j],sizeof(double),3,fp); fread(&cutoff_type[i][j],sizeof(double),1,fp); } MPI_Bcast(&normal_model[i][j],1,MPI_INT,0,world); @@ -1272,10 +1273,10 @@ 
void PairGranular::read_restart(FILE *fp) MPI_Bcast(&tangential_model[i][j],1,MPI_INT,0,world); MPI_Bcast(&roll_model[i][j],1,MPI_INT,0,world); MPI_Bcast(&twist_model[i][j],1,MPI_INT,0,world); - MPI_Bcast(&normal_coeffs[i][j],4,MPI_DOUBLE,0,world); - MPI_Bcast(&tangential_coeffs[i][j],3,MPI_DOUBLE,0,world); - MPI_Bcast(&roll_coeffs[i][j],3,MPI_DOUBLE,0,world); - MPI_Bcast(&twist_coeffs[i][j],3,MPI_DOUBLE,0,world); + MPI_Bcast(normal_coeffs[i][j],4,MPI_DOUBLE,0,world); + MPI_Bcast(tangential_coeffs[i][j],3,MPI_DOUBLE,0,world); + MPI_Bcast(roll_coeffs[i][j],3,MPI_DOUBLE,0,world); + MPI_Bcast(twist_coeffs[i][j],3,MPI_DOUBLE,0,world); MPI_Bcast(&cutoff_type[i][j],1,MPI_DOUBLE,0,world); } } From f1dfcaf514948935724df4b0b754adecfca3cf04 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 25 Jun 2019 14:47:21 -0600 Subject: [PATCH 038/117] WIP --- src/KOKKOS/pair_snap_kokkos.h | 25 +- src/KOKKOS/pair_snap_kokkos_impl.h | 381 +++++--- src/KOKKOS/sna_kokkos.h | 51 +- src/KOKKOS/sna_kokkos_impl.h | 1461 +++++++++++++++++----------- 4 files changed, 1183 insertions(+), 735 deletions(-) diff --git a/src/KOKKOS/pair_snap_kokkos.h b/src/KOKKOS/pair_snap_kokkos.h index b2019879ed..8be0bf9afb 100644 --- a/src/KOKKOS/pair_snap_kokkos.h +++ b/src/KOKKOS/pair_snap_kokkos.h @@ -31,7 +31,10 @@ PairStyle(snap/kk/host,PairSNAPKokkos) namespace LAMMPS_NS { template -struct TagPairSNAP{}; +struct TagPairSNAPCompute{}; + +struct TagPairSNAPBeta{}; +struct TagPairSNAPBispectrum{}; template class PairSNAPKokkos : public PairSNAP { @@ -53,11 +56,17 @@ public: template KOKKOS_INLINE_FUNCTION - void operator() (TagPairSNAP,const typename Kokkos::TeamPolicy >::member_type& team) const; + void operator() (TagPairSNAPCompute,const typename Kokkos::TeamPolicy >::member_type& team) const; template KOKKOS_INLINE_FUNCTION - void operator() (TagPairSNAP,const typename Kokkos::TeamPolicy >::member_type& team, EV_FLOAT&) const; + void operator() (TagPairSNAPCompute,const typename Kokkos::TeamPolicy 
>::member_type& team, EV_FLOAT&) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy::member_type& team) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairSNAPBispectrum,const typename Kokkos::TeamPolicy::member_type& team) const; template KOKKOS_INLINE_FUNCTION @@ -82,10 +91,14 @@ protected: SNAKokkos snaKK; // How much parallelism to use within an interaction - int vector_length; + int vector_length,team_size; + int team_scratch_size; + int thread_scratch_size; int eflag,vflag; + void compute_beta(); + void compute_bispectrum(); void allocate(); //void read_files(char *, char *); /*template @@ -117,7 +130,9 @@ inline double dist2(double* x,double* y); Kokkos::View d_radelem; // element radii Kokkos::View d_wjelem; // elements weights Kokkos::View d_coeffelem; // element bispectrum coefficients - Kokkos::View d_map; // mapping from atom types to elements + Kokkos::View d_map; // mapping from atom types to elements + Kokkos::View d_beta; // betas for all atoms in list + Kokkos::View d_bispectrum; // bispectrum components for all atoms in list typedef Kokkos::DualView tdual_fparams; tdual_fparams k_cutsq; diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index 0ec4ed0995..687c9dc7cb 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -186,31 +186,45 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) snaKK.nmax = max_neighs; - T_INT team_scratch_size = snaKK.size_team_scratch_arrays(); - T_INT thread_scratch_size = snaKK.size_thread_scratch_arrays(); + team_scratch_size = snaKK.size_team_scratch_arrays(); + thread_scratch_size = snaKK.size_thread_scratch_arrays(); //printf("Sizes: %i %i\n",team_scratch_size/1024,thread_scratch_size/1024); int team_size_max = Kokkos::TeamPolicy::team_size_max(*this); - int vector_length = 8; + vector_length = 8; #ifdef KOKKOS_ENABLE_CUDA - int team_size = 32;//max_neighs; + team_size = 
32;//max_neighs; if (team_size*vector_length > team_size_max) team_size = team_size_max/vector_length; #else - int team_size = 1; + team_size = 1; #endif + if (beta_max < list->inum) { // TODO: no init + d_beta = Kokkos::View("PairSNAPKokkos:beta", + list->inum,ncoeff); + d_bispectrum = Kokkos::View("PairSNAPKokkos:bispectrum", + list->inum,ncoeff); + beta_max = list->inum; + } + + // compute dE_i/dB_i = beta_i for all i in list + + if (quadraticflag || eflag) + compute_bispectrum(); + compute_beta(); + EV_FLOAT ev; if (eflag) { if (neighflag == HALF) { - typename Kokkos::TeamPolicy > policy(inum,team_size,vector_length); + typename Kokkos::TeamPolicy > policy(inum,team_size,vector_length); Kokkos::parallel_reduce(policy .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size)) .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) ,*this,ev); } else if (neighflag == HALFTHREAD) { - typename Kokkos::TeamPolicy > policy(inum,team_size,vector_length); + typename Kokkos::TeamPolicy > policy(inum,team_size,vector_length); Kokkos::parallel_reduce(policy .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size)) .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) @@ -218,13 +232,13 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) } } else { if (neighflag == HALF) { - typename Kokkos::TeamPolicy > policy(inum,team_size,vector_length); + typename Kokkos::TeamPolicy > policy(inum,team_size,vector_length); Kokkos::parallel_for(policy .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size)) .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) ,*this); } else if (neighflag == HALFTHREAD) { - typename Kokkos::TeamPolicy > policy(inum,team_size,vector_length); + typename Kokkos::TeamPolicy > policy(inum,team_size,vector_length); Kokkos::parallel_for(policy .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size)) .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) @@ -232,11 +246,6 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) } } 
-//static int step =0; -//step++; -//if (step%10==0) -// printf(" %e %e %e %e %e (%e %e): %e\n",t1,t2,t3,t4,t5,t6,t7,t1+t2+t3+t4+t5); - if (need_dup) Kokkos::Experimental::contribute(f, dup_f); @@ -275,6 +284,153 @@ void PairSNAPKokkos::compute(int eflag_in, int vflag_in) } } +/* ---------------------------------------------------------------------- + compute beta +------------------------------------------------------------------------- */ + +template +void PairSNAPKokkos::compute_beta() +{ + // TODO: use RangePolicy instead, or thread over ncoeff? + int inum = list->inum; + typename Kokkos::TeamPolicy policy(inum,team_size,vector_length); + Kokkos::parallel_for(policy + .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size)) + .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) + ,*this); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairSNAPKokkos::operator() (TagPairSNAPBeta,const typename Kokkos::TeamPolicy::member_type& team) const { + + const int ii = team.league_rank(); + const int i = d_ilist[ii]; + const int itype = type[i]; + const int ielem = map[itype]; + Kokkos::View> + d_coeffi(d_coeffelem,ielem,Kokkos::ALL); + + for (int icoeff = 0; icoeff < ncoeff; icoeff++) + d_beta(ii,icoeff) = d_coeffi[icoeff+1]; + + if (quadraticflag) { + int k = ncoeff+1; + for (int icoeff = 0; icoeff < ncoeff; icoeff++) { + double bveci = d_bispectrum(ii,icoeff); + d_beta(ii,icoeff) += d_coeffi[k]*bveci; + k++; + for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { + double bvecj = d_bispectrum(ii,jcoeff); + d_beta(ii,icoeff) += d_coeffi[k]*bvecj; + d_beta(ii,jcoeff) += d_coeffi[k]*bveci; + k++; + } + } + } +} + +/* ---------------------------------------------------------------------- + compute bispectrum +------------------------------------------------------------------------- */ + +template +void PairSNAPKokkos::compute_bispectrum() +{ + int inum = list->inum; + typename 
Kokkos::TeamPolicy policy(inum,team_size,vector_length); + Kokkos::parallel_for(policy + .set_scratch_size(1,Kokkos::PerThread(thread_scratch_size)) + .set_scratch_size(1,Kokkos::PerTeam(team_scratch_size)) + ,*this); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairSNAPKokkos::operator() (TagPairSNAPBispectrum,const typename Kokkos::TeamPolicy::member_type& team) const { + + const int ii = team.league_rank(); + const int i = d_ilist[ii]; + SNAKokkos my_sna(snaKK,team); + const double xtmp = x(i,0); + const double ytmp = x(i,1); + const double ztmp = x(i,2); + const int itype = type[i]; + const int ielem = d_map[itype]; + const double radi = d_radelem[ielem]; + + const int num_neighs = d_numneigh[i]; + + // rij[][3] = displacements between atom I and those neighbors + // inside = indices of neighbors of I within cutoff + // wj = weights for neighbors of I within cutoff + // rcutij = cutoffs for neighbors of I within cutoff + // note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi + + int ninside = 0; + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,num_neighs), + [&] (const int jj, int& count) { + Kokkos::single(Kokkos::PerThread(team), [&] (){ + T_INT j = d_neighbors(i,jj); + const F_FLOAT dx = x(j,0) - xtmp; + const F_FLOAT dy = x(j,1) - ytmp; + const F_FLOAT dz = x(j,2) - ztmp; + + const int jtype = type(j); + const F_FLOAT rsq = dx*dx + dy*dy + dz*dz; + const int elem_j = d_map[jtype]; + + if ( rsq < rnd_cutsq(itype,jtype) ) + count++; + }); + },ninside); + + if (team.team_rank() == 0) + Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs), + [&] (const int jj, int& offset, bool final) { + //for (int jj = 0; jj < num_neighs; jj++) { + T_INT j = d_neighbors(i,jj); + const F_FLOAT dx = x(j,0) - xtmp; + const F_FLOAT dy = x(j,1) - ytmp; + const F_FLOAT dz = x(j,2) - ztmp; + + const int jtype = type(j); + const F_FLOAT rsq = dx*dx + dy*dy + dz*dz; + const int elem_j 
= d_map[jtype]; + + if ( rsq < rnd_cutsq(itype,jtype) ) { + if (final) { + my_sna.rij(offset,0) = dx; + my_sna.rij(offset,1) = dy; + my_sna.rij(offset,2) = dz; + my_sna.inside[offset] = j; + my_sna.wj[offset] = d_wjelem[elem_j]; + my_sna.rcutij[offset] = (radi + d_radelem[elem_j])*rcutfac; + } + offset++; + } + }); + team.team_barrier(); + + // compute Ui, Zi, and Bi for atom I + + my_sna.compute_ui(team,ninside); + team.team_barrier(); + + my_sna.compute_zi(team); + team.team_barrier(); + + my_sna.compute_bi(team); + team.team_barrier(); + + for (int icoeff = 0; icoeff < ncoeff; icoeff++) + d_bispectrum(ii,icoeff) = my_sna.blist[icoeff]; +} + /* ---------------------------------------------------------------------- allocate all arrays ------------------------------------------------------------------------- */ @@ -354,7 +510,7 @@ void PairSNAPKokkos::coeff(int narg, char **arg) template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAP,const typename Kokkos::TeamPolicy >::member_type& team, EV_FLOAT& ev) const { +void PairSNAPKokkos::operator() (TagPairSNAPCompute,const typename Kokkos::TeamPolicy >::member_type& team, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial @@ -364,12 +520,12 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const const int ii = team.league_rank(); const int i = d_ilist[ii]; SNAKokkos my_sna(snaKK,team); - const double x_i = x(i,0); - const double y_i = x(i,1); - const double z_i = x(i,2); - const int type_i = type[i]; - const int elem_i = d_map[type_i]; - const double radi = d_radelem[elem_i]; + const double xtmp = x(i,0); + const double ytmp = x(i,1); + const double ztmp = x(i,2); + const int itype = type[i]; + const int ielem = d_map[itype]; + const double radi = d_radelem[ielem]; const int num_neighs = d_numneigh[i]; @@ -379,41 +535,38 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const // rcutij = cutoffs for neighbors of I within cutoff // note Rij 
sign convention => dU/dRij = dU/dRj = -dU/dRi - //Kokkos::Timer timer; int ninside = 0; Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,num_neighs), [&] (const int jj, int& count) { Kokkos::single(Kokkos::PerThread(team), [&] (){ T_INT j = d_neighbors(i,jj); - const F_FLOAT dx = x(j,0) - x_i; - const F_FLOAT dy = x(j,1) - y_i; - const F_FLOAT dz = x(j,2) - z_i; + const F_FLOAT dx = x(j,0) - xtmp; + const F_FLOAT dy = x(j,1) - ytmp; + const F_FLOAT dz = x(j,2) - ztmp; - const int type_j = type(j); + const int jtype = type(j); const F_FLOAT rsq = dx*dx + dy*dy + dz*dz; - const int elem_j = d_map[type_j]; + const int elem_j = d_map[jtype]; - if ( rsq < rnd_cutsq(type_i,type_j) ) + if ( rsq < rnd_cutsq(itype,jtype) ) count++; }); },ninside); - //t1 += timer.seconds(); timer.reset(); - if (team.team_rank() == 0) Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,num_neighs), - [&] (const int jj, int& offset, bool final){ + [&] (const int jj, int& offset, bool final) { //for (int jj = 0; jj < num_neighs; jj++) { T_INT j = d_neighbors(i,jj); - const F_FLOAT dx = x(j,0) - x_i; - const F_FLOAT dy = x(j,1) - y_i; - const F_FLOAT dz = x(j,2) - z_i; + const F_FLOAT dx = x(j,0) - xtmp; + const F_FLOAT dy = x(j,1) - ytmp; + const F_FLOAT dz = x(j,2) - ztmp; - const int type_j = type(j); + const int jtype = type(j); const F_FLOAT rsq = dx*dx + dy*dy + dz*dz; - const int elem_j = d_map[type_j]; + const int elem_j = d_map[jtype]; - if ( rsq < rnd_cutsq(type_i,type_j) ) { + if ( rsq < rnd_cutsq(itype,jtype) ) { if (final) { my_sna.rij(offset,0) = dx; my_sna.rij(offset,1) = dy; @@ -425,157 +578,85 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const offset++; } }); - - //t2 += timer.seconds(); timer.reset(); - team.team_barrier(); - // compute Ui, Zi, and Bi for atom I + + // compute Ui, Yi for atom I + my_sna.compute_ui(team,ninside); - //t3 += timer.seconds(); timer.reset(); team.team_barrier(); - my_sna.compute_zi(team); - //t4 += timer.seconds(); timer.reset(); - 
team.team_barrier(); - - if (quadraticflag) { - my_sna.compute_bi(team); - team.team_barrier(); - my_sna.copy_bi2bvec(team); - team.team_barrier(); - } // for neighbors of I within cutoff: - // compute dUi/drj and dBi/drj - // Fij = dEi/dRj = -dEi/dRi => add to Fi, subtract from Fj + // compute Fij = dEi/dRj = -dEi/dRi + // add to Fi, subtract from Fj + + my_sna.compute_yi(team,d_beta,ii); + team.team_barrier(); Kokkos::View> - d_coeffi(d_coeffelem,elem_i,Kokkos::ALL); + d_coeffi(d_coeffelem,ielem,Kokkos::ALL); Kokkos::parallel_for (Kokkos::TeamThreadRange(team,ninside), [&] (const int jj) { //for (int jj = 0; jj < ninside; jj++) { int j = my_sna.inside[jj]; - //Kokkos::Timer timer2; my_sna.compute_duidrj(team,&my_sna.rij(jj,0), my_sna.wj[jj],my_sna.rcutij[jj]); - //t6 += timer2.seconds(); timer2.reset(); - my_sna.compute_dbidrj(team); - //t7 += timer2.seconds(); timer2.reset(); - my_sna.copy_dbi2dbvec(team); Kokkos::single(Kokkos::PerThread(team), [&] (){ - F_FLOAT fij[3]; - fij[0] = 0.0; - fij[1] = 0.0; - fij[2] = 0.0; - - // linear contributions - - for (int k = 1; k <= ncoeff; k++) { - double bgb = d_coeffi[k]; - fij[0] += bgb*my_sna.dbvec(k-1,0); - fij[1] += bgb*my_sna.dbvec(k-1,1); - fij[2] += bgb*my_sna.dbvec(k-1,2); - } - - if (quadraticflag) { - - int k = ncoeff+1; - for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bveci = my_sna.bvec[icoeff]; - double fack = d_coeffi[k]*bveci; - double dbvecix = my_sna.dbvec(icoeff,0); - double dbveciy = my_sna.dbvec(icoeff,1); - double dbveciz = my_sna.dbvec(icoeff,2); - fij[0] += fack*dbvecix; - fij[1] += fack*dbveciy; - fij[2] += fack*dbveciz; - k++; - for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - double facki = d_coeffi[k]*bveci; - double fackj = d_coeffi[k]*my_sna.bvec[jcoeff]; - fij[0] += facki*my_sna.dbvec(jcoeff,0)+fackj*dbvecix; - fij[1] += facki*my_sna.dbvec(jcoeff,1)+fackj*dbveciy; - fij[2] += facki*my_sna.dbvec(jcoeff,2)+fackj*dbveciz; - k++; + F_FLOAT fij[3]; + 
my_sna.compute_deidrj(team,fij); + + a_f(i,0) += fij[0]; + a_f(i,1) += fij[1]; + a_f(i,2) += fij[2]; + a_f(j,0) -= fij[0]; + a_f(j,1) -= fij[1]; + a_f(j,2) -= fij[2]; + + // tally global and per-atom virial contribution + + if (EVFLAG) { + if (vflag_either) { + v_tally_xyz(ev,i,j, + fij[0],fij[1],fij[2], + -my_sna.rij(jj,0),-my_sna.rij(jj,1), + -my_sna.rij(jj,2)); } } - } - - // Hard-coded ZBL potential - //const double dx = my_sna.rij(jj,0); - //const double dy = my_sna.rij(jj,1); - //const double dz = my_sna.rij(jj,2); - //const double fdivr = -1.5e6/pow(dx*dx + dy*dy + dz*dz,7.0); - //fij[0] += dx*fdivr; - //fij[1] += dy*fdivr; - //fij[2] += dz*fdivr; - - //OK - //printf("%lf %lf %lf %lf %lf %lf %lf %lf %lf SNAP-COMPARE: FIJ\n" - // ,x(i,0),x(i,1),x(i,2),x(j,0),x(j,1),x(j,2),fij[0],fij[1],fij[2] ); - a_f(i,0) += fij[0]; - a_f(i,1) += fij[1]; - a_f(i,2) += fij[2]; - a_f(j,0) -= fij[0]; - a_f(j,1) -= fij[1]; - a_f(j,2) -= fij[2]; - - // tally global and per-atom virial contribution - - if (EVFLAG) { - if (vflag_either) { - v_tally_xyz(ev,i,j, - fij[0],fij[1],fij[2], - -my_sna.rij(jj,0),-my_sna.rij(jj,1), - -my_sna.rij(jj,2)); - } - } - + }); }); - //t5 += timer.seconds(); timer.reset(); // tally energy contribution if (EVFLAG) { if (eflag_either) { - if (!quadraticflag) { - my_sna.compute_bi(team); - team.team_barrier(); - my_sna.copy_bi2bvec(team); - team.team_barrier(); - } - - // E = beta.B + 0.5*B^t.alpha.B - // coeff[k] = beta[k-1] or - // coeff[k] = alpha_ii or - // coeff[k] = alpha_ij = alpha_ji, j != i - Kokkos::single(Kokkos::PerTeam(team), [&] () { // evdwl = energy of atom I, sum over coeffs_k * Bi_k double evdwl = d_coeffi[0]; - + + // E = beta.B + 0.5*B^t.alpha.B + // linear contributions - // could use thread vector range on this loop - - for (int k = 1; k <= ncoeff; k++) - evdwl += d_coeffi[k]*my_sna.bvec[k-1]; - + + for (int icoeff = 0; icoeff < ncoeff; icoeff++) + evdwl += d_coeffi[icoeff+1]*d_bispectrum(ii,icoeff); + // quadratic contributions - 
+ if (quadraticflag) { int k = ncoeff+1; for (int icoeff = 0; icoeff < ncoeff; icoeff++) { - double bveci = my_sna.bvec[icoeff]; + double bveci = d_bispectrum(ii,icoeff); evdwl += 0.5*d_coeffi[k++]*bveci*bveci; for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) { - evdwl += d_coeffi[k++]*bveci*my_sna.bvec[jcoeff]; + double bvecj = d_bispectrum(ii,jcoeff); + evdwl += d_coeffi[k++]*bveci*bvecj; } } } @@ -591,9 +672,9 @@ void PairSNAPKokkos::operator() (TagPairSNAP,const template template KOKKOS_INLINE_FUNCTION -void PairSNAPKokkos::operator() (TagPairSNAP,const typename Kokkos::TeamPolicy >::member_type& team) const { +void PairSNAPKokkos::operator() (TagPairSNAPCompute,const typename Kokkos::TeamPolicy >::member_type& team) const { EV_FLOAT ev; - this->template operator()(TagPairSNAP(), team, ev); + this->template operator()(TagPairSNAPCompute(), team, ev); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/sna_kokkos.h b/src/KOKKOS/sna_kokkos.h index 40e5fe0ad4..ff2541dca3 100644 --- a/src/KOKKOS/sna_kokkos.h +++ b/src/KOKKOS/sna_kokkos.h @@ -25,7 +25,11 @@ namespace LAMMPS_NS { -struct SNAKK_LOOPINDICES { +struct SNAKK_ZINDICES { + int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju; +}; + +struct SNAKK_BINDICES { int j1, j2, j; }; @@ -35,9 +39,9 @@ class SNAKokkos { public: typedef Kokkos::View t_sna_1i; typedef Kokkos::View t_sna_1d; + typedef Kokkos::View > t_sna_1d_atomic; typedef Kokkos::View t_sna_2d; typedef Kokkos::View t_sna_3d; - typedef Kokkos::View > t_sna_3d_atomic; typedef Kokkos::View t_sna_4d; typedef Kokkos::View t_sna_3d3; typedef Kokkos::View t_sna_5d; @@ -76,9 +80,10 @@ inline KOKKOS_INLINE_FUNCTION void compute_zi(const typename Kokkos::TeamPolicy::member_type& team); // ForceSNAP KOKKOS_INLINE_FUNCTION - void compute_bi(const typename Kokkos::TeamPolicy::member_type& team); // ForceSNAP + void compute_yi(const typename Kokkos::TeamPolicy::member_type& team, + const Kokkos::View 
&beta, const int ii); // ForceSNAP KOKKOS_INLINE_FUNCTION - void copy_bi2bvec(const typename Kokkos::TeamPolicy::member_type& team); //ForceSNAP + void compute_bi(const typename Kokkos::TeamPolicy::member_type& team); // ForceSNAP // functions for derivatives @@ -87,7 +92,7 @@ inline KOKKOS_INLINE_FUNCTION void compute_dbidrj(const typename Kokkos::TeamPolicy::member_type& team); //ForceSNAP KOKKOS_INLINE_FUNCTION - void copy_dbi2dbvec(const typename Kokkos::TeamPolicy::member_type& team); //ForceSNAP + void compute_deidrj(const typename Kokkos::TeamPolicy::member_type& team, double *); // ForceSNAP KOKKOS_INLINE_FUNCTION double compute_sfac(double, double); // add_uarraytot, compute_duarray KOKKOS_INLINE_FUNCTION @@ -114,37 +119,41 @@ inline int twojmax, diagonalstyle; // Per InFlight Particle - t_sna_3d barray; - t_sna_3d uarraytot_r, uarraytot_i; - t_sna_3d_atomic uarraytot_r_a, uarraytot_i_a; - t_sna_5d zarray_r, zarray_i; + t_sna_1d blist; + t_sna_1d ulisttot_r, ulisttot_i; + t_sna_1d_atomic ulisttot_r_a, ulisttot_i_a; + t_sna_1d zlist_r, zlist_i; // Per InFlight Interaction - t_sna_3d uarray_r, uarray_i; - - Kokkos::View bvec; + t_sna_1d ulist_r, ulist_i; + t_sna_1d ylist_r, ylist_i; // derivatives of data - Kokkos::View dbvec; - t_sna_4d duarray_r, duarray_i; - t_sna_4d dbarray; + t_sna_2d dulist_r, dulist_i; + t_sna_2d dblist; private: double rmin0, rfac0; //use indexlist instead of loops, constructor generates these - // Same accross all SNAKokkos - Kokkos::View idxj,idxj_full; - int idxj_max,idxj_full_max; + // Same across all SNAKokkos + Kokkos::View idxz; + Kokkos::View idxb; + int idxcg_max, idxu_max, idxz_max, idxb_max; + Kokkos::View idxcg_block; + Kokkos::View idxu_block; + Kokkos::View idxz_block; + Kokkos::View idxb_block; + // data for bispectrum coefficients // Same accross all SNAKokkos - t_sna_5d cgarray; + t_sna_1d cglist; t_sna_2d rootpqarray; - static const int nmaxfactorial = 167; - KOKKOS_INLINE_FUNCTION + static const double 
nfac_table[]; + inline double factorial(int); KOKKOS_INLINE_FUNCTION diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index c43003af97..4ca8ae4471 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -47,13 +47,13 @@ SNAKokkos::SNAKokkos(double rfac0_in, build_indexlist(); - int jdim = twojmax + 1; + int jdimpq = twojmax + 2; + rootpqarray = t_sna_2d("SNAKokkos::rootpqarray",jdimpq,jdimpq); - cgarray = t_sna_5d("SNAKokkos::cgarray",jdim,jdim,jdim,jdim,jdim); - rootpqarray = t_sna_2d("SNAKokkos::rootpqarray",jdim+1,jdim+1); + cglist = t_sna_1d("SNAKokkos::cglist",idxcg_max); if (bzero_flag) { - bzero = Kokkos::View("sna:bzero",jdim); + bzero = Kokkos::View("sna:bzero",twojmax+1); auto h_bzero = Kokkos::create_mirror_view(bzero); double www = wself*wself*wself; @@ -77,11 +77,17 @@ SNAKokkos::SNAKokkos(const SNAKokkos& sna, const typenam ncoeff = sna.ncoeff; nmax = sna.nmax; - idxj = sna.idxj; - idxj_max = sna.idxj_max; - idxj_full = sna.idxj_full; - idxj_full_max = sna.idxj_full_max; - cgarray = sna.cgarray; + idxz = sna.idxz; + idxb = sna.idxb; + idxcg_max = sna.idxcg_max; + idxu_max = sna.idxu_max; + idxz_max = sna.idxz_max; + idxb_max = sna.idxb_max; + idxcg_block = sna.idxcg_block; + idxu_block = sna.idxu_block; + idxz_block = sna.idxz_block; + idxb_block = sna.idxb_block; + cglist = sna.cglist; rootpqarray = sna.rootpqarray; bzero = sna.bzero; create_team_scratch_arrays(team); @@ -100,47 +106,133 @@ template inline void SNAKokkos::build_indexlist() { - int idxj_count = 0; - int idxj_full_count = 0; + // index list for cglist + + int jdim = twojmax + 1; + idxcg_block = Kokkos::View("SNAKokkos::idxcg_block",jdim,jdim,jdim); + auto h_idxcg_block = Kokkos::create_mirror_view(idxcg_block); + + int idxcg_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { + h_idxcg_block(j1,j2,j) = idxcg_count; + for (int m1 = 0; m1 <= j1; 
m1++) + for (int m2 = 0; m2 <= j2; m2++) + idxcg_count++; + } + idxcg_max = idxcg_count; + Kokkos::deep_copy(idxcg_block,h_idxcg_block); + + // index list for uarray + // need to include both halves + + idxu_block = Kokkos::View("SNAKokkos::idxu_block",jdim); + auto h_idxu_block = Kokkos::create_mirror_view(idxu_block); + + int idxu_count = 0; + + for(int j = 0; j <= twojmax; j++) { + h_idxu_block[j] = idxu_count; + for(int mb = 0; mb <= j; mb++) + for(int ma = 0; ma <= j; ma++) + idxu_count++; + } + idxu_max = idxu_count; + Kokkos::deep_copy(idxu_block,h_idxu_block); + + // index list for beta and B + + int idxb_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) + if (j >= j1) idxb_count++; + + idxb_max = idxb_count; + idxb = Kokkos::View("SNAKokkos::idxb",idxb_max); + auto h_idxb = Kokkos::create_mirror_view(idxb); + + idxb_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) + if (j >= j1) { + h_idxb[idxb_count].j1 = j1; + h_idxb[idxb_count].j2 = j2; + h_idxb[idxb_count].j = j; + idxb_count++; + } + Kokkos::deep_copy(idxb,h_idxb); + + // reverse index list for beta and b + + idxb_block = Kokkos::View("SNAKokkos::idxb_block",jdim,jdim,jdim); + auto h_idxb_block = Kokkos::create_mirror_view(idxb_block); + + idxb_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { + if (j >= j1) { + h_idxb_block(j1,j2,j) = idxb_count; + idxb_count++; + } + } + Kokkos::deep_copy(idxb_block,h_idxb_block); + + // index list for zlist + + int idxz_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { - if (j >= j1) idxj_count++; - idxj_full_count++; - } - - // indexList can be changed here - - idxj = 
Kokkos::View("SNAKokkos::idxj",idxj_count); - idxj_full = Kokkos::View("SNAKokkos::idxj_full",idxj_full_count); - auto h_idxj = Kokkos::create_mirror_view(idxj); - auto h_idxj_full = Kokkos::create_mirror_view(idxj_full); - - idxj_max = idxj_count; - idxj_full_max = idxj_full_count; - - idxj_count = 0; - idxj_full_count = 0; + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) + for (int mb = 0; 2*mb <= j; mb++) + for (int ma = 0; ma <= j; ma++) + idxz_count++; + + idxz_max = idxz_count; + idxz = Kokkos::View("SNAKokkos::idxz",idxz_max); + auto h_idxz = Kokkos::create_mirror_view(idxz); + idxz_block = Kokkos::View("SNAKokkos::idxz_block", jdim,jdim,jdim); + auto h_idxz_block = Kokkos::create_mirror_view(idxz_block); + + idxz_count = 0; for(int j1 = 0; j1 <= twojmax; j1++) for(int j2 = 0; j2 <= j1; j2++) - for(int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) { - if (j >= j1) { - h_idxj[idxj_count].j1 = j1; - h_idxj[idxj_count].j2 = j2; - h_idxj[idxj_count].j = j; - idxj_count++; - } - h_idxj_full[idxj_full_count].j1 = j1; - h_idxj_full[idxj_full_count].j2 = j2; - h_idxj_full[idxj_full_count].j = j; - idxj_full_count++; - } - Kokkos::deep_copy(idxj,h_idxj); - Kokkos::deep_copy(idxj_full,h_idxj_full); + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { + h_idxz_block(j1,j2,j) = idxz_count; + // find right beta(ii,jjb) entry + // multiply and divide by j+1 factors + // account for multiplicity of 1, 2, or 3 + + for (int mb = 0; 2*mb <= j; mb++) + for (int ma = 0; ma <= j; ma++) { + h_idxz[idxz_count].j1 = j1; + h_idxz[idxz_count].j2 = j2; + h_idxz[idxz_count].j = j; + h_idxz[idxz_count].ma1min = MAX(0, (2 * ma - j - j2 + j1) / 2); + h_idxz[idxz_count].ma2max = (2 * ma - j - (2 * h_idxz[idxz_count].ma1min - j1) + j2) / 2; + h_idxz[idxz_count].na = MIN(j1, (2 * ma - j + j2 + j1) / 2) - h_idxz[idxz_count].ma1min + 1; + h_idxz[idxz_count].mb1min = MAX(0, (2 * mb - j - j2 + j1) / 2); + h_idxz[idxz_count].mb2max = (2 * mb - j - (2 * 
h_idxz[idxz_count].mb1min - j1) + j2) / 2; + h_idxz[idxz_count].nb = MIN(j1, (2 * mb - j + j2 + j1) / 2) - h_idxz[idxz_count].mb1min + 1; + + // apply to z(j1,j2,j,ma,mb) to unique element of y(j) + + const int jju = idxu_block[j] + (j+1)*mb + ma; + h_idxz[idxz_count].jju = jju; + + idxz_count++; + } + } + Kokkos::deep_copy(idxz,h_idxz); + Kokkos::deep_copy(idxz_block,h_idxz_block); } + /* ---------------------------------------------------------------------- */ template @@ -166,6 +258,7 @@ template KOKKOS_INLINE_FUNCTION void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy::member_type& team, int jnum) { + //printf("jnum %i\n",jnum); double rsq, r, x, y, z, z0, theta0; // utot(j,ma,mb) = 0 for all j,ma,ma @@ -211,93 +304,234 @@ template KOKKOS_INLINE_FUNCTION void SNAKokkos::compute_zi(const typename Kokkos::TeamPolicy::member_type& team) { - // for j1 = 0,...,twojmax - // for j2 = 0,twojmax - // for j = |j1-j2|,Min(twojmax,j1+j2),2 - // for ma = 0,...,j - // for mb = 0,...,jmid - // z(j1,j2,j,ma,mb) = 0 - // for ma1 = Max(0,ma+(j1-j2-j)/2),Min(j1,ma+(j1+j2-j)/2) - // sumb1 = 0 - // ma2 = ma-ma1+(j1+j2-j)/2; - // for mb1 = Max(0,mb+(j1-j2-j)/2),Min(j1,mb+(j1+j2-j)/2) - // mb2 = mb-mb1+(j1+j2-j)/2; - // sumb1 += cg(j1,mb1,j2,mb2,j) * - // u(j1,ma1,mb1) * u(j2,ma2,mb2) - // z(j1,j2,j,ma,mb) += sumb1*cg(j1,ma1,j2,ma2,j) + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,idxz_max), + [&] (const int& jjz) { + //for(int jjz = 0; jjz < idxz_max; jjz++) { + const int j1 = idxz[jjz].j1; + const int j2 = idxz[jjz].j2; + const int j = idxz[jjz].j; + const int ma1min = idxz[jjz].ma1min; + const int ma2max = idxz[jjz].ma2max; + const int na = idxz[jjz].na; + const int mb1min = idxz[jjz].mb1min; + const int mb2max = idxz[jjz].mb2max; + const int nb = idxz[jjz].nb; -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &starttime); -#endif + const double* cgblock = cglist.data() + idxcg_block(j1,j2,j); - // compute_dbidrj() requires full j1/j2/j chunk of z elements - // 
use zarray j1/j2 symmetry + zlist_r[jjz] = 0.0; + zlist_i[jjz] = 0.0; - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,idxj_full_max), - [&] (const int& idx) { - const int j1 = idxj_full(idx).j1; - const int j2 = idxj_full(idx).j2; - const int j = idxj_full(idx).j; + int jju1 = idxu_block[j1] + (j1+1)*mb1min; + int jju2 = idxu_block[j2] + (j2+1)*mb2max; + int icgb = mb1min*(j2+1) + mb2max; + for(int ib = 0; ib < nb; ib++) { - const int bound = (j+2)/2; - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,(j+1)*bound), - [&] (const int mbma ) { - //for(int mb = 0; 2*mb <= j; mb++) - //for(int ma = 0; ma <= j; ma++) { - const int ma = mbma%(j+1); - const int mb = mbma/(j+1); + double suma1_r = 0.0; + double suma1_i = 0.0; - //zarray_r(j1,j2,j,ma,mb) = 0.0; - //zarray_i(j1,j2,j,ma,mb) = 0.0; - double z_r = 0.0; - double z_i = 0.0; + const double* u1_r = ulisttot_r.data() + jju1; + const double* u1_i = ulisttot_i.data() + jju1; + const double* u2_r = ulisttot_r.data() + jju2; + const double* u2_i = ulisttot_i.data() + jju2; - for(int ma1 = MAX(0, (2 * ma - j - j2 + j1) / 2); - ma1 <= MIN(j1, (2 * ma - j + j2 + j1) / 2); ma1++) { - double sumb1_r = 0.0; - double sumb1_i = 0.0; + int ma1 = ma1min; + int ma2 = ma2max; + int icga = ma1min*(j2+1) + ma2max; + for(int ia = 0; ia < na; ia++) { + suma1_r += cgblock[icga] * (u1_r[ma1] * u2_r[ma2] - u1_i[ma1] * u2_i[ma2]); + suma1_i += cgblock[icga] * (u1_r[ma1] * u2_i[ma2] + u1_i[ma1] * u2_r[ma2]); + ma1++; + ma2--; + icga += j2; + } // end loop over ia - const int ma2 = (2 * ma - j - (2 * ma1 - j1) + j2) / 2; + zlist_r[jjz] += cgblock[icgb] * suma1_r; + zlist_i[jjz] += cgblock[icgb] * suma1_i; + //printf("%i %i %i %g %g\n",j1,j2,j,cgblock[icgb],suma1_r); + jju1 += j1+1; + jju2 -= j2+1; + icgb += j2; + } // end loop over ib - for(int mb1 = MAX( 0, (2 * mb - j - j2 + j1) / 2); - mb1 <= MIN(j1, (2 * mb - j + j2 + j1) / 2); mb1++) { - - const int mb2 = (2 * mb - j - (2 * mb1 - j1) + j2) / 2; - const double cga = 
cgarray(j1,j2,j,mb1,mb2); - const double uat1_r = uarraytot_r(j1,ma1,mb1); - const double uat1_i = uarraytot_i(j1,ma1,mb1); - const double uat2_r = uarraytot_r(j2,ma2,mb2); - const double uat2_i = uarraytot_i(j2,ma2,mb2); - sumb1_r += cga * (uat1_r * uat2_r - uat1_i * uat2_i); - sumb1_i += cga * (uat1_r * uat2_i + uat1_i * uat2_r); - /*sumb1_r += cgarray(j1,j2,j,mb1,mb2) * - (uarraytot_r(j1,ma1,mb1) * uarraytot_r(j2,ma2,mb2) - - uarraytot_i(j1,ma1,mb1) * uarraytot_i(j2,ma2,mb2)); - sumb1_i += cgarray(j1,j2,j,mb1,mb2) * - (uarraytot_r(j1,ma1,mb1) * uarraytot_i(j2,ma2,mb2) + - uarraytot_i(j1,ma1,mb1) * uarraytot_r(j2,ma2,mb2));*/ - } // end loop over mb1 - - const double cga = cgarray(j1,j2,j,ma1,ma2); - z_r += sumb1_r * cga;//rray(j1,j2,j,ma1,ma2); - z_i += sumb1_i * cga;//rray(j1,j2,j,ma1,ma2); - } // end loop over ma1 - zarray_r(j1,j2,j,mb,ma) = z_r; - zarray_i(j1,j2,j,mb,ma) = z_i; - }); // end loop over ma, mb - // } - //} - }); - //} // end loop over j - //} // end loop over j1, j2 - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[1] += (endtime.tv_sec - starttime.tv_sec + 1.0 * - (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); -#endif + }); // end loop over jjz } +/* ---------------------------------------------------------------------- + compute Yi from Ui without storing Zi, looping over zlist indices +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void SNAKokkos::compute_yi(const typename Kokkos::TeamPolicy::member_type& team, + const Kokkos::View &beta, const int ii) +{ + int j; + int jjz; + int jju; + double betaj; + + { + double* const ptr = ylist_r.data(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,ylist_r.span()), + [&] (const int& i) { + ptr[i] = 0.0; + }); + } + { + double* const ptr = ylist_i.data(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,ylist_i.span()), + [&] (const int& i) { + ptr[i] = 0.0; + }); + } + + 
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,idxz_max), + [&] (const int& jjz) { + //for(int jjz = 0; jjz < idxz_max; jjz++) { + const int j1 = idxz[jjz].j1; + const int j2 = idxz[jjz].j2; + const int j = idxz[jjz].j; + const int ma1min = idxz[jjz].ma1min; + const int ma2max = idxz[jjz].ma2max; + const int na = idxz[jjz].na; + const int mb1min = idxz[jjz].mb1min; + const int mb2max = idxz[jjz].mb2max; + const int nb = idxz[jjz].nb; + + const double* cgblock = cglist.data() + idxcg_block(j1,j2,j); + int mb = (2 * (mb1min+mb2max) - j1 - j2 + j) / 2; + int ma = (2 * (ma1min+ma2max) - j1 - j2 + j) / 2; + + double ztmp_r = 0.0; + double ztmp_i = 0.0; + + int jju1 = idxu_block[j1] + (j1+1)*mb1min; + int jju2 = idxu_block[j2] + (j2+1)*mb2max; + int icgb = mb1min*(j2+1) + mb2max; + for(int ib = 0; ib < nb; ib++) { + + double suma1_r = 0.0; + double suma1_i = 0.0; + + const double* u1_r = ulisttot_r.data() + jju1; + const double* u1_i = ulisttot_i.data() + jju1; + const double* u2_r = ulisttot_r.data() + jju2; + const double* u2_i = ulisttot_i.data() + jju2; + + int ma1 = ma1min; + int ma2 = ma2max; + int icga = ma1min*(j2+1) + ma2max; + + for(int ia = 0; ia < na; ia++) { + suma1_r += cgblock[icga] * (u1_r[ma1] * u2_r[ma2] - u1_i[ma1] * u2_i[ma2]); + suma1_i += cgblock[icga] * (u1_r[ma1] * u2_i[ma2] + u1_i[ma1] * u2_r[ma2]); ma1++; + ma2--; + icga += j2; + } // end loop over ia + + ztmp_r += cgblock[icgb] * suma1_r; + ztmp_i += cgblock[icgb] * suma1_i; + jju1 += j1+1; + jju2 -= j2+1; + icgb += j2; + } // end loop over ib + + // apply to z(j1,j2,j,ma,mb) to unique element of y(j) + // find right y_list[jju] and beta(ii,jjb) entries + // multiply and divide by j+1 factors + // account for multiplicity of 1, 2, or 3 + + const int jju = idxz[jjz].jju; + + // pick out right beta value + + if (j >= j1) { + const int jjb = idxb_block(j1,j2,j); + if (j1 == j) { + if (j2 == j) betaj = 3*beta(ii,jjb); + else betaj = 2*beta(ii,jjb); + } else betaj = beta(ii,jjb); + } else if (j 
>= j2) { + const int jjb = idxb_block(j,j2,j1); + if (j2 == j) betaj = 2*beta(ii,jjb)*(j1+1)/(j+1.0); + else betaj = beta(ii,jjb)*(j1+1)/(j+1.0); + } else { + const int jjb = idxb_block(j2,j,j1); + betaj = beta(ii,jjb)*(j1+1)/(j+1.0); + } + + ylist_r[jju] += betaj*ztmp_r; + ylist_i[jju] += betaj*ztmp_i; + //printf("yi %i %g %g\n",jju,ylist_r[jju],ylist_i[jju]); + + }); // end loop over jjz +} + +/* ---------------------------------------------------------------------- + compute dEidRj +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void SNAKokkos::compute_deidrj(const typename Kokkos::TeamPolicy::member_type& team, double* dedr) +{ + + for(int k = 0; k < 3; k++) + dedr[k] = 0.0; + + // TODO: which loop is faster to parallelize? + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,twojmax+1), + [&] (const int& j) { + //for(int j = 0; j <= twojmax; j++) { + int jju = idxu_block[j]; + + for(int mb = 0; 2*mb < j; mb++) + for(int ma = 0; ma <= j; ma++) { + + double jjjmambyarray_r = ylist_r[jju]; + double jjjmambyarray_i = ylist_i[jju]; + + for(int k = 0; k < 3; k++) + dedr[k] += + dulist_r(jju,k) * jjjmambyarray_r + + dulist_i(jju,k) * jjjmambyarray_i; + jju++; + } //end loop over ma mb + + // For j even, handle middle column + + if (j%2 == 0) { + + int mb = j/2; + for(int ma = 0; ma < mb; ma++) { + double jjjmambyarray_r = ylist_r[jju]; + double jjjmambyarray_i = ylist_i[jju]; + + for(int k = 0; k < 3; k++) + dedr[k] += + dulist_r(jju,k) * jjjmambyarray_r + + dulist_i(jju,k) * jjjmambyarray_i; + jju++; + } + + int ma = mb; + double jjjmambyarray_r = ylist_r[jju]; + double jjjmambyarray_i = ylist_i[jju]; + + for(int k = 0; k < 3; k++) + dedr[k] += + (dulist_r(jju,k) * jjjmambyarray_r + + dulist_i(jju,k) * jjjmambyarray_i)*0.5; + } // end if jeven + + }); // end loop over j + + for(int k = 0; k < 3; k++) + dedr[k] *= 2.0; + + //printf("dedr %g %g %g\n",dedr[0],dedr[1],dedr[2]); +} /* 
---------------------------------------------------------------------- compute Bi by summing conj(Ui)*Zi @@ -316,31 +550,33 @@ void SNAKokkos::compute_bi(const typename Kokkos::TeamPolicy::compute_bi(const typename Kokkos::TeamPolicy -KOKKOS_INLINE_FUNCTION -void SNAKokkos::copy_bi2bvec(const typename Kokkos::TeamPolicy::member_type& team) -{ - /* int ncount, j1, j2, j; - - ncount = 0; - - for(j1 = 0; j1 <= twojmax; j1++) { - for(j2 = 0; j2 <= j1; j2++) - for(j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) {*/ - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,idxj_max), - [&] (const int& JJ) { - //for(int JJ = 0; JJ < idxj_max; JJ++) { - const int j1 = idxj[JJ].j1; - const int j2 = idxj[JJ].j2; - const int j = idxj[JJ].j; - bvec(JJ) = barray(j1,j2,j); - //ncount++; - }); -} - -/* ---------------------------------------------------------------------- - calculate derivative of Ui w.r.t. atom j -------------------------------------------------------------------------- */ - -template -KOKKOS_INLINE_FUNCTION -void SNAKokkos::compute_duidrj(const typename Kokkos::TeamPolicy::member_type& team, - double* rij, double wj, double rcut) -{ - double rsq, r, x, y, z, z0, theta0, cs, sn; - double dz0dr; - - x = rij[0]; - y = rij[1]; - z = rij[2]; - rsq = x * x + y * y + z * z; - r = sqrt(rsq); - double rscale0 = rfac0 * MY_PI / (rcut - rmin0); - theta0 = (r - rmin0) * rscale0; - cs = cos(theta0); - sn = sin(theta0); - z0 = r * cs / sn; - dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &starttime); -#endif - - compute_duarray(team, x, y, z, z0, r, dz0dr, wj, rcut); - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[3] += (endtime.tv_sec - starttime.tv_sec + 1.0 * - (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); -#endif - } /* ---------------------------------------------------------------------- @@ -478,21 +649,18 @@ void SNAKokkos::compute_dbidrj(const typename 
Kokkos::TeamPolicy::compute_dbidrj(const typename Kokkos::TeamPolicy dbdr,sumzdu_r; // Sum terms Conj(dudr(j,ma,mb))*z(j1,j2,j,ma,mb) - // use zarray j1/j2 symmetry (optional) - - int j_,j1_,j2_; - if (j1 >= j2) { - //jjjzarray_r = &zarray_r(j1,j2,j); - //jjjzarray_i = &zarray_i(j1,j2,j); - j1_ = j1; - j2_ = j2; - j_ = j; - } else { - j1_ = j2; - j2_ = j1; - j_ = j; - //jjjzarray_r = &zarray_r(j2,j1,j); - //jjjzarray_i = &zarray_i(j2,j1,j); - } + int jjz = idxz_block(j1,j2,j); + int jju = idxu_block[j]; for(int mb = 0; 2*mb < j; mb++) for(int ma = 0; ma <= j; ma++) { - - dudr_r = &duarray_r(j,mb,ma,0); - dudr_i = &duarray_i(j,mb,ma,0); - jjjmambzarray_r = zarray_r(j1_,j2_,j_,mb,ma); - jjjmambzarray_i = zarray_i(j1_,j2_,j_,mb,ma); - sumzdu_r.x += (dudr_r[0] * jjjmambzarray_r + dudr_i[0] * jjjmambzarray_i); - sumzdu_r.y += (dudr_r[1] * jjjmambzarray_r + dudr_i[1] * jjjmambzarray_i); - sumzdu_r.z += (dudr_r[2] * jjjmambzarray_r + dudr_i[2] * jjjmambzarray_i); - + const int jju_index = jju+mb*(j+1)+ma; + const int jjz_index = jjz+mb*(j+1)+ma; + sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]); + sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]); + sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]); } //end loop over ma mb // For j even, handle middle column @@ -535,14 +685,19 @@ void SNAKokkos::compute_dbidrj(const typename Kokkos::TeamPolicy::compute_dbidrj(const typename Kokkos::TeamPolicy= j2) { - j1_ = j; - j2_ = j2; - j_ = j1; - - //jjjzarray_r = zarray_r(j,j2,j1); - //jjjzarray_i = zarray_i(j,j2,j1); - } else { - j1_ = j2; - j2_ = j; - j_ = j1; - //jjjzarray_r = zarray_r(j2,j,j1); - //jjjzarray_i = zarray_i(j2,j,j1); - } - - for(int mb1 = 0; 2*mb1 < j1; mb1++) - for(int ma1 = 0; ma1 <= j1; ma1++) { - - dudr_r = &duarray_r(j1,mb1,ma1,0); - dudr_i = &duarray_i(j1,mb1,ma1,0); - jjjmambzarray_r = 
zarray_r(j1_,j2_,j_,mb1,ma1); - jjjmambzarray_i = zarray_i(j1_,j2_,j_,mb1,ma1); - sumzdu_r.x += (dudr_r[0] * jjjmambzarray_r + dudr_i[0] * jjjmambzarray_i); - sumzdu_r.y += (dudr_r[1] * jjjmambzarray_r + dudr_i[1] * jjjmambzarray_i); - sumzdu_r.z += (dudr_r[2] * jjjmambzarray_r + dudr_i[2] * jjjmambzarray_i); + for(int mb = 0; 2*mb < j1; mb++) + for(int ma = 0; ma <= j1; ma++) { + const int jju_index = jju+mb*(j1+1)+ma; + const int jjz_index = jjz+mb*(j1+1)+ma; + sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]); + sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]); + sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]); } //end loop over ma1 mb1 // For j1 even, handle middle column if (j1%2 == 0) { - const int mb1 = j1/2; - for(int ma1 = 0; ma1 <= mb1; ma1++) { - dudr_r = &duarray_r(j1,mb1,ma1,0); - dudr_i = &duarray_i(j1,mb1,ma1,0); - const double factor = ma1==mb1?0.5:1.0; - jjjmambzarray_r = zarray_r(j1_,j2_,j_,mb1,ma1) * factor; - jjjmambzarray_i = zarray_i(j1_,j2_,j_,mb1,ma1) * factor; - sumzdu_r.x += (dudr_r[0] * jjjmambzarray_r + dudr_i[0] * jjjmambzarray_i); - sumzdu_r.y += (dudr_r[1] * jjjmambzarray_r + dudr_i[1] * jjjmambzarray_i); - sumzdu_r.z += (dudr_r[2] * jjjmambzarray_r + dudr_i[2] * jjjmambzarray_i); + const int mb = j1/2; + for(int ma = 0; ma <= mb; ma++) { + const int jju_index = jju+(mb-1)*(j1+1)+(j1+1)+ma; + const int jjz_index = jjz+(mb-1)*(j1+1)+(j1+1)+ma; + sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]); + sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]); + sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]); + } + int ma = mb; + const int jju_index = jju+(mb-1)*(j1+1)+(j1+1)+ma; + const int jjz_index = 
jjz+(mb-1)*(j1+1)+(j1+1)+ma; + for(int k = 0; k < 3; k++) { + sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz] + dulist_i(jju_index,0) * zlist_i[jjz_index])*0.5; + sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz] + dulist_i(jju_index,1) * zlist_i[jjz_index])*0.5; + sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz] + dulist_i(jju_index,2) * zlist_i[jjz_index])*0.5; } } // end if j1even @@ -605,94 +748,74 @@ void SNAKokkos::compute_dbidrj(const typename Kokkos::TeamPolicy= j) { - j1_ = j1; - j2_ = j; - j_ = j2; - //jjjzarray_r = zarray_r(j1,j,j2); - //jjjzarray_i = zarray_i(j1,j,j2); - } else { - j1_ = j; - j2_ = j1; - j_ = j2; - //jjjzarray_r = zarray_r(j,j1,j2); - //jjjzarray_i = zarray_i(j,j1,j2); - } - - for(int mb2 = 0; 2*mb2 < j2; mb2++) - for(int ma2 = 0; ma2 <= j2; ma2++) { - - dudr_r = &duarray_r(j2,mb2,ma2,0); - dudr_i = &duarray_i(j2,mb2,ma2,0); - jjjmambzarray_r = zarray_r(j1_,j2_,j_,mb2,ma2); - jjjmambzarray_i = zarray_i(j1_,j2_,j_,mb2,ma2); - sumzdu_r.x += (dudr_r[0] * jjjmambzarray_r + dudr_i[0] * jjjmambzarray_i); - sumzdu_r.y += (dudr_r[1] * jjjmambzarray_r + dudr_i[1] * jjjmambzarray_i); - sumzdu_r.z += (dudr_r[2] * jjjmambzarray_r + dudr_i[2] * jjjmambzarray_i); + for(int mb = 0; 2*mb < j2; mb++) + for(int ma = 0; ma <= j2; ma++) { + const int jju_index = jju+mb*(j2+1)+ma; + const int jjz_index = jjz+mb*(j2+1)+ma; + sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]); + sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]); + sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]); } //end loop over ma2 mb2 // For j2 even, handle middle column if (j2%2 == 0) { - const int mb2 = j2/2; - for(int ma2 = 0; ma2 <= mb2; ma2++) { - dudr_r = &duarray_r(j2,mb2,ma2,0); - dudr_i = &duarray_i(j2,mb2,ma2,0); - const double factor = ma2==mb2?0.5:1.0; - jjjmambzarray_r = zarray_r(j1_,j2_,j_,mb2,ma2) * 
factor; - jjjmambzarray_i = zarray_i(j1_,j2_,j_,mb2,ma2) * factor; - sumzdu_r.x += (dudr_r[0] * jjjmambzarray_r + dudr_i[0] * jjjmambzarray_i); - sumzdu_r.y += (dudr_r[1] * jjjmambzarray_r + dudr_i[1] * jjjmambzarray_i); - sumzdu_r.z += (dudr_r[2] * jjjmambzarray_r + dudr_i[2] * jjjmambzarray_i); + const int mb = j2/2; + for(int ma = 0; ma <= mb; ma++) { + const int jju_index = jju+(mb-1)*(j2+1)+(j2+1)+ma; + const int jjz_index = jjz+(mb-1)*(j2+1)+(j2+1)+ma; + sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz_index] + dulist_i(jju_index,0) * zlist_i[jjz_index]); + sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz_index] + dulist_i(jju_index,1) * zlist_i[jjz_index]); + sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz_index] + dulist_i(jju_index,2) * zlist_i[jjz_index]); + } + int ma = mb; + const int jju_index = jju+(mb-1)*(j2+1)+(j2+1)+ma; + const int jjz_index = jjz+(mb-1)*(j2+1)+(j2+1)+ma; + for(int k = 0; k < 3; k++) { + sumzdu_r.x += (dulist_r(jju_index,0) * zlist_r[jjz] + dulist_i(jju_index,0) * zlist_i[jjz_index])*0.5; + sumzdu_r.y += (dulist_r(jju_index,1) * zlist_r[jjz] + dulist_i(jju_index,1) * zlist_i[jjz_index])*0.5; + sumzdu_r.z += (dulist_r(jju_index,2) * zlist_r[jjz] + dulist_i(jju_index,2) * zlist_i[jjz_index])*0.5; } } // end if j2even dbdr += 2.0*sumzdu_r*j2fac; - dbarray(j1,j2,j,0) = dbdr.x; - dbarray(j1,j2,j,1) = dbdr.y; - dbarray(j1,j2,j,2) = dbdr.z; + dblist(jjb,0) = dbdr.x; + dblist(jjb,1) = dbdr.y; + dblist(jjb,2) = dbdr.z; + }); //end loop over j1 j2 j - -#ifdef TIMING_INFO - clock_gettime(CLOCK_REALTIME, &endtime); - timers[4] += (endtime.tv_sec - starttime.tv_sec + 1.0 * - (endtime.tv_nsec - starttime.tv_nsec) / 1000000000); -#endif - } /* ---------------------------------------------------------------------- - copy Bi derivatives into a vector + calculate derivative of Ui w.r.t. 
atom j ------------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -void SNAKokkos::copy_dbi2dbvec(const typename Kokkos::TeamPolicy::member_type& team) +void SNAKokkos::compute_duidrj(const typename Kokkos::TeamPolicy::member_type& team, + double* rij, double wj, double rcut) { - /* int ncount, j1, j2, j; + double rsq, r, x, y, z, z0, theta0, cs, sn; + double dz0dr; - ncount = 0; + x = rij[0]; + y = rij[1]; + z = rij[2]; + rsq = x * x + y * y + z * z; + r = sqrt(rsq); + double rscale0 = rfac0 * MY_PI / (rcut - rmin0); + theta0 = (r - rmin0) * rscale0; + cs = cos(theta0); + sn = sin(theta0); + z0 = r * cs / sn; + dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; - for(j1 = 0; j1 <= twojmax; j1++) { - for(j2 = 0; j2 <= j1; j2++) - for(j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) {*/ - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,idxj_max), - [&] (const int& JJ) { - //for(int JJ = 0; JJ < idxj_max; JJ++) { - const int j1 = idxj[JJ].j1; - const int j2 = idxj[JJ].j2; - const int j = idxj[JJ].j; - dbvec(JJ,0) = dbarray(j1,j2,j,0); - dbvec(JJ,1) = dbarray(j1,j2,j,1); - dbvec(JJ,2) = dbarray(j1,j2,j,2); - //ncount++; - }); + compute_duarray(team, x, y, z, z0, r, dz0dr, wj, rcut); } /* ---------------------------------------------------------------------- */ @@ -702,15 +825,15 @@ KOKKOS_INLINE_FUNCTION void SNAKokkos::zero_uarraytot(const typename Kokkos::TeamPolicy::member_type& team) { { - double* const ptr = uarraytot_r.data(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,uarraytot_r.span()), + double* const ptr = ulisttot_r.data(); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot_r.span()), [&] (const int& i) { ptr[i] = 0.0; }); } { - double* const ptr = uarraytot_i.data(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,uarraytot_r.span()), + double* const ptr = ulisttot_i.data(); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot_i.span()), 
[&] (const int& i) { ptr[i] = 0.0; }); @@ -723,12 +846,14 @@ template KOKKOS_INLINE_FUNCTION void SNAKokkos::addself_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, double wself_in) { - //for (int j = 0; j <= twojmax; j++) Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,twojmax+1), [&] (const int& j) { + //for (int j = 0; j <= twojmax; j++) + int jju = idxu_block[j]; for (int ma = 0; ma <= j; ma++) { - uarraytot_r(j,ma,ma) = wself_in; - uarraytot_i(j,ma,ma) = 0.0; + ulisttot_r[jju] = wself_in; + ulisttot_i[jju] = 0.0; + jju += j+2; } }); } @@ -743,20 +868,11 @@ void SNAKokkos::add_uarraytot(const typename Kokkos::TeamPolicy::compute_uarray(const typename Kokkos::TeamPolicy::compute_duarray(const typename Kokkos::TeamPolicy::compute_duarray(const typename Kokkos::TeamPolicy KOKKOS_INLINE_FUNCTION void SNAKokkos::create_team_scratch_arrays(const typename Kokkos::TeamPolicy::member_type& team) { - int jdim = twojmax + 1; - uarraytot_r_a = uarraytot_r = t_sna_3d(team.team_scratch(1),jdim,jdim,jdim); - uarraytot_i_a = uarraytot_i = t_sna_3d(team.team_scratch(1),jdim,jdim,jdim); - zarray_r = t_sna_5d(team.team_scratch(1),jdim,jdim,jdim,jdim,jdim); - zarray_i = t_sna_5d(team.team_scratch(1),jdim,jdim,jdim,jdim,jdim); - bvec = Kokkos::View(team.team_scratch(1),ncoeff); - barray = t_sna_3d(team.team_scratch(1),jdim,jdim,jdim); + ulisttot_r_a = ulisttot_r = t_sna_1d(team.team_scratch(1),idxu_max); + ulisttot_i_a = ulisttot_i = t_sna_1d(team.team_scratch(1),idxu_max); + ylist_r = t_sna_1d(team.team_scratch(1),idxu_max); + ylist_i = t_sna_1d(team.team_scratch(1),idxu_max); + zlist_r = t_sna_1d(team.team_scratch(1),idxz_max); + zlist_i = t_sna_1d(team.team_scratch(1),idxz_max); + blist = t_sna_1d(team.team_scratch(1),idxb_max); + dblist = t_sna_2d(team.team_scratch(1),idxb_max,3); rij = t_sna_2d(team.team_scratch(1),nmax,3); rcutij = t_sna_1d(team.team_scratch(1),nmax); @@ -1043,19 +1184,16 @@ void SNAKokkos::create_team_scratch_arrays(const typename 
Kokkos::Te inside = t_sna_1i(team.team_scratch(1),nmax); } - template inline T_INT SNAKokkos::size_team_scratch_arrays() { T_INT size = 0; - int jdim = twojmax + 1; - size += t_sna_3d::shmem_size(jdim,jdim,jdim); // uarraytot_r_a - size += t_sna_3d::shmem_size(jdim,jdim,jdim); // uarraytot_i_a - size += t_sna_5d::shmem_size(jdim,jdim,jdim,jdim,jdim); // zarray_r - size += t_sna_5d::shmem_size(jdim,jdim,jdim,jdim,jdim); // zarray_i - size += Kokkos::View::shmem_size(ncoeff); // bvec - size += t_sna_3d::shmem_size(jdim,jdim,jdim); // barray + size += t_sna_1d::shmem_size(idxu_max)*2; // ulisttot + size += t_sna_1d::shmem_size(idxu_max)*2; // ylist + size += t_sna_1d::shmem_size(idxz_max)*2; // zlist + size += t_sna_1d::shmem_size(idxb_max); // blist + size += t_sna_2d::shmem_size(idxb_max,3); // dblist size += t_sna_2d::shmem_size(nmax,3); // rij size += t_sna_1d::shmem_size(nmax); // rcutij @@ -1071,53 +1209,225 @@ template KOKKOS_INLINE_FUNCTION void SNAKokkos::create_thread_scratch_arrays(const typename Kokkos::TeamPolicy::member_type& team) { - int jdim = twojmax + 1; + dblist = t_sna_2d(team.thread_scratch(1),idxb_max,3); - dbvec = Kokkos::View(team.thread_scratch(1),ncoeff); - dbarray = t_sna_4d(team.thread_scratch(1),jdim,jdim,jdim); - - uarray_r = t_sna_3d(team.thread_scratch(1),jdim,jdim,jdim); - uarray_i = t_sna_3d(team.thread_scratch(1),jdim,jdim,jdim); - duarray_r = t_sna_4d(team.thread_scratch(1),jdim,jdim,jdim); - duarray_i = t_sna_4d(team.thread_scratch(1),jdim,jdim,jdim); + ulist_r = t_sna_1d(team.thread_scratch(1),idxu_max); + ulist_i = t_sna_1d(team.thread_scratch(1),idxu_max); + dulist_r = t_sna_2d(team.thread_scratch(1),idxu_max,3); + dulist_i = t_sna_2d(team.thread_scratch(1),idxu_max,3); } template inline T_INT SNAKokkos::size_thread_scratch_arrays() { T_INT size = 0; - int jdim = twojmax + 1; - size += Kokkos::View::shmem_size(ncoeff); // dbvec - size += t_sna_4d::shmem_size(jdim,jdim,jdim); // dbarray + size += 
t_sna_2d::shmem_size(idxb_max,3); // dblist - size += t_sna_3d::shmem_size(jdim,jdim,jdim); // uarray_r - size += t_sna_3d::shmem_size(jdim,jdim,jdim); // uarray_i - size += t_sna_4d::shmem_size(jdim,jdim,jdim); // duarray_r - size += t_sna_4d::shmem_size(jdim,jdim,jdim); // duarray_i + size += t_sna_1d::shmem_size(idxu_max)*2; // ulist + size += t_sna_2d::shmem_size(idxu_max,3)*2; // dulist return size; } /* ---------------------------------------------------------------------- - factorial n + factorial n, wrapper for precomputed table ------------------------------------------------------------------------- */ template -KOKKOS_INLINE_FUNCTION +inline double SNAKokkos::factorial(int n) { - double result = 1.0; - for(int i=1; i<=n; i++) - result *= 1.0*i; - return result; + //if (n < 0 || n > nmaxfactorial) { + // char str[128]; + // sprintf(str, "Invalid argument to factorial %d", n); + // error->all(FLERR, str); + //} + + return nfac_table[n]; } +/* ---------------------------------------------------------------------- + factorial n table, size SNA::nmaxfactorial+1 +------------------------------------------------------------------------- */ + +template +const double SNAKokkos::nfac_table[] = { + 1, + 1, + 2, + 6, + 24, + 120, + 720, + 5040, + 40320, + 362880, + 3628800, + 39916800, + 479001600, + 6227020800, + 87178291200, + 1307674368000, + 20922789888000, + 355687428096000, + 6.402373705728e+15, + 1.21645100408832e+17, + 2.43290200817664e+18, + 5.10909421717094e+19, + 1.12400072777761e+21, + 2.5852016738885e+22, + 6.20448401733239e+23, + 1.5511210043331e+25, + 4.03291461126606e+26, + 1.08888694504184e+28, + 3.04888344611714e+29, + 8.8417619937397e+30, + 2.65252859812191e+32, + 8.22283865417792e+33, + 2.63130836933694e+35, + 8.68331761881189e+36, + 2.95232799039604e+38, + 1.03331479663861e+40, + 3.71993326789901e+41, + 1.37637530912263e+43, + 5.23022617466601e+44, + 2.03978820811974e+46, + 8.15915283247898e+47, + 3.34525266131638e+49, + 1.40500611775288e+51, + 
6.04152630633738e+52, + 2.65827157478845e+54, + 1.1962222086548e+56, + 5.50262215981209e+57, + 2.58623241511168e+59, + 1.24139155925361e+61, + 6.08281864034268e+62, + 3.04140932017134e+64, + 1.55111875328738e+66, + 8.06581751709439e+67, + 4.27488328406003e+69, + 2.30843697339241e+71, + 1.26964033536583e+73, + 7.10998587804863e+74, + 4.05269195048772e+76, + 2.35056133128288e+78, + 1.3868311854569e+80, + 8.32098711274139e+81, + 5.07580213877225e+83, + 3.14699732603879e+85, + 1.98260831540444e+87, + 1.26886932185884e+89, + 8.24765059208247e+90, + 5.44344939077443e+92, + 3.64711109181887e+94, + 2.48003554243683e+96, + 1.71122452428141e+98, + 1.19785716699699e+100, + 8.50478588567862e+101, + 6.12344583768861e+103, + 4.47011546151268e+105, + 3.30788544151939e+107, + 2.48091408113954e+109, + 1.88549470166605e+111, + 1.45183092028286e+113, + 1.13242811782063e+115, + 8.94618213078297e+116, + 7.15694570462638e+118, + 5.79712602074737e+120, + 4.75364333701284e+122, + 3.94552396972066e+124, + 3.31424013456535e+126, + 2.81710411438055e+128, + 2.42270953836727e+130, + 2.10775729837953e+132, + 1.85482642257398e+134, + 1.65079551609085e+136, + 1.48571596448176e+138, + 1.3520015276784e+140, + 1.24384140546413e+142, + 1.15677250708164e+144, + 1.08736615665674e+146, + 1.03299784882391e+148, + 9.91677934870949e+149, + 9.61927596824821e+151, + 9.42689044888324e+153, + 9.33262154439441e+155, + 9.33262154439441e+157, + 9.42594775983835e+159, + 9.61446671503512e+161, + 9.90290071648618e+163, + 1.02990167451456e+166, + 1.08139675824029e+168, + 1.14628056373471e+170, + 1.22652020319614e+172, + 1.32464181945183e+174, + 1.44385958320249e+176, + 1.58824554152274e+178, + 1.76295255109024e+180, + 1.97450685722107e+182, + 2.23119274865981e+184, + 2.54355973347219e+186, + 2.92509369349301e+188, + 3.3931086844519e+190, + 3.96993716080872e+192, + 4.68452584975429e+194, + 5.5745857612076e+196, + 6.68950291344912e+198, + 8.09429852527344e+200, + 9.8750442008336e+202, + 1.21463043670253e+205, + 
1.50614174151114e+207, + 1.88267717688893e+209, + 2.37217324288005e+211, + 3.01266001845766e+213, + 3.8562048236258e+215, + 4.97450422247729e+217, + 6.46685548922047e+219, + 8.47158069087882e+221, + 1.118248651196e+224, + 1.48727070609069e+226, + 1.99294274616152e+228, + 2.69047270731805e+230, + 3.65904288195255e+232, + 5.01288874827499e+234, + 6.91778647261949e+236, + 9.61572319694109e+238, + 1.34620124757175e+241, + 1.89814375907617e+243, + 2.69536413788816e+245, + 3.85437071718007e+247, + 5.5502938327393e+249, + 8.04792605747199e+251, + 1.17499720439091e+254, + 1.72724589045464e+256, + 2.55632391787286e+258, + 3.80892263763057e+260, + 5.71338395644585e+262, + 8.62720977423323e+264, + 1.31133588568345e+267, + 2.00634390509568e+269, + 3.08976961384735e+271, + 4.78914290146339e+273, + 7.47106292628289e+275, + 1.17295687942641e+278, + 1.85327186949373e+280, + 2.94670227249504e+282, + 4.71472363599206e+284, + 7.59070505394721e+286, + 1.22969421873945e+289, + 2.0044015765453e+291, + 3.28721858553429e+293, + 5.42391066613159e+295, + 9.00369170577843e+297, + 1.503616514865e+300, // nmaxfactorial = 167 +}; + /* ---------------------------------------------------------------------- the function delta given by VMK Eq. 
8.2(1) ------------------------------------------------------------------------- */ template -KOKKOS_INLINE_FUNCTION +inline double SNAKokkos::deltacg(int j1, int j2, int j) { double sfaccg = factorial((j1 + j2 + j) / 2 + 1); @@ -1135,33 +1445,39 @@ template inline void SNAKokkos::init_clebsch_gordan() { + auto h_cglist = Kokkos::create_mirror_view(cglist); + double sum,dcg,sfaccg; int m, aa2, bb2, cc2; int ifac; - auto h_cgarray = Kokkos::create_mirror_view(cgarray); - for (int j1 = 0; j1 <= twojmax; j1++) - for (int j2 = 0; j2 <= twojmax; j2++) - for (int j = abs(j1 - j2); j <= MIN(twojmax, j1 + j2); j += 2) - for (int m1 = 0; m1 <= j1; m1 += 1) { + int idxcg_count = 0; + for(int j1 = 0; j1 <= twojmax; j1++) + for(int j2 = 0; j2 <= j1; j2++) + for(int j = j1 - j2; j <= MIN(twojmax, j1 + j2); j += 2) { + for (int m1 = 0; m1 <= j1; m1++) { aa2 = 2 * m1 - j1; - for (int m2 = 0; m2 <= j2; m2 += 1) { + for (int m2 = 0; m2 <= j2; m2++) { // -c <= cc <= c bb2 = 2 * m2 - j2; m = (aa2 + bb2 + j) / 2; - if(m < 0 || m > j) continue; + if(m < 0 || m > j) { + h_cglist[idxcg_count] = 0.0; + idxcg_count++; + continue; + } sum = 0.0; for (int z = MAX(0, MAX(-(j - j2 + aa2) - / 2, -(j - j1 - bb2) / 2)); - z <= MIN((j1 + j2 - j) / 2, - MIN((j1 - aa2) / 2, (j2 + bb2) / 2)); - z++) { + / 2, -(j - j1 - bb2) / 2)); + z <= MIN((j1 + j2 - j) / 2, + MIN((j1 - aa2) / 2, (j2 + bb2) / 2)); + z++) { ifac = z % 2 ? 
-1 : 1; sum += ifac / (factorial(z) * @@ -1175,18 +1491,19 @@ void SNAKokkos::init_clebsch_gordan() cc2 = 2 * m - j; dcg = deltacg(j1, j2, j); sfaccg = sqrt(factorial((j1 + aa2) / 2) * - factorial((j1 - aa2) / 2) * - factorial((j2 + bb2) / 2) * - factorial((j2 - bb2) / 2) * - factorial((j + cc2) / 2) * - factorial((j - cc2) / 2) * - (j + 1)); - - h_cgarray(j1,j2,j,m1,m2) = sum * dcg * sfaccg; - //printf("SNAP-COMPARE: CG: %i %i %i %i %i %e\n",j1,j2,j,m1,m2,cgarray(j1,j2,j,m1,m2)); + factorial((j1 - aa2) / 2) * + factorial((j2 + bb2) / 2) * + factorial((j2 - bb2) / 2) * + factorial((j + cc2) / 2) * + factorial((j - cc2) / 2) * + (j + 1)); + + h_cglist[idxcg_count] = sum * dcg * sfaccg; + idxcg_count++; } } - Kokkos::deep_copy(cgarray,h_cgarray); + } + Kokkos::deep_copy(cglist,h_cglist); } /* ---------------------------------------------------------------------- @@ -1207,6 +1524,7 @@ void SNAKokkos::init_rootpqarray() /* ---------------------------------------------------------------------- */ + template inline int SNAKokkos::compute_ncoeff() @@ -1217,9 +1535,10 @@ int SNAKokkos::compute_ncoeff() for (int j1 = 0; j1 <= twojmax; j1++) for (int j2 = 0; j2 <= j1; j2++) - for (int j = abs(j1 - j2); - j <= MIN(twojmax, j1 + j2); j += 2) - if (j >= j1) ncount++; + for (int j = j1 - j2; + j <= MIN(twojmax, j1 + j2); j += 2) + if (j >= j1) ncount++; + return ncount; } @@ -1266,15 +1585,39 @@ double SNAKokkos::compute_dsfac(double r, double rcut) template double SNAKokkos::memory_usage() { + int jdimpq = twojmax + 2; int jdim = twojmax + 1; double bytes; - bytes = jdim * jdim * jdim * jdim * jdim * sizeof(double); - bytes += 2 * jdim * jdim * jdim * sizeof(std::complex); - bytes += 2 * jdim * jdim * jdim * sizeof(double); - bytes += jdim * jdim * jdim * 3 * sizeof(std::complex); - bytes += jdim * jdim * jdim * 3 * sizeof(double); - bytes += ncoeff * sizeof(double); - bytes += jdim * jdim * jdim * jdim * jdim * sizeof(std::complex); + + bytes = 0; + + bytes += jdimpq*jdimpq * 
sizeof(double); // pqarray + bytes += idxcg_max * sizeof(double); // cglist + + bytes += idxu_max * sizeof(double) * 2; // ulist + bytes += idxu_max * sizeof(double) * 2; // ulisttot + bytes += idxu_max * 3 * sizeof(double) * 2; // dulist + + bytes += idxz_max * sizeof(double) * 2; // zlist + bytes += idxb_max * sizeof(double); // blist + bytes += idxb_max * 3 * sizeof(double); // dblist + bytes += idxu_max * sizeof(double) * 2; // ylist + + bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block + bytes += jdim * sizeof(int); // idxu_block + bytes += jdim * jdim * jdim * sizeof(int); // idxz_block + bytes += jdim * jdim * jdim * sizeof(int); // idxb_block + + bytes += idxz_max * sizeof(SNAKK_ZINDICES); // idxz + bytes += idxb_max * sizeof(SNAKK_BINDICES); // idxb + + bytes += jdim * sizeof(double); // bzero + + bytes += nmax * 3 * sizeof(double); // rij + bytes += nmax * sizeof(int); // inside + bytes += nmax * sizeof(double); // wj + bytes += nmax * sizeof(double); // rcutij + return bytes; } From 8c3d18520dd40ee990252aff3e9fb1e5f120b9a0 Mon Sep 17 00:00:00 2001 From: Christoph Junghans Date: Wed, 26 Jun 2019 10:45:31 -0600 Subject: [PATCH 039/117] add missing include needed on ppc64le --- lib/gpu/lal_device.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 5534d32e5f..9410cc5250 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -17,6 +17,7 @@ #include "lal_precision.h" #include #include +#include #ifdef _OPENMP #include #endif From 2be0fd61802b824363308985f415b40e8fb0e38a Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 26 Jun 2019 16:22:37 -0600 Subject: [PATCH 040/117] Fix GPU issues --- src/KOKKOS/pair_snap_kokkos_impl.h | 9 ++-- src/KOKKOS/sna_kokkos.h | 4 +- src/KOKKOS/sna_kokkos_impl.h | 78 ++++++++++-------------------- 3 files changed, 31 insertions(+), 60 deletions(-) diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index 
687c9dc7cb..d56db05d19 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -309,7 +309,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPBeta,const typename Kokk const int ii = team.league_rank(); const int i = d_ilist[ii]; const int itype = type[i]; - const int ielem = map[itype]; + const int ielem = d_map[itype]; Kokkos::View> d_coeffi(d_coeffelem,ielem,Kokkos::ALL); @@ -603,11 +603,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPCompute::member_type& team, double, double, double, double, double); // compute_ui - KOKKOS_INLINE_FUNCTION + inline double deltacg(int, int, int); // init_clebsch_gordan inline diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index 4ca8ae4471..e3ce1626d9 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -223,7 +223,7 @@ void SNAKokkos::build_indexlist() // apply to z(j1,j2,j,ma,mb) to unique element of y(j) - const int jju = idxu_block[j] + (j+1)*mb + ma; + const int jju = h_idxu_block[j] + (j+1)*mb + ma; h_idxz[idxz_count].jju = jju; idxz_count++; @@ -258,7 +258,6 @@ template KOKKOS_INLINE_FUNCTION void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy::member_type& team, int jnum) { - //printf("jnum %i\n",jnum); double rsq, r, x, y, z, z0, theta0; // utot(j,ma,mb) = 0 for all j,ma,ma @@ -348,7 +347,7 @@ void SNAKokkos::compute_zi(const typename Kokkos::TeamPolicy::compute_yi(const typename Kokkos::TeamPolicy::member_type& team, const Kokkos::View &beta, const int ii) { - int j; - int jjz; - int jju; double betaj; { @@ -400,8 +396,8 @@ void SNAKokkos::compute_yi(const typename Kokkos::TeamPolicy::compute_yi(const typename Kokkos::TeamPolicy KOKKOS_INLINE_FUNCTION void SNAKokkos::compute_deidrj(const typename Kokkos::TeamPolicy::member_type& team, double* dedr) { - - for(int k = 0; k < 3; k++) - dedr[k] = 0.0; + t_scalar3 sum; // TODO: which loop is faster to parallelize? 
- Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,twojmax+1), - [&] (const int& j) { + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,twojmax+1), + [&] (const int& j, t_scalar3& sum_tmp) { //for(int j = 0; j <= twojmax; j++) { int jju = idxu_block[j]; for(int mb = 0; 2*mb < j; mb++) for(int ma = 0; ma <= j; ma++) { - - double jjjmambyarray_r = ylist_r[jju]; - double jjjmambyarray_i = ylist_i[jju]; - - for(int k = 0; k < 3; k++) - dedr[k] += - dulist_r(jju,k) * jjjmambyarray_r + - dulist_i(jju,k) * jjjmambyarray_i; + sum_tmp.x += dulist_r(jju,0) * ylist_r[jju] + dulist_i(jju,0) * ylist_i[jju]; + sum_tmp.y += dulist_r(jju,1) * ylist_r[jju] + dulist_i(jju,1) * ylist_i[jju]; + sum_tmp.z += dulist_r(jju,2) * ylist_r[jju] + dulist_i(jju,2) * ylist_i[jju]; jju++; } //end loop over ma mb @@ -505,32 +495,26 @@ void SNAKokkos::compute_deidrj(const typename Kokkos::TeamPolicy::compute_bi(const typename Kokkos::TeamPolicy::compute_bi(const typename Kokkos::TeamPolicy::compute_uarray(const typename Kokkos::TeamPolicy::compute_uarray(const typename Kokkos::TeamPolicy::compute_duarray(const typename Kokkos::TeamPolicy::create_team_scratch_arrays(const typename Kokkos::Te zlist_r = t_sna_1d(team.team_scratch(1),idxz_max); zlist_i = t_sna_1d(team.team_scratch(1),idxz_max); blist = t_sna_1d(team.team_scratch(1),idxb_max); - dblist = t_sna_2d(team.team_scratch(1),idxb_max,3); rij = t_sna_2d(team.team_scratch(1),nmax,3); rcutij = t_sna_1d(team.team_scratch(1),nmax); @@ -1193,7 +1168,6 @@ T_INT SNAKokkos::size_team_scratch_arrays() { size += t_sna_1d::shmem_size(idxu_max)*2; // ylist size += t_sna_1d::shmem_size(idxz_max)*2; // zlist size += t_sna_1d::shmem_size(idxb_max); // blist - size += t_sna_2d::shmem_size(idxb_max,3); // dblist size += t_sna_2d::shmem_size(nmax,3); // rij size += t_sna_1d::shmem_size(nmax); // rcutij @@ -1210,7 +1184,6 @@ KOKKOS_INLINE_FUNCTION void SNAKokkos::create_thread_scratch_arrays(const typename Kokkos::TeamPolicy::member_type& team) { dblist 
= t_sna_2d(team.thread_scratch(1),idxb_max,3); - ulist_r = t_sna_1d(team.thread_scratch(1),idxu_max); ulist_i = t_sna_1d(team.thread_scratch(1),idxu_max); dulist_r = t_sna_2d(team.thread_scratch(1),idxu_max,3); @@ -1223,7 +1196,6 @@ T_INT SNAKokkos::size_thread_scratch_arrays() { T_INT size = 0; size += t_sna_2d::shmem_size(idxb_max,3); // dblist - size += t_sna_1d::shmem_size(idxu_max)*2; // ulist size += t_sna_2d::shmem_size(idxu_max,3)*2; // dulist return size; From 60adaa24cb8f19d9effc6da6f1ea6967c736a947 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 27 Jun 2019 08:52:34 -0600 Subject: [PATCH 041/117] Remove redundant computation --- src/KOKKOS/sna_kokkos_impl.h | 22 ---------------------- src/SNAP/sna.cpp | 22 ---------------------- 2 files changed, 44 deletions(-) diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index e3ce1626d9..9b96cb8a16 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -1006,11 +1006,9 @@ void SNAKokkos::compute_duarray(const typename Kokkos::TeamPolicy::compute_duarray(const typename Kokkos::TeamPolicy::compute_duarray(const typename Kokkos::TeamPolicy::compute_duarray(const typename Kokkos::TeamPolicy::compute_duarray(const typename Kokkos::TeamPolicy Date: Thu, 27 Jun 2019 08:53:58 -0600 Subject: [PATCH 042/117] Remove no-op --- src/SNAP/sna.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/SNAP/sna.cpp b/src/SNAP/sna.cpp index 949e0ee071..ada3f528d2 100644 --- a/src/SNAP/sna.cpp +++ b/src/SNAP/sna.cpp @@ -610,8 +610,6 @@ void SNA::compute_bi() sumzu += 0.5*(ulisttot_r[jju]*zlist_r[jjz] + ulisttot_i[jju]*zlist_i[jjz]); - jjz++; - jju++; } // end if jeven blist[jjb] = 2.0*sumzu; From df7c56d88160add5ce3492c1f5b9e757b34ebace Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 28 Jun 2019 05:28:54 -0400 Subject: [PATCH 043/117] use suffix compatible pair style matching when looking for ReaxFF pair style --- src/QEQ/fix_qeq_shielded.cpp | 2 +- 
src/USER-REAXC/fix_qeq_reax.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/QEQ/fix_qeq_shielded.cpp b/src/QEQ/fix_qeq_shielded.cpp index a74eee7d29..6367feacc5 100644 --- a/src/QEQ/fix_qeq_shielded.cpp +++ b/src/QEQ/fix_qeq_shielded.cpp @@ -80,7 +80,7 @@ void FixQEqShielded::init() void FixQEqShielded::extract_reax() { - Pair *pair = force->pair_match("reax/c",1); + Pair *pair = force->pair_match("^reax/c",0); if (pair == NULL) error->all(FLERR,"No pair reax/c for fix qeq/shielded"); int tmp; chi = (double *) pair->extract("chi",tmp); diff --git a/src/USER-REAXC/fix_qeq_reax.cpp b/src/USER-REAXC/fix_qeq_reax.cpp index b37c8fff83..faa0632608 100644 --- a/src/USER-REAXC/fix_qeq_reax.cpp +++ b/src/USER-REAXC/fix_qeq_reax.cpp @@ -124,7 +124,7 @@ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) : // register with Atom class reaxc = NULL; - reaxc = (PairReaxC *) force->pair_match("reax/c",0); + reaxc = (PairReaxC *) force->pair_match("^reax/c",0); s_hist = t_hist = NULL; grow_arrays(atom->nmax); From d918432047743f70a0650713ba7a21f0935d5245 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 28 Jun 2019 09:54:24 -0600 Subject: [PATCH 044/117] Fix issue with compute_duarray --- src/KOKKOS/pair_snap_kokkos_impl.h | 11 +++++----- src/KOKKOS/sna_kokkos.h | 11 +++++----- src/KOKKOS/sna_kokkos_impl.h | 33 +++++++++++++++++++++++------- src/SNAP/compute_snad_atom.cpp | 2 +- src/SNAP/compute_snav_atom.cpp | 2 +- src/SNAP/pair_snap.cpp | 2 +- src/SNAP/sna.cpp | 27 +++++++++++++++++++----- src/SNAP/sna.h | 7 ++++--- 8 files changed, 67 insertions(+), 28 deletions(-) diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index d56db05d19..d0c7e09e84 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -309,7 +309,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPBeta,const typename Kokk const int ii = team.league_rank(); const int i = d_ilist[ii]; const int itype = type[i]; - 
const int ielem = d_map[itype]; + const int ielem = map[itype]; Kokkos::View> d_coeffi(d_coeffelem,ielem,Kokkos::ALL); @@ -601,12 +601,13 @@ void PairSNAPKokkos::operator() (TagPairSNAPCompute::member_type& team, double*, double, double); //ForceSNAP + void compute_duidrj(const typename Kokkos::TeamPolicy::member_type& team, double*, double, double, int); //ForceSNAP KOKKOS_INLINE_FUNCTION void compute_dbidrj(const typename Kokkos::TeamPolicy::member_type& team); //ForceSNAP KOKKOS_INLINE_FUNCTION @@ -123,10 +123,11 @@ inline t_sna_1d ulisttot_r, ulisttot_i; t_sna_1d_atomic ulisttot_r_a, ulisttot_i_a; t_sna_1d zlist_r, zlist_i; + t_sna_2d ulist_r_ij, ulist_i_ij; // Per InFlight Interaction t_sna_1d ulist_r, ulist_i; - t_sna_1d_atomic ylist_r, ylist_i; + t_sna_1d ylist_r, ylist_i; // derivatives of data t_sna_2d dulist_r, dulist_i; @@ -171,13 +172,13 @@ inline KOKKOS_INLINE_FUNCTION void addself_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, double); // compute_ui KOKKOS_INLINE_FUNCTION - void add_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, double, double, double); // compute_ui + void add_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, double, double, double, int); // compute_ui KOKKOS_INLINE_FUNCTION void compute_uarray(const typename Kokkos::TeamPolicy::member_type& team, double, double, double, double, double); // compute_ui - inline + KOKKOS_INLINE_FUNCTION double deltacg(int, int, int); // init_clebsch_gordan inline @@ -185,7 +186,7 @@ inline KOKKOS_INLINE_FUNCTION void compute_duarray(const typename Kokkos::TeamPolicy::member_type& team, double, double, double, // compute_duidrj - double, double, double, double, double); + double, double, double, double, double, int); // Sets the style for the switching function // 0 = none diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index 9b96cb8a16..8cbb3eb3f6 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ 
-289,7 +289,7 @@ void SNAKokkos::compute_ui(const typename Kokkos::TeamPolicy::compute_dbidrj(const typename Kokkos::TeamPolicy KOKKOS_INLINE_FUNCTION void SNAKokkos::compute_duidrj(const typename Kokkos::TeamPolicy::member_type& team, - double* rij, double wj, double rcut) + double* rij, double wj, double rcut, int jj) { double rsq, r, x, y, z, z0, theta0, cs, sn; double dz0dr; @@ -797,7 +797,7 @@ void SNAKokkos::compute_duidrj(const typename Kokkos::TeamPolicy::addself_uarraytot(const typename Kokkos::TeamPolicy< template KOKKOS_INLINE_FUNCTION -void SNAKokkos::add_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, double r, double wj, double rcut) +void SNAKokkos::add_uarraytot(const typename Kokkos::TeamPolicy::member_type& team, + double r, double wj, double rcut, int j) { const double sfac = compute_sfac(r, rcut) * wj; @@ -854,10 +855,19 @@ void SNAKokkos::add_uarraytot(const typename Kokkos::TeamPolicy> + ulist_r_j(ulist_r_ij,j,Kokkos::ALL); + Kokkos::View> + ulist_i_j(ulist_i_ij,j,Kokkos::ALL); + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,ulisttot_r.span()), [&] (const int& i) { Kokkos::atomic_add(ptrtot_r+i, sfac * ptr_r[i]); Kokkos::atomic_add(ptrtot_i+i, sfac * ptr_i[i]); + + ulist_r_j(i) = ulist_r(i); + ulist_i_j(i) = ulist_i(i); }); } @@ -962,7 +972,7 @@ KOKKOS_INLINE_FUNCTION void SNAKokkos::compute_duarray(const typename Kokkos::TeamPolicy::member_type& team, double x, double y, double z, double z0, double r, double dz0dr, - double wj, double rcut) + double wj, double rcut, int jj) { double r0inv; double a_r, a_i, b_r, b_i; @@ -1006,6 +1016,11 @@ void SNAKokkos::compute_duarray(const typename Kokkos::TeamPolicy> + ulist_r(ulist_r_ij,jj,Kokkos::ALL); + Kokkos::View> + ulist_i(ulist_i_ij,jj,Kokkos::ALL); + dulist_r(0,0) = 0.0; dulist_r(0,1) = 0.0; dulist_r(0,2) = 0.0; @@ -1135,6 +1150,8 @@ void SNAKokkos::create_team_scratch_arrays(const typename Kokkos::Te rcutij = t_sna_1d(team.team_scratch(1),nmax); wj = 
t_sna_1d(team.team_scratch(1),nmax); inside = t_sna_1i(team.team_scratch(1),nmax); + ulist_r_ij = t_sna_2d(team.team_scratch(1),nmax,idxu_max); + ulist_i_ij = t_sna_2d(team.team_scratch(1),nmax,idxu_max); } template @@ -1151,6 +1168,7 @@ T_INT SNAKokkos::size_team_scratch_arrays() { size += t_sna_1d::shmem_size(nmax); // rcutij size += t_sna_1d::shmem_size(nmax); // wj size += t_sna_1i::shmem_size(nmax); // inside + size += t_sna_2d::shmem_size(nmax,idxu_max)*2; // ulist_ij return size; } @@ -1558,8 +1576,8 @@ double SNAKokkos::memory_usage() bytes += jdim * jdim * jdim * sizeof(int); // idxz_block bytes += jdim * jdim * jdim * sizeof(int); // idxb_block - bytes += idxz_max * sizeof(SNAKK_ZINDICES); // idxz - bytes += idxb_max * sizeof(SNAKK_BINDICES); // idxb + bytes += idxz_max * sizeof(SNAKK_ZINDICES); // idxz + bytes += idxb_max * sizeof(SNAKK_BINDICES); // idxb bytes += jdim * sizeof(double); // bzero @@ -1567,6 +1585,7 @@ double SNAKokkos::memory_usage() bytes += nmax * sizeof(int); // inside bytes += nmax * sizeof(double); // wj bytes += nmax * sizeof(double); // rcutij + bytes += nmax * idxu_max * sizeof(double) * 2; // ulist_ij return bytes; } diff --git a/src/SNAP/compute_snad_atom.cpp b/src/SNAP/compute_snad_atom.cpp index 37587a0aae..0a82cdeb00 100644 --- a/src/SNAP/compute_snad_atom.cpp +++ b/src/SNAP/compute_snad_atom.cpp @@ -263,7 +263,7 @@ void ComputeSNADAtom::compute_peratom() const int j = snaptr->inside[jj]; snaptr->compute_duidrj(snaptr->rij[jj], snaptr->wj[jj], - snaptr->rcutij[jj]); + snaptr->rcutij[jj],jj); snaptr->compute_dbidrj(); // Accumulate -dBi/dRi, -dBi/dRj diff --git a/src/SNAP/compute_snav_atom.cpp b/src/SNAP/compute_snav_atom.cpp index 1f702496ed..374bf32298 100644 --- a/src/SNAP/compute_snav_atom.cpp +++ b/src/SNAP/compute_snav_atom.cpp @@ -258,7 +258,7 @@ void ComputeSNAVAtom::compute_peratom() snaptr->compute_duidrj(snaptr->rij[jj], snaptr->wj[jj], - snaptr->rcutij[jj]); + snaptr->rcutij[jj],jj); snaptr->compute_dbidrj(); // 
Accumulate -dBi/dRi*Ri, -dBi/dRj*Rj diff --git a/src/SNAP/pair_snap.cpp b/src/SNAP/pair_snap.cpp index f9ba8922a0..4dce39361f 100644 --- a/src/SNAP/pair_snap.cpp +++ b/src/SNAP/pair_snap.cpp @@ -176,7 +176,7 @@ void PairSNAP::compute(int eflag, int vflag) for (int jj = 0; jj < ninside; jj++) { int j = snaptr->inside[jj]; snaptr->compute_duidrj(snaptr->rij[jj], - snaptr->wj[jj],snaptr->rcutij[jj]); + snaptr->wj[jj],snaptr->rcutij[jj],jj); snaptr->compute_deidrj(fij); diff --git a/src/SNAP/sna.cpp b/src/SNAP/sna.cpp index ada3f528d2..f9696e1f12 100644 --- a/src/SNAP/sna.cpp +++ b/src/SNAP/sna.cpp @@ -134,6 +134,8 @@ SNA::SNA(LAMMPS* lmp, double rfac0_in, nmax = 0; idxz = NULL; idxb = NULL; + ulist_r_ij = NULL; + ulist_i_ij = NULL; build_indexlist(); create_twojmax_arrays(); @@ -154,6 +156,8 @@ SNA::~SNA() memory->destroy(inside); memory->destroy(wj); memory->destroy(rcutij); + memory->destroy(ulist_r_ij); + memory->destroy(ulist_i_ij); delete[] idxz; delete[] idxb; destroy_twojmax_arrays(); @@ -299,10 +303,14 @@ void SNA::grow_rij(int newnmax) memory->destroy(inside); memory->destroy(wj); memory->destroy(rcutij); + memory->destroy(ulist_r_ij); + memory->destroy(ulist_i_ij); memory->create(rij, nmax, 3, "pair:rij"); memory->create(inside, nmax, "pair:inside"); memory->create(wj, nmax, "pair:wj"); memory->create(rcutij, nmax, "pair:rcutij"); + memory->create(ulist_r_ij, nmax, idxu_max, "sna:ulist_ij"); + memory->create(ulist_i_ij, nmax, idxu_max, "sna:ulist_ij"); } /* ---------------------------------------------------------------------- @@ -334,7 +342,7 @@ void SNA::compute_ui(int jnum) z0 = r / tan(theta0); compute_uarray(x, y, z, z0, r); - add_uarraytot(r, wj[j], rcutij[j]); + add_uarraytot(r, wj[j], rcutij[j], j); } } @@ -826,7 +834,7 @@ void SNA::compute_dbidrj() calculate derivative of Ui w.r.t. 
atom j ------------------------------------------------------------------------- */ -void SNA::compute_duidrj(double* rij, double wj, double rcut) +void SNA::compute_duidrj(double* rij, double wj, double rcut, int jj) { double rsq, r, x, y, z, z0, theta0, cs, sn; double dz0dr; @@ -843,7 +851,7 @@ void SNA::compute_duidrj(double* rij, double wj, double rcut) z0 = r * cs / sn; dz0dr = z0 / r - (r*rscale0) * (rsq + z0 * z0) / rsq; - compute_duarray(x, y, z, z0, r, dz0dr, wj, rcut); + compute_duarray(x, y, z, z0, r, dz0dr, wj, rcut, jj); } /* ---------------------------------------------------------------------- */ @@ -879,7 +887,7 @@ void SNA::addself_uarraytot(double wself_in) add Wigner U-functions for one neighbor to the total ------------------------------------------------------------------------- */ -void SNA::add_uarraytot(double r, double wj, double rcut) +void SNA::add_uarraytot(double r, double wj, double rcut, int jj) { double sfac; @@ -887,6 +895,9 @@ void SNA::add_uarraytot(double r, double wj, double rcut) sfac *= wj; + double* ulist_r_j = ulist_r_ij[jj]; + double* ulist_i_j = ulist_i_ij[jj]; + for (int j = 0; j <= twojmax; j++) { int jju = idxu_block[j]; for (int mb = 0; mb <= j; mb++) @@ -895,6 +906,9 @@ void SNA::add_uarraytot(double r, double wj, double rcut) sfac * ulist_r[jju]; ulisttot_i[jju] += sfac * ulist_i[jju]; + + ulist_r_j[jju] = ulist_r[jju]; + ulist_i_j[jju] = ulist_i[jju]; jju++; } } @@ -992,7 +1006,7 @@ void SNA::compute_uarray(double x, double y, double z, void SNA::compute_duarray(double x, double y, double z, double z0, double r, double dz0dr, - double wj, double rcut) + double wj, double rcut, int jj) { double r0inv; double a_r, a_i, b_r, b_i; @@ -1036,6 +1050,9 @@ void SNA::compute_duarray(double x, double y, double z, db_i[0] += -r0inv; db_r[1] += r0inv; + double* ulist_r = ulist_r_ij[jj]; + double* ulist_i = ulist_i_ij[jj]; + dulist_r[0][0] = 0.0; dulist_r[0][1] = 0.0; dulist_r[0][2] = 0.0; diff --git a/src/SNAP/sna.h 
b/src/SNAP/sna.h index 1e08ef123c..81582cf9e5 100644 --- a/src/SNAP/sna.h +++ b/src/SNAP/sna.h @@ -53,7 +53,7 @@ public: // functions for derivatives - void compute_duidrj(double*, double, double); + void compute_duidrj(double*, double, double, int); void compute_dbidrj(); void compute_deidrj(double*); double compute_sfac(double, double); @@ -86,6 +86,7 @@ private: double* ulisttot_r, * ulisttot_i; double* ulist_r, * ulist_i; + double** ulist_r_ij, ** ulist_i_ij; int* idxu_block; double* zlist_r, * zlist_i; @@ -106,13 +107,13 @@ private: void init_rootpqarray(); void zero_uarraytot(); void addself_uarraytot(double); - void add_uarraytot(double, double, double); + void add_uarraytot(double, double, double, int); void compute_uarray(double, double, double, double, double); double deltacg(int, int, int); int compute_ncoeff(); void compute_duarray(double, double, double, - double, double, double, double, double); + double, double, double, double, double, int); // Sets the style for the switching function // 0 = none From 5eabc820240604ef4ea3859d054e6c03febc3827 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 28 Jun 2019 10:17:22 -0600 Subject: [PATCH 045/117] Restore lost changes --- src/KOKKOS/pair_snap_kokkos_impl.h | 9 ++++----- src/KOKKOS/sna_kokkos.h | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index d0c7e09e84..20e04605ce 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -309,7 +309,7 @@ void PairSNAPKokkos::operator() (TagPairSNAPBeta,const typename Kokk const int ii = team.league_rank(); const int i = d_ilist[ii]; const int itype = type[i]; - const int ielem = map[itype]; + const int ielem = d_map[itype]; Kokkos::View> d_coeffi(d_coeffelem,ielem,Kokkos::ALL); @@ -603,11 +603,10 @@ void PairSNAPKokkos::operator() (TagPairSNAPCompute::member_type& team, double, double, double, double, double); // compute_ui - 
KOKKOS_INLINE_FUNCTION + inline double deltacg(int, int, int); // init_clebsch_gordan inline From ea2e73119da7c6c7633da5073d8cef8ed8e05b5c Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Fri, 28 Jun 2019 11:23:24 -0600 Subject: [PATCH 046/117] Update Kokkos library in LAMMPS to v2.9.00 --- lib/kokkos/CHANGELOG.md | 34 + lib/kokkos/Makefile.kokkos | 111 +- lib/kokkos/Makefile.targets | 13 +- .../algorithms/cmake/Dependencies.cmake | 2 +- lib/kokkos/algorithms/src/Kokkos_Sort.hpp | 2 + .../algorithms/unit_tests/CMakeLists.txt | 6 + lib/kokkos/algorithms/unit_tests/Makefile | 12 + lib/kokkos/algorithms/unit_tests/TestHPX.cpp | 96 + lib/kokkos/algorithms/unit_tests/TestSort.hpp | 8 +- lib/kokkos/cmake/kokkos_build.cmake | 24 +- lib/kokkos/cmake/kokkos_functions.cmake | 2 +- lib/kokkos/cmake/kokkos_options.cmake | 41 +- lib/kokkos/cmake/kokkos_settings.cmake | 6 + lib/kokkos/cmake/tribits.cmake | 8 + .../containers/cmake/Dependencies.cmake | 2 +- .../performance_tests/CMakeLists.txt | 4 + .../containers/performance_tests/Makefile | 12 + .../performance_tests/TestDynRankView.hpp | 12 +- .../performance_tests/TestGlobal2LocalIds.hpp | 6 +- .../containers/performance_tests/TestHPX.cpp | 130 ++ .../performance_tests/TestScatterView.hpp | 2 + .../TestUnorderedMapPerformance.hpp | 4 +- lib/kokkos/containers/src/Kokkos_Bitset.hpp | 26 +- lib/kokkos/containers/src/Kokkos_DualView.hpp | 4 +- .../containers/src/Kokkos_DynRankView.hpp | 19 +- .../containers/src/Kokkos_DynamicView.hpp | 14 +- .../containers/src/Kokkos_OffsetView.hpp | 16 +- .../containers/src/Kokkos_ScatterView.hpp | 405 +++- .../containers/src/Kokkos_UnorderedMap.hpp | 8 +- lib/kokkos/containers/src/Kokkos_Vector.hpp | 4 +- .../containers/unit_tests/CMakeLists.txt | 25 + lib/kokkos/containers/unit_tests/Makefile | 26 + .../containers/unit_tests/TestBitset.hpp | 6 +- .../containers/unit_tests/TestDynViewAPI.hpp | 2 + .../unit_tests/TestErrorReporter.hpp | 2 + .../containers/unit_tests/TestScatterView.hpp | 452 +++- 
.../unit_tests/TestUnorderedMap.hpp | 14 +- .../unit_tests/hpx/TestHPX_BitSet.cpp | 47 + .../unit_tests/hpx/TestHPX_Category.hpp | 65 + .../unit_tests/hpx/TestHPX_DualView.cpp | 47 + .../hpx/TestHPX_DynRankViewAPI_generic.cpp | 47 + .../hpx/TestHPX_DynRankViewAPI_rank12345.cpp | 47 + .../hpx/TestHPX_DynRankViewAPI_rank67.cpp | 47 + .../unit_tests/hpx/TestHPX_DynamicView.cpp | 47 + .../unit_tests/hpx/TestHPX_ErrorReporter.cpp | 47 + .../unit_tests/hpx/TestHPX_OffsetView.cpp | 47 + .../unit_tests/hpx/TestHPX_ScatterView.cpp | 47 + .../unit_tests/hpx/TestHPX_StaticCrsGraph.cpp | 47 + .../unit_tests/hpx/TestHPX_UnorderedMap.cpp | 47 + .../unit_tests/hpx/TestHPX_Vector.cpp | 47 + .../hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp | 47 + lib/kokkos/core/cmake/Dependencies.cmake | 2 +- lib/kokkos/core/perf_test/CMakeLists.txt | 1 + lib/kokkos/core/perf_test/Makefile | 1 + .../core/perf_test/PerfTestBlasKernels.hpp | 75 +- .../core/perf_test/PerfTestGramSchmidt.cpp | 2 +- lib/kokkos/core/perf_test/PerfTestHexGrad.cpp | 4 +- lib/kokkos/core/perf_test/PerfTestMDRange.hpp | 24 +- .../PerfTest_ExecSpacePartitioning.cpp | 564 +++++ .../core/perf_test/PerfTest_ViewAllocate.cpp | 1 + .../core/perf_test/PerfTest_ViewCopy.hpp | 5 + .../core/perf_test/PerfTest_ViewFill.hpp | 5 + .../core/perf_test/PerfTest_ViewResize.hpp | 10 + lib/kokkos/core/perf_test/test_atomic.cpp | 40 +- lib/kokkos/core/perf_test/test_mempool.cpp | 1 + lib/kokkos/core/perf_test/test_taskdag.cpp | 45 +- lib/kokkos/core/src/CMakeLists.txt | 10 + lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp | 419 ---- lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp | 12 +- .../Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp | 657 ++++++ ...uda_Atomic_Intrinsics_Restore_Builtins.hpp | 68 + ...pp => Kokkos_Cuda_BlockSize_Deduction.hpp} | 131 +- ...Cuda_Impl.cpp => Kokkos_Cuda_Instance.cpp} | 200 +- .../core/src/Cuda/Kokkos_Cuda_Instance.hpp | 156 ++ .../src/Cuda/Kokkos_Cuda_KernelLaunch.hpp | 579 +++++ .../core/src/Cuda/Kokkos_Cuda_Parallel.hpp | 
719 ++---- .../core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp | 17 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp | 190 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp | 728 ++++-- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp | 178 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp | 2 +- .../src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp | 3 +- lib/kokkos/core/src/HPX/Kokkos_HPX.cpp | 152 ++ .../Kokkos_HPX_Task.cpp} | 41 +- lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp | 298 +++ .../src/HPX/Kokkos_HPX_ViewCopyETIAvail.hpp | 57 + .../src/HPX/Kokkos_HPX_ViewCopyETIDecl.hpp | 57 + .../src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp | 116 + .../core/src/KokkosExp_MDRangePolicy.hpp | 79 +- lib/kokkos/core/src/Kokkos_Atomic.hpp | 15 + lib/kokkos/core/src/Kokkos_Complex.hpp | 6 +- lib/kokkos/core/src/Kokkos_Concepts.hpp | 54 +- lib/kokkos/core/src/Kokkos_CopyViews.hpp | 860 ++++++- lib/kokkos/core/src/Kokkos_Core.hpp | 5 + lib/kokkos/core/src/Kokkos_Core_fwd.hpp | 10 + lib/kokkos/core/src/Kokkos_Crs.hpp | 4 +- lib/kokkos/core/src/Kokkos_Cuda.hpp | 41 +- lib/kokkos/core/src/Kokkos_ExecPolicy.hpp | 175 +- lib/kokkos/core/src/Kokkos_Extents.hpp | 186 ++ lib/kokkos/core/src/Kokkos_Future.hpp | 567 +++++ lib/kokkos/core/src/Kokkos_HPX.hpp | 1999 +++++++++++++++++ lib/kokkos/core/src/Kokkos_HostSpace.hpp | 13 +- lib/kokkos/core/src/Kokkos_Layout.hpp | 21 +- lib/kokkos/core/src/Kokkos_Macros.hpp | 112 +- lib/kokkos/core/src/Kokkos_MemoryPool.hpp | 6 + lib/kokkos/core/src/Kokkos_MemoryTraits.hpp | 9 +- lib/kokkos/core/src/Kokkos_OpenMP.hpp | 8 +- lib/kokkos/core/src/Kokkos_Pair.hpp | 9 + lib/kokkos/core/src/Kokkos_Parallel.hpp | 4 +- .../core/src/Kokkos_Parallel_Reduce.hpp | 257 ++- .../core/src/Kokkos_PointerOwnership.hpp | 74 + lib/kokkos/core/src/Kokkos_ROCm.hpp | 9 +- lib/kokkos/core/src/Kokkos_Serial.hpp | 26 +- lib/kokkos/core/src/Kokkos_TaskScheduler.hpp | 1110 ++++----- .../core/src/Kokkos_TaskScheduler_fwd.hpp | 249 ++ lib/kokkos/core/src/Kokkos_Threads.hpp | 6 + 
lib/kokkos/core/src/Kokkos_View.hpp | 64 +- .../core/src/Kokkos_WorkGraphPolicy.hpp | 13 +- lib/kokkos/core/src/Makefile | 8 + .../core/src/Makefile.generate_build_files | 5 +- .../core/src/Makefile.generate_header_lists | 4 + .../core/src/OpenMP/Kokkos_OpenMP_Exec.cpp | 4 + .../core/src/OpenMP/Kokkos_OpenMP_Exec.hpp | 5 + .../src/OpenMP/Kokkos_OpenMP_Parallel.hpp | 25 +- .../core/src/OpenMP/Kokkos_OpenMP_Task.cpp | 220 +- .../core/src/OpenMP/Kokkos_OpenMP_Task.hpp | 384 +++- .../core/src/OpenMP/Kokkos_OpenMP_Team.hpp | 23 +- .../OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp | 5 +- .../Kokkos_OpenMPTarget_Parallel.hpp | 16 +- .../src/Qthreads/Kokkos_Qthreads_Parallel.hpp | 1 - lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp | 2 +- .../core/src/ROCm/Kokkos_ROCm_Parallel.hpp | 25 +- .../core/src/Threads/Kokkos_ThreadsExec.cpp | 4 + .../core/src/Threads/Kokkos_ThreadsExec.hpp | 4 + .../core/src/Threads/Kokkos_ThreadsTeam.hpp | 169 +- .../src/Threads/Kokkos_Threads_Parallel.hpp | 1 - lib/kokkos/core/src/eti/CMakeLists.txt | 3 + lib/kokkos/core/src/eti/HPX/CMakeLists.txt | 148 ++ ...TIInst_int64_t_double_LayoutLeft_Rank1.cpp | 54 + ...TIInst_int64_t_double_LayoutLeft_Rank2.cpp | 54 + ...TIInst_int64_t_double_LayoutLeft_Rank3.cpp | 54 + ...TIInst_int64_t_double_LayoutLeft_Rank4.cpp | 54 + ...TIInst_int64_t_double_LayoutLeft_Rank5.cpp | 54 + ...TIInst_int64_t_double_LayoutLeft_Rank8.cpp | 54 + ...IInst_int64_t_double_LayoutRight_Rank1.cpp | 54 + ...IInst_int64_t_double_LayoutRight_Rank2.cpp | 54 + ...IInst_int64_t_double_LayoutRight_Rank3.cpp | 54 + ...IInst_int64_t_double_LayoutRight_Rank4.cpp | 54 + ...IInst_int64_t_double_LayoutRight_Rank5.cpp | 54 + ...IInst_int64_t_double_LayoutRight_Rank8.cpp | 54 + ...Inst_int64_t_double_LayoutStride_Rank1.cpp | 54 + ...Inst_int64_t_double_LayoutStride_Rank2.cpp | 54 + ...Inst_int64_t_double_LayoutStride_Rank3.cpp | 54 + ...Inst_int64_t_double_LayoutStride_Rank4.cpp | 54 + ...Inst_int64_t_double_LayoutStride_Rank5.cpp | 54 + 
...Inst_int64_t_double_LayoutStride_Rank8.cpp | 54 + ...ETIInst_int64_t_float_LayoutLeft_Rank1.cpp | 54 + ...ETIInst_int64_t_float_LayoutLeft_Rank2.cpp | 54 + ...ETIInst_int64_t_float_LayoutLeft_Rank3.cpp | 54 + ...ETIInst_int64_t_float_LayoutLeft_Rank4.cpp | 54 + ...ETIInst_int64_t_float_LayoutLeft_Rank5.cpp | 54 + ...ETIInst_int64_t_float_LayoutLeft_Rank8.cpp | 54 + ...TIInst_int64_t_float_LayoutRight_Rank1.cpp | 54 + ...TIInst_int64_t_float_LayoutRight_Rank2.cpp | 54 + ...TIInst_int64_t_float_LayoutRight_Rank3.cpp | 54 + ...TIInst_int64_t_float_LayoutRight_Rank4.cpp | 54 + ...TIInst_int64_t_float_LayoutRight_Rank5.cpp | 54 + ...TIInst_int64_t_float_LayoutRight_Rank8.cpp | 54 + ...IInst_int64_t_float_LayoutStride_Rank1.cpp | 54 + ...IInst_int64_t_float_LayoutStride_Rank2.cpp | 54 + ...IInst_int64_t_float_LayoutStride_Rank3.cpp | 54 + ...IInst_int64_t_float_LayoutStride_Rank4.cpp | 54 + ...IInst_int64_t_float_LayoutStride_Rank5.cpp | 54 + ...IInst_int64_t_float_LayoutStride_Rank8.cpp | 54 + ...IInst_int64_t_int64_t_LayoutLeft_Rank1.cpp | 54 + ...IInst_int64_t_int64_t_LayoutLeft_Rank2.cpp | 54 + ...IInst_int64_t_int64_t_LayoutLeft_Rank3.cpp | 54 + ...IInst_int64_t_int64_t_LayoutLeft_Rank4.cpp | 54 + ...IInst_int64_t_int64_t_LayoutLeft_Rank5.cpp | 54 + ...IInst_int64_t_int64_t_LayoutLeft_Rank8.cpp | 54 + ...Inst_int64_t_int64_t_LayoutRight_Rank1.cpp | 54 + ...Inst_int64_t_int64_t_LayoutRight_Rank2.cpp | 54 + ...Inst_int64_t_int64_t_LayoutRight_Rank3.cpp | 54 + ...Inst_int64_t_int64_t_LayoutRight_Rank4.cpp | 54 + ...Inst_int64_t_int64_t_LayoutRight_Rank5.cpp | 54 + ...Inst_int64_t_int64_t_LayoutRight_Rank8.cpp | 54 + ...nst_int64_t_int64_t_LayoutStride_Rank1.cpp | 54 + ...nst_int64_t_int64_t_LayoutStride_Rank2.cpp | 54 + ...nst_int64_t_int64_t_LayoutStride_Rank3.cpp | 54 + ...nst_int64_t_int64_t_LayoutStride_Rank4.cpp | 54 + ...nst_int64_t_int64_t_LayoutStride_Rank5.cpp | 54 + ...nst_int64_t_int64_t_LayoutStride_Rank8.cpp | 54 + 
...pyETIInst_int64_t_int_LayoutLeft_Rank1.cpp | 54 + ...pyETIInst_int64_t_int_LayoutLeft_Rank2.cpp | 54 + ...pyETIInst_int64_t_int_LayoutLeft_Rank3.cpp | 54 + ...pyETIInst_int64_t_int_LayoutLeft_Rank4.cpp | 54 + ...pyETIInst_int64_t_int_LayoutLeft_Rank5.cpp | 54 + ...pyETIInst_int64_t_int_LayoutLeft_Rank8.cpp | 54 + ...yETIInst_int64_t_int_LayoutRight_Rank1.cpp | 54 + ...yETIInst_int64_t_int_LayoutRight_Rank2.cpp | 54 + ...yETIInst_int64_t_int_LayoutRight_Rank3.cpp | 54 + ...yETIInst_int64_t_int_LayoutRight_Rank4.cpp | 54 + ...yETIInst_int64_t_int_LayoutRight_Rank5.cpp | 54 + ...yETIInst_int64_t_int_LayoutRight_Rank8.cpp | 54 + ...ETIInst_int64_t_int_LayoutStride_Rank1.cpp | 54 + ...ETIInst_int64_t_int_LayoutStride_Rank2.cpp | 54 + ...ETIInst_int64_t_int_LayoutStride_Rank3.cpp | 54 + ...ETIInst_int64_t_int_LayoutStride_Rank4.cpp | 54 + ...ETIInst_int64_t_int_LayoutStride_Rank5.cpp | 54 + ...ETIInst_int64_t_int_LayoutStride_Rank8.cpp | 54 + ...opyETIInst_int_double_LayoutLeft_Rank1.cpp | 54 + ...opyETIInst_int_double_LayoutLeft_Rank2.cpp | 54 + ...opyETIInst_int_double_LayoutLeft_Rank3.cpp | 54 + ...opyETIInst_int_double_LayoutLeft_Rank4.cpp | 54 + ...opyETIInst_int_double_LayoutLeft_Rank5.cpp | 54 + ...opyETIInst_int_double_LayoutLeft_Rank8.cpp | 54 + ...pyETIInst_int_double_LayoutRight_Rank1.cpp | 54 + ...pyETIInst_int_double_LayoutRight_Rank2.cpp | 54 + ...pyETIInst_int_double_LayoutRight_Rank3.cpp | 54 + ...pyETIInst_int_double_LayoutRight_Rank4.cpp | 54 + ...pyETIInst_int_double_LayoutRight_Rank5.cpp | 54 + ...pyETIInst_int_double_LayoutRight_Rank8.cpp | 54 + ...yETIInst_int_double_LayoutStride_Rank1.cpp | 54 + ...yETIInst_int_double_LayoutStride_Rank2.cpp | 54 + ...yETIInst_int_double_LayoutStride_Rank3.cpp | 54 + ...yETIInst_int_double_LayoutStride_Rank4.cpp | 54 + ...yETIInst_int_double_LayoutStride_Rank5.cpp | 54 + ...yETIInst_int_double_LayoutStride_Rank8.cpp | 54 + ...CopyETIInst_int_float_LayoutLeft_Rank1.cpp | 54 + 
...CopyETIInst_int_float_LayoutLeft_Rank2.cpp | 54 + ...CopyETIInst_int_float_LayoutLeft_Rank3.cpp | 54 + ...CopyETIInst_int_float_LayoutLeft_Rank4.cpp | 54 + ...CopyETIInst_int_float_LayoutLeft_Rank5.cpp | 54 + ...CopyETIInst_int_float_LayoutLeft_Rank8.cpp | 54 + ...opyETIInst_int_float_LayoutRight_Rank1.cpp | 54 + ...opyETIInst_int_float_LayoutRight_Rank2.cpp | 54 + ...opyETIInst_int_float_LayoutRight_Rank3.cpp | 54 + ...opyETIInst_int_float_LayoutRight_Rank4.cpp | 54 + ...opyETIInst_int_float_LayoutRight_Rank5.cpp | 54 + ...opyETIInst_int_float_LayoutRight_Rank8.cpp | 54 + ...pyETIInst_int_float_LayoutStride_Rank1.cpp | 54 + ...pyETIInst_int_float_LayoutStride_Rank2.cpp | 54 + ...pyETIInst_int_float_LayoutStride_Rank3.cpp | 54 + ...pyETIInst_int_float_LayoutStride_Rank4.cpp | 54 + ...pyETIInst_int_float_LayoutStride_Rank5.cpp | 54 + ...pyETIInst_int_float_LayoutStride_Rank8.cpp | 54 + ...pyETIInst_int_int64_t_LayoutLeft_Rank1.cpp | 54 + ...pyETIInst_int_int64_t_LayoutLeft_Rank2.cpp | 54 + ...pyETIInst_int_int64_t_LayoutLeft_Rank3.cpp | 54 + ...pyETIInst_int_int64_t_LayoutLeft_Rank4.cpp | 54 + ...pyETIInst_int_int64_t_LayoutLeft_Rank5.cpp | 54 + ...pyETIInst_int_int64_t_LayoutLeft_Rank8.cpp | 54 + ...yETIInst_int_int64_t_LayoutRight_Rank1.cpp | 54 + ...yETIInst_int_int64_t_LayoutRight_Rank2.cpp | 54 + ...yETIInst_int_int64_t_LayoutRight_Rank3.cpp | 54 + ...yETIInst_int_int64_t_LayoutRight_Rank4.cpp | 54 + ...yETIInst_int_int64_t_LayoutRight_Rank5.cpp | 54 + ...yETIInst_int_int64_t_LayoutRight_Rank8.cpp | 54 + ...ETIInst_int_int64_t_LayoutStride_Rank1.cpp | 54 + ...ETIInst_int_int64_t_LayoutStride_Rank2.cpp | 54 + ...ETIInst_int_int64_t_LayoutStride_Rank3.cpp | 54 + ...ETIInst_int_int64_t_LayoutStride_Rank4.cpp | 54 + ...ETIInst_int_int64_t_LayoutStride_Rank5.cpp | 54 + ...ETIInst_int_int64_t_LayoutStride_Rank8.cpp | 54 + ...ewCopyETIInst_int_int_LayoutLeft_Rank1.cpp | 54 + ...ewCopyETIInst_int_int_LayoutLeft_Rank2.cpp | 54 + 
...ewCopyETIInst_int_int_LayoutLeft_Rank3.cpp | 54 + ...ewCopyETIInst_int_int_LayoutLeft_Rank4.cpp | 54 + ...ewCopyETIInst_int_int_LayoutLeft_Rank5.cpp | 54 + ...ewCopyETIInst_int_int_LayoutLeft_Rank8.cpp | 54 + ...wCopyETIInst_int_int_LayoutRight_Rank1.cpp | 54 + ...wCopyETIInst_int_int_LayoutRight_Rank2.cpp | 54 + ...wCopyETIInst_int_int_LayoutRight_Rank3.cpp | 54 + ...wCopyETIInst_int_int_LayoutRight_Rank4.cpp | 54 + ...wCopyETIInst_int_int_LayoutRight_Rank5.cpp | 54 + ...wCopyETIInst_int_int_LayoutRight_Rank8.cpp | 54 + ...CopyETIInst_int_int_LayoutStride_Rank1.cpp | 54 + ...CopyETIInst_int_int_LayoutStride_Rank2.cpp | 54 + ...CopyETIInst_int_int_LayoutStride_Rank3.cpp | 54 + ...CopyETIInst_int_int_LayoutStride_Rank4.cpp | 54 + ...CopyETIInst_int_int_LayoutStride_Rank5.cpp | 54 + ...CopyETIInst_int_int_LayoutStride_Rank8.cpp | 54 + lib/kokkos/core/src/eti/HPX/Makefile.eti_HPX | 288 +++ .../core/src/impl/Kokkos_AnalyzePolicy.hpp | 31 +- .../Kokkos_Atomic_Compare_Exchange_Strong.hpp | 167 +- .../Kokkos_Atomic_Compare_Exchange_Weak.hpp | 418 ++++ .../core/src/impl/Kokkos_Atomic_Fetch_Add.hpp | 14 +- .../core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp | 24 +- .../core/src/impl/Kokkos_Atomic_Generic.hpp | 28 +- .../core/src/impl/Kokkos_Atomic_Load.hpp | 266 +++ .../src/impl/Kokkos_Atomic_Memory_Order.hpp | 122 + .../core/src/impl/Kokkos_Atomic_Store.hpp | 258 +++ lib/kokkos/core/src/impl/Kokkos_ChaseLev.hpp | 314 +++ lib/kokkos/core/src/impl/Kokkos_Core.cpp | 52 +- lib/kokkos/core/src/impl/Kokkos_EBO.hpp | 343 +++ lib/kokkos/core/src/impl/Kokkos_Error.hpp | 48 + .../src/impl/Kokkos_FixedBufferMemoryPool.hpp | 307 +++ .../core/src/impl/Kokkos_FunctorAdapter.hpp | 5 +- lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp | 12 +- .../src/impl/Kokkos_HostSpace_deepcopy.cpp | 134 ++ .../src/impl/Kokkos_HostSpace_deepcopy.hpp | 54 + .../core/src/impl/Kokkos_HostThreadTeam.hpp | 292 ++- lib/kokkos/core/src/impl/Kokkos_LIFO.hpp | 431 ++++ .../core/src/impl/Kokkos_LinkedListNode.hpp | 
206 ++ .../src/impl/Kokkos_MemoryPoolAllocator.hpp | 140 ++ .../src/impl/Kokkos_MultipleTaskQueue.hpp | 616 +++++ .../core/src/impl/Kokkos_OptionalRef.hpp | 242 ++ .../core/src/impl/Kokkos_Serial_Task.cpp | 99 +- .../core/src/impl/Kokkos_Serial_Task.hpp | 223 +- .../core/src/impl/Kokkos_SharedAlloc.cpp | 37 +- .../src/impl/Kokkos_SimpleTaskScheduler.hpp | 646 ++++++ .../core/src/impl/Kokkos_SingleTaskQueue.hpp | 207 ++ lib/kokkos/core/src/impl/Kokkos_TaskBase.hpp | 329 +++ lib/kokkos/core/src/impl/Kokkos_TaskNode.hpp | 758 +++++++ .../core/src/impl/Kokkos_TaskPolicyData.hpp | 195 ++ lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp | 388 +--- .../core/src/impl/Kokkos_TaskQueueCommon.hpp | 569 +++++ .../impl/Kokkos_TaskQueueMemoryManager.hpp | 251 +++ .../src/impl/Kokkos_TaskQueueMultiple.hpp | 286 +++ .../impl/Kokkos_TaskQueueMultiple_impl.hpp | 72 + .../core/src/impl/Kokkos_TaskQueue_impl.hpp | 119 +- .../core/src/impl/Kokkos_TaskResult.hpp | 151 ++ .../core/src/impl/Kokkos_TaskTeamMember.hpp | 135 ++ lib/kokkos/core/src/impl/Kokkos_Traits.hpp | 48 + .../core/src/impl/Kokkos_VLAEmulation.hpp | 295 +++ lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp | 2 + .../core/src/impl/Kokkos_ViewMapping.hpp | 146 +- lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp | 12 + lib/kokkos/core/unit_test/CMakeLists.txt | 682 +++++- lib/kokkos/core/unit_test/Makefile | 521 +++-- lib/kokkos/core/unit_test/TestAtomic.hpp | 20 +- .../core/unit_test/TestAtomicOperations.hpp | 52 +- .../TestAtomicOperations_complexdouble.hpp | 57 + .../TestAtomicOperations_complexfloat.hpp | 57 + lib/kokkos/core/unit_test/TestCXX11.hpp | 2 + .../core/unit_test/TestCompilerMacros.hpp | 2 +- lib/kokkos/core/unit_test/TestDeepCopy.hpp | 167 ++ .../unit_test/TestDefaultDeviceTypeInit.hpp | 8 + .../core/unit_test/TestLocalDeepCopy.hpp | 904 ++++++++ lib/kokkos/core/unit_test/TestMDRange.hpp | 5 + .../core/unit_test/TestPolicyConstruction.hpp | 3 + .../unit_test/TestReduceCombinatorical.hpp | 6 + 
.../core/unit_test/TestReduceDeviceView.hpp | 8 +- lib/kokkos/core/unit_test/TestReducers.hpp | 11 + lib/kokkos/core/unit_test/TestScan.hpp | 7 +- lib/kokkos/core/unit_test/TestSharedAlloc.hpp | 12 +- .../core/unit_test/TestTaskScheduler.hpp | 470 +++- .../unit_test/TestTaskScheduler_single.hpp | 92 + lib/kokkos/core/unit_test/TestTeam.hpp | 14 +- lib/kokkos/core/unit_test/TestTeamVector.hpp | 53 +- .../core/unit_test/TestTeamVectorRange.hpp | 464 ++++ lib/kokkos/core/unit_test/TestTile.hpp | 7 + lib/kokkos/core/unit_test/TestViewAPI.hpp | 46 +- lib/kokkos/core/unit_test/TestViewAPI_e.hpp | 42 + .../core/unit_test/TestViewMapping_a.hpp | 6 +- .../core/unit_test/TestViewMapping_b.hpp | 6 +- .../unit_test/TestViewMapping_subview.hpp | 1 + lib/kokkos/core/unit_test/TestViewSubview.hpp | 204 ++ lib/kokkos/core/unit_test/TestWorkGraph.hpp | 1 + ...estCuda_AtomicOperations_complexdouble.cpp | 46 + ...TestCuda_AtomicOperations_complexfloat.cpp | 46 + .../cuda/TestCuda_DeepCopyAlignment.cpp | 48 + ..._InterOp.cpp => TestCuda_InterOp_Init.cpp} | 0 .../cuda/TestCuda_InterOp_Streams.cpp | 180 ++ .../unit_test/cuda/TestCuda_LocalDeepCopy.cpp | 46 + .../core/unit_test/cuda/TestCuda_Spaces.cpp | 6 +- .../unit_test/cuda/TestCuda_SubView_a.cpp | 7 + .../cuda/TestCuda_TeamVectorRange.cpp | 48 + .../hpx/TestHPX_AtomicOperations_double.cpp | 46 + .../hpx/TestHPX_AtomicOperations_float.cpp | 46 + .../hpx/TestHPX_AtomicOperations_int.cpp | 46 + .../hpx/TestHPX_AtomicOperations_longint.cpp | 46 + .../TestHPX_AtomicOperations_longlongint.cpp | 46 + .../TestHPX_AtomicOperations_unsignedint.cpp | 46 + ...stHPX_AtomicOperations_unsignedlongint.cpp | 46 + .../unit_test/hpx/TestHPX_AtomicViews.cpp | 47 + .../core/unit_test/hpx/TestHPX_Atomics.cpp | 46 + .../core/unit_test/hpx/TestHPX_Category.hpp | 65 + .../core/unit_test/hpx/TestHPX_Complex.cpp | 47 + lib/kokkos/core/unit_test/hpx/TestHPX_Crs.cpp | 45 + .../core/unit_test/hpx/TestHPX_Init.cpp | 50 + .../core/unit_test/hpx/TestHPX_InterOp.cpp 
| 56 + .../core/unit_test/hpx/TestHPX_MDRange_a.cpp | 47 + .../core/unit_test/hpx/TestHPX_MDRange_b.cpp | 47 + .../core/unit_test/hpx/TestHPX_MDRange_c.cpp | 47 + .../core/unit_test/hpx/TestHPX_MDRange_d.cpp | 47 + .../core/unit_test/hpx/TestHPX_MDRange_e.cpp | 47 + .../core/unit_test/hpx/TestHPX_Other.cpp | 43 + .../unit_test/hpx/TestHPX_RangePolicy.cpp | 47 + .../core/unit_test/hpx/TestHPX_Reducers_a.cpp | 45 + .../core/unit_test/hpx/TestHPX_Reducers_b.cpp | 45 + .../core/unit_test/hpx/TestHPX_Reducers_c.cpp | 45 + .../core/unit_test/hpx/TestHPX_Reducers_d.cpp | 45 + .../core/unit_test/hpx/TestHPX_Reductions.cpp | 46 + .../core/unit_test/hpx/TestHPX_Scan.cpp | 47 + .../unit_test/hpx/TestHPX_SharedAlloc.cpp | 55 + .../core/unit_test/hpx/TestHPX_SubView_a.cpp | 104 + .../core/unit_test/hpx/TestHPX_SubView_b.cpp | 63 + .../unit_test/hpx/TestHPX_SubView_c01.cpp | 54 + .../unit_test/hpx/TestHPX_SubView_c02.cpp | 54 + .../unit_test/hpx/TestHPX_SubView_c03.cpp | 54 + .../unit_test/hpx/TestHPX_SubView_c04.cpp | 54 + .../unit_test/hpx/TestHPX_SubView_c05.cpp | 54 + .../unit_test/hpx/TestHPX_SubView_c06.cpp | 54 + .../unit_test/hpx/TestHPX_SubView_c07.cpp | 54 + .../unit_test/hpx/TestHPX_SubView_c08.cpp | 54 + .../unit_test/hpx/TestHPX_SubView_c09.cpp | 54 + .../unit_test/hpx/TestHPX_SubView_c10.cpp | 54 + .../unit_test/hpx/TestHPX_SubView_c11.cpp | 54 + .../unit_test/hpx/TestHPX_SubView_c12.cpp | 54 + .../unit_test/hpx/TestHPX_SubView_c13.cpp | 54 + .../unit_test/hpx/TestHPX_SubView_c_all.cpp | 13 + .../core/unit_test/hpx/TestHPX_Task.cpp | 47 + .../core/unit_test/hpx/TestHPX_Team.cpp | 75 + .../hpx/TestHPX_TeamReductionScan.cpp | 81 + .../unit_test/hpx/TestHPX_TeamScratch.cpp | 83 + .../unit_test/hpx/TestHPX_TeamVectorRange.cpp | 48 + .../unit_test/hpx/TestHPX_UniqueToken.cpp | 46 + .../core/unit_test/hpx/TestHPX_ViewAPI_a.cpp | 45 + .../core/unit_test/hpx/TestHPX_ViewAPI_b.cpp | 45 + .../core/unit_test/hpx/TestHPX_ViewAPI_c.cpp | 45 + 
.../core/unit_test/hpx/TestHPX_ViewAPI_d.cpp | 45 + .../core/unit_test/hpx/TestHPX_ViewAPI_e.cpp | 45 + .../unit_test/hpx/TestHPX_ViewMapping_a.cpp | 46 + .../unit_test/hpx/TestHPX_ViewMapping_b.cpp | 46 + .../hpx/TestHPX_ViewMapping_subview.cpp | 46 + .../unit_test/hpx/TestHPX_ViewOfClass.cpp | 46 + .../core/unit_test/hpx/TestHPX_View_64bit.cpp | 45 + .../core/unit_test/hpx/TestHPX_WorkGraph.cpp | 45 + ...tOpenMP_AtomicOperations_complexdouble.cpp | 46 + ...stOpenMP_AtomicOperations_complexfloat.cpp | 46 + .../openmp/TestOpenMP_DeepCopyAlignment.cpp | 45 + .../openmp/TestOpenMP_LocalDeepCopy.cpp | 46 + .../unit_test/openmp/TestOpenMP_SubView_a.cpp | 7 + .../openmp/TestOpenMP_TeamVectorRange.cpp | 48 + ...PTarget_AtomicOperations_complexdouble.cpp | 46 + ...MPTarget_AtomicOperations_complexfloat.cpp | 46 + .../TestOpenMPTarget_DeepCopyAlignment.cpp | 45 + .../TestOpenMPTarget_SubView_a.cpp | 7 + ...threads_AtomicOperations_complexdouble.cpp | 46 + ...qthreads_AtomicOperations_complexfloat.cpp | 46 + .../TestQthreads_DeepCopyAlignment.cpp | 45 + .../qthreads/TestQthreads_Reductions.cpp | 4 +- .../rocm/TestROCm_DeepCopyAlignment.cpp | 45 + .../core/unit_test/rocm/TestROCm_Spaces.cpp | 2 +- .../unit_test/rocm/TestROCm_SubView_a.cpp | 7 + ...tSerial_AtomicOperations_complexdouble.cpp | 46 + ...stSerial_AtomicOperations_complexfloat.cpp | 46 + .../serial/TestSerial_DeepCopyAlignment.cpp | 45 + .../serial/TestSerial_LocalDeepCopy.cpp | 46 + .../unit_test/serial/TestSerial_SubView_a.cpp | 7 + .../serial/TestSerial_TeamVectorRange.cpp | 48 + .../unit_test/standalone/UnitTestMainInit.cpp | 11 +- ...Threads_AtomicOperations_complexdouble.cpp | 46 + ...tThreads_AtomicOperations_complexfloat.cpp | 46 + .../threads/TestThreads_DeepCopyAlignment.cpp | 45 + .../threads/TestThreads_LocalDeepCopy.cpp | 46 + .../threads/TestThreads_SubView_a.cpp | 7 + .../threads/TestThreads_TeamVectorRange.cpp | 48 + lib/kokkos/doc/Doxyfile | 127 -- lib/kokkos/doc/Kokkos_PG.pdf | Bin 1359256 -> 0 
bytes .../doc/SAND2017-10464-Kokkos-Task-DAG.pdf | Bin 761252 -> 0 bytes lib/kokkos/doc/build_docs | 15 - .../doc/design_notes_space_instances.md | 131 -- lib/kokkos/doc/develop_builds.md | 76 - .../query_cuda_arch.cpp | 24 - lib/kokkos/doc/index.doc | 72 - lib/kokkos/doc/kokkos-promotion.txt | 196 -- lib/kokkos/example/cmake/Dependencies.cmake | 2 +- lib/kokkos/example/common/VectorImport.hpp | 10 +- lib/kokkos/example/feint/Makefile | 4 + lib/kokkos/example/feint/feint_hpx.cpp | 67 + lib/kokkos/example/fenl/CGSolve.hpp | 4 +- lib/kokkos/example/fenl/fenl.cpp | 22 + lib/kokkos/example/fenl/fenl_functors.hpp | 14 +- lib/kokkos/example/fenl/fenl_impl.hpp | 6 +- lib/kokkos/example/fenl/main.cpp | 12 +- lib/kokkos/example/md_skeleton/force.cpp | 2 +- lib/kokkos/example/md_skeleton/neighbor.cpp | 6 +- lib/kokkos/example/multi_fem/Explicit.hpp | 6 +- .../example/multi_fem/ExplicitFunctors.hpp | 24 +- lib/kokkos/example/multi_fem/Implicit.hpp | 8 +- lib/kokkos/example/multi_fem/LinAlgBLAS.hpp | 74 +- lib/kokkos/example/multi_fem/Nonlinear.hpp | 8 +- lib/kokkos/example/multi_fem/TestCuda.cpp | 46 +- lib/kokkos/example/multi_fem/TestHost.cpp | 25 +- .../Advanced_Views/03_subviews/subviews.cpp | 1 + .../overlapping_deepcopy.cpp | 2 +- lib/kokkos/generate_makefile.bash | 35 +- lib/kokkos/master_history.txt | 1 + .../scripts/eti/generate_view_copy_cpp_files | 12 - .../eti/generate_view_copy_cpp_files_iterate | 21 - .../eti/generate_view_copy_cpp_files_rank | 17 - .../eti/generate_view_copy_cpp_files_write | 41 - lib/kokkos/scripts/snapshot.py | 291 --- lib/kokkos/scripts/testing_scripts/README | 5 - .../testing_scripts/jenkins_test_driver | 83 - .../testing_scripts/obj_size_opt_check | 287 --- .../scripts/testing_scripts/test_all_sandia | 811 ------- .../test_kokkos_master_develop_promotion.sh | 66 - .../blake_jenkins_run_script_pthread_intel | 63 - .../blake_jenkins_run_script_serial_intel | 63 - .../scripts/trilinos-integration/checkin-test | 4 - 
.../prepare_trilinos_repos.sh | 59 - .../white_run_jenkins_script_cuda | 67 - .../white_run_jenkins_script_omp | 62 - 506 files changed, 37043 insertions(+), 6851 deletions(-) create mode 100644 lib/kokkos/algorithms/unit_tests/TestHPX.cpp create mode 100644 lib/kokkos/containers/performance_tests/TestHPX.cpp create mode 100644 lib/kokkos/containers/unit_tests/hpx/TestHPX_BitSet.cpp create mode 100644 lib/kokkos/containers/unit_tests/hpx/TestHPX_Category.hpp create mode 100644 lib/kokkos/containers/unit_tests/hpx/TestHPX_DualView.cpp create mode 100644 lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_generic.cpp create mode 100644 lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank12345.cpp create mode 100644 lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank67.cpp create mode 100644 lib/kokkos/containers/unit_tests/hpx/TestHPX_DynamicView.cpp create mode 100644 lib/kokkos/containers/unit_tests/hpx/TestHPX_ErrorReporter.cpp create mode 100644 lib/kokkos/containers/unit_tests/hpx/TestHPX_OffsetView.cpp create mode 100644 lib/kokkos/containers/unit_tests/hpx/TestHPX_ScatterView.cpp create mode 100644 lib/kokkos/containers/unit_tests/hpx/TestHPX_StaticCrsGraph.cpp create mode 100644 lib/kokkos/containers/unit_tests/hpx/TestHPX_UnorderedMap.cpp create mode 100644 lib/kokkos/containers/unit_tests/hpx/TestHPX_Vector.cpp create mode 100644 lib/kokkos/containers/unit_tests/hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp create mode 100644 lib/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp delete mode 100644 lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics_Restore_Builtins.hpp rename lib/kokkos/core/src/Cuda/{Kokkos_Cuda_Internal.hpp => Kokkos_Cuda_BlockSize_Deduction.hpp} (69%) rename lib/kokkos/core/src/Cuda/{Kokkos_Cuda_Impl.cpp => Kokkos_Cuda_Instance.cpp} (86%) create mode 100644 
lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp create mode 100644 lib/kokkos/core/src/HPX/Kokkos_HPX.cpp rename lib/kokkos/core/src/{impl/Kokkos_StaticAssert.hpp => HPX/Kokkos_HPX_Task.cpp} (76%) create mode 100644 lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp create mode 100644 lib/kokkos/core/src/HPX/Kokkos_HPX_ViewCopyETIAvail.hpp create mode 100644 lib/kokkos/core/src/HPX/Kokkos_HPX_ViewCopyETIDecl.hpp create mode 100644 lib/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Extents.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Future.hpp create mode 100644 lib/kokkos/core/src/Kokkos_HPX.hpp create mode 100644 lib/kokkos/core/src/Kokkos_PointerOwnership.hpp create mode 100644 lib/kokkos/core/src/Kokkos_TaskScheduler_fwd.hpp create mode 100644 lib/kokkos/core/src/eti/HPX/CMakeLists.txt create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp create mode 100644 
lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp create mode 100644 
lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp create mode 100644 
lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp create mode 100644 
lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp create mode 100644 
lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp create mode 100644 
lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp create mode 100644 
lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp create 
mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp create mode 100644 lib/kokkos/core/src/eti/HPX/Makefile.eti_HPX create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_Load.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_Memory_Order.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_Store.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_ChaseLev.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_EBO.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_LIFO.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_LinkedListNode.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_MemoryPoolAllocator.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_MultipleTaskQueue.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_OptionalRef.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp create mode 100644 
lib/kokkos/core/src/impl/Kokkos_SingleTaskQueue.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskBase.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskNode.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskPolicyData.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskQueueCommon.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskQueueMemoryManager.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple_impl.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskResult.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskTeamMember.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_VLAEmulation.hpp create mode 100644 lib/kokkos/core/unit_test/TestAtomicOperations_complexdouble.hpp create mode 100644 lib/kokkos/core/unit_test/TestAtomicOperations_complexfloat.hpp create mode 100644 lib/kokkos/core/unit_test/TestDeepCopy.hpp create mode 100644 lib/kokkos/core/unit_test/TestLocalDeepCopy.hpp create mode 100644 lib/kokkos/core/unit_test/TestTaskScheduler_single.hpp create mode 100644 lib/kokkos/core/unit_test/TestTeamVectorRange.hpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_AtomicOperations_complexdouble.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_AtomicOperations_complexfloat.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_DeepCopyAlignment.cpp rename lib/kokkos/core/unit_test/cuda/{TestCuda_InterOp.cpp => TestCuda_InterOp_Init.cpp} (100%) create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_InterOp_Streams.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_LocalDeepCopy.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_TeamVectorRange.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_AtomicOperations_double.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_AtomicOperations_float.cpp create mode 100644 
lib/kokkos/core/unit_test/hpx/TestHPX_AtomicOperations_int.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_AtomicOperations_longint.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_AtomicOperations_longlongint.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_AtomicOperations_unsignedint.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_AtomicOperations_unsignedlongint.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_AtomicViews.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Atomics.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Category.hpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Complex.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Crs.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Init.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_InterOp.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_MDRange_a.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_MDRange_b.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_MDRange_c.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_MDRange_d.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_MDRange_e.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Other.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_RangePolicy.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Reducers_a.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Reducers_b.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Reducers_c.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Reducers_d.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Reductions.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Scan.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SharedAlloc.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_a.cpp create mode 100644 
lib/kokkos/core/unit_test/hpx/TestHPX_SubView_b.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_c01.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_c02.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_c03.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_c04.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_c05.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_c06.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_c07.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_c08.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_c09.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_c10.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_c11.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_c12.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_c13.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_SubView_c_all.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Task.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_Team.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_TeamReductionScan.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_TeamScratch.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_TeamVectorRange.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_UniqueToken.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_ViewAPI_a.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_ViewAPI_b.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_ViewAPI_c.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_ViewAPI_d.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_ViewAPI_e.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_ViewMapping_a.cpp create mode 100644 
lib/kokkos/core/unit_test/hpx/TestHPX_ViewMapping_b.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_ViewMapping_subview.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_ViewOfClass.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_View_64bit.cpp create mode 100644 lib/kokkos/core/unit_test/hpx/TestHPX_WorkGraph.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_AtomicOperations_complexdouble.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_AtomicOperations_complexfloat.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_DeepCopyAlignment.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_LocalDeepCopy.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_TeamVectorRange.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_AtomicOperations_complexdouble.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_AtomicOperations_complexfloat.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_DeepCopyAlignment.cpp create mode 100644 lib/kokkos/core/unit_test/qthreads/TestQqthreads_AtomicOperations_complexdouble.cpp create mode 100644 lib/kokkos/core/unit_test/qthreads/TestQqthreads_AtomicOperations_complexfloat.cpp create mode 100644 lib/kokkos/core/unit_test/qthreads/TestQthreads_DeepCopyAlignment.cpp create mode 100644 lib/kokkos/core/unit_test/rocm/TestROCm_DeepCopyAlignment.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_AtomicOperations_complexdouble.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_AtomicOperations_complexfloat.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_DeepCopyAlignment.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_LocalDeepCopy.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_TeamVectorRange.cpp create mode 100644 
lib/kokkos/core/unit_test/threads/TestThreads_AtomicOperations_complexdouble.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_AtomicOperations_complexfloat.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_DeepCopyAlignment.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_LocalDeepCopy.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_TeamVectorRange.cpp delete mode 100644 lib/kokkos/doc/Doxyfile delete mode 100644 lib/kokkos/doc/Kokkos_PG.pdf delete mode 100644 lib/kokkos/doc/SAND2017-10464-Kokkos-Task-DAG.pdf delete mode 100755 lib/kokkos/doc/build_docs delete mode 100644 lib/kokkos/doc/design_notes_space_instances.md delete mode 100644 lib/kokkos/doc/develop_builds.md delete mode 100644 lib/kokkos/doc/hardware_identification/query_cuda_arch.cpp delete mode 100644 lib/kokkos/doc/index.doc delete mode 100644 lib/kokkos/doc/kokkos-promotion.txt create mode 100644 lib/kokkos/example/feint/feint_hpx.cpp delete mode 100755 lib/kokkos/scripts/eti/generate_view_copy_cpp_files delete mode 100755 lib/kokkos/scripts/eti/generate_view_copy_cpp_files_iterate delete mode 100755 lib/kokkos/scripts/eti/generate_view_copy_cpp_files_rank delete mode 100755 lib/kokkos/scripts/eti/generate_view_copy_cpp_files_write delete mode 100755 lib/kokkos/scripts/snapshot.py delete mode 100644 lib/kokkos/scripts/testing_scripts/README delete mode 100755 lib/kokkos/scripts/testing_scripts/jenkins_test_driver delete mode 100755 lib/kokkos/scripts/testing_scripts/obj_size_opt_check delete mode 100755 lib/kokkos/scripts/testing_scripts/test_all_sandia delete mode 100755 lib/kokkos/scripts/testing_scripts/test_kokkos_master_develop_promotion.sh delete mode 100755 lib/kokkos/scripts/trilinos-integration/blake_jenkins_run_script_pthread_intel delete mode 100755 lib/kokkos/scripts/trilinos-integration/blake_jenkins_run_script_serial_intel delete mode 100644 lib/kokkos/scripts/trilinos-integration/checkin-test delete mode 
100755 lib/kokkos/scripts/trilinos-integration/prepare_trilinos_repos.sh delete mode 100755 lib/kokkos/scripts/trilinos-integration/white_run_jenkins_script_cuda delete mode 100755 lib/kokkos/scripts/trilinos-integration/white_run_jenkins_script_omp diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index 9d503663ae..8d196e2c35 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,39 @@ # Change Log +## [2.9.00](https://github.com/kokkos/kokkos/tree/2.9.00) (2019-06-24) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.8.00...2.9.00) + +**Implemented enhancements:** + +- Capability: CUDA Streams [\#1723](https://github.com/kokkos/kokkos/issues/1723) +- Capability: CUDA Stream support for parallel\_reduce [\#2061](https://github.com/kokkos/kokkos/issues/2061) +- Capability: Feature Request: TeamVectorRange [\#713](https://github.com/kokkos/kokkos/issues/713) +- Capability: Adding HPX backend [\#2080](https://github.com/kokkos/kokkos/issues/2080) +- Capability: TaskScheduler to have multiple queues [\#565](https://github.com/kokkos/kokkos/issues/565) +- Capability: Support for additional reductions in ScatterView [\#1674](https://github.com/kokkos/kokkos/issues/1674) +- Capability: Request: deep\_copy within parallel regions [\#689](https://github.com/kokkos/kokkos/issues/689) +- Capability: Feature Request: `create\_mirror\_view\_without\_initializing` [\#1765](https://github.com/kokkos/kokkos/issues/1765) +- View: Use SFINAE to restrict possible View type conversions [\#2127](https://github.com/kokkos/kokkos/issues/2127) +- Deprecation: Deprecate ExecutionSpace::fence\(\) as static function and make it non-static [\#2140](https://github.com/kokkos/kokkos/issues/2140) +- Deprecation: Deprecate LayoutTileLeft [\#2122](https://github.com/kokkos/kokkos/issues/2122) +- Macros: KOKKOS\_RESTRICT defined for non-Intel compilers [\#2038](https://github.com/kokkos/kokkos/issues/2038) + +**Fixed bugs:** + +- Cuda: TeamThreadRange 
loop count on device is passed by reference to host static constexpr [\#1733](https://github.com/kokkos/kokkos/issues/1733) +- Cuda: Build error with relocatable device code with CUDA 10.1 GCC 7.3 [\#2134](https://github.com/kokkos/kokkos/issues/2134) +- Cuda: cudaFuncSetCacheConfig is setting CachePreferShared too often [\#2066](https://github.com/kokkos/kokkos/issues/2066) +- Cuda: TeamPolicy doesn't throw when created with non-viable vector length and also doesn't backscale to viable one [\#2020](https://github.com/kokkos/kokkos/issues/2020) +- Cuda: cudaMemcpy error for large league sizes on V100 [\#1991](https://github.com/kokkos/kokkos/issues/1991) +- Cuda: illegal warp sync in parallel\_reduce by functor on Turing 75 [\#1958](https://github.com/kokkos/kokkos/issues/1958) +- TeamThreadRange: Inconsistent results from TeamThreadRange reduction [\#1905](https://github.com/kokkos/kokkos/issues/1905) +- Atomics: atomic\_fetch\_oper & atomic\_oper\_fetch don't build for complex\<float\> [\#1964](https://github.com/kokkos/kokkos/issues/1964) +- Views: Kokkos randomread Views leak memory [\#2155](https://github.com/kokkos/kokkos/issues/2155) +- ScatterView: LayoutLeft overload currently non-functional [\#2165](https://github.com/kokkos/kokkos/issues/2165) +- KNL: With intel 17.2.174 illegal instruction in random number test [\#2078](https://github.com/kokkos/kokkos/issues/2078) +- Bitset: Enable copy constructor on device [\#2094](https://github.com/kokkos/kokkos/issues/2094) +- Examples: do not compile due to template deduction error \(multi\_fem\) [\#1928](https://github.com/kokkos/kokkos/issues/1928) + ## [2.8.00](https://github.com/kokkos/kokkos/tree/2.8.00) (2019-02-05) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.7.24...2.8.00) diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index a90e86b9f8..e9ad57f0ae 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -23,7 +23,7 @@ KOKKOS_DEBUG ?= "no" KOKKOS_USE_TPLS ?= 
"" # Options: c++11,c++14,c++1y,c++17,c++1z,c++2a KOKKOS_CXX_STANDARD ?= "c++11" -# Options: aggressive_vectorization,disable_profiling,disable_deprecated_code,enable_large_mem_tests +# Options: aggressive_vectorization,disable_profiling,enable_deprecated_code,disable_deprecated_code,enable_large_mem_tests KOKKOS_OPTIONS ?= "" # Option for setting ETI path KOKKOS_ETI_PATH ?= ${KOKKOS_PATH}/core/src/eti @@ -33,11 +33,19 @@ KOKKOS_CMAKE ?= "no" # Options: force_uvm,use_ldg,rdc,enable_lambda KOKKOS_CUDA_OPTIONS ?= "enable_lambda" +# Default settings specific options. +# Options: enable_async_dispatch +KOKKOS_HPX_OPTIONS ?= "" + # Return a 1 if a string contains a substring and 0 if not # Note the search string should be without '"' # Example: $(call kokkos_has_string,"hwloc,librt",hwloc) # Will return a 1 kokkos_has_string=$(if $(findstring $2,$1),1,0) +# Returns 1 if the path exists, 0 otherwise +# Example: $(call kokkos_path_exists,/path/to/file) +# Will return a 1 if /path/to/file exists +kokkos_path_exists=$(if $(wildcard $1),1,0) # Check for general settings. 
KOKKOS_INTERNAL_ENABLE_DEBUG := $(call kokkos_has_string,$(KOKKOS_DEBUG),yes) @@ -58,6 +66,7 @@ KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OP KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization) KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_profiling) KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code) +KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_deprecated_code) KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check) KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print) KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_large_mem_tests) @@ -65,6 +74,7 @@ KOKKOS_INTERNAL_CUDA_USE_LDG := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS), KOKKOS_INTERNAL_CUDA_USE_UVM := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),force_uvm) KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc) KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda) +KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_eti) @@ -72,12 +82,15 @@ KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_ KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP) KOKKOS_INTERNAL_USE_PTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Pthread) KOKKOS_INTERNAL_USE_QTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Qthreads) +KOKKOS_INTERNAL_USE_HPX := $(call kokkos_has_string,$(KOKKOS_DEVICES),HPX) 
KOKKOS_INTERNAL_USE_SERIAL := $(call kokkos_has_string,$(KOKKOS_DEVICES),Serial) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0) - KOKKOS_INTERNAL_USE_SERIAL := 1 + ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0) + KOKKOS_INTERNAL_USE_SERIAL := 1 + endif endif endif endif @@ -112,7 +125,7 @@ KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2 KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)) KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep nvcc | wc -l)) KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang) -KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),apple-darwin) +KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple LLVM) KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) # Check Host Compiler if using NVCC through nvcc_wrapper @@ -283,9 +296,9 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLE + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ - + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \ - + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \ - + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \ + + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \ + + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \ + + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53)) @@ -300,19 +313,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ - + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \ - + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \ - + 
$(KOKKOS_INTERNAL_USE_ARCH_TURING75) \ + + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \ + + $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \ + + $(KOKKOS_INTERNAL_USE_ARCH_TURING75) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53)) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1) - ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) - CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) + CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) + ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) KOKKOS_INTERNAL_OPENMPTARGET_FLAG := $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) --cuda-path=$(CUDA_PATH) endif endif @@ -441,6 +454,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_QTHREADS") endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX") +endif + ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_SERIAL") endif @@ -559,9 +576,15 @@ ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING") endif -ifeq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 0) - tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE") +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0) + ifeq ($(KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE), 1) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE") + endif + ifeq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 0) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE") + endif endif + ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_ETI") endif @@ -593,8 +616,13 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) 
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE") - KOKKOS_CXXFLAGS += --relocatable-device-code=true - KOKKOS_LDFLAGS += --relocatable-device-code=true + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_CXXFLAGS += -fcuda-rdc + KOKKOS_LDFLAGS += -fcuda-rdc + else + KOKKOS_CXXFLAGS += --relocatable-device-code=true + KOKKOS_LDFLAGS += --relocatable-device-code=true + endif endif ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) @@ -625,6 +653,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + ifeq ($(KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH), 1) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX_ASYNC_DISPATCH") + endif +endif + # Add Architecture flags. ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) @@ -908,7 +942,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch KOKKOS_CXXFLAGS += -x cuda else - $(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang) + $(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang (got version string $(KOKKOS_CXX_VERSION)) ) endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) @@ -1058,10 +1092,18 @@ endif ifneq ($(KOKKOS_CMAKE), yes) KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include endif - KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 - KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib64 + ifeq ($(call kokkos_path_exists,$(CUDA_PATH)/lib64), 1) + KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 + KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib64 + KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib64 + else ifeq ($(call kokkos_path_exists,$(CUDA_PATH)/lib), 1) + KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib + KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib + KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib + else + $(error Can't find CUDA library directory: no lib64 or lib directory in $(CUDA_PATH)) + endif KOKKOS_TPL_INCLUDE_DIRS += $(CUDA_PATH)/include - 
KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib64 ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH) endif @@ -1124,6 +1166,33 @@ ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) KOKKOS_TPL_LIBRARY_NAMES += qthread endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp) + ifneq ($(HPX_PATH),) + ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) + KOKKOS_CXXFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --cflags hpx_application_debug) + KOKKOS_CXXLDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application_debug) + KOKKOS_LDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application_debug) + else + KOKKOS_CXXFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --cflags hpx_application) + KOKKOS_CXXLDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application) + KOKKOS_LDFLAGS += $(shell PKG_CONFIG_PATH=$(HPX_PATH)/lib64/pkgconfig pkg-config --libs hpx_application) + endif + else + ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) + KOKKOS_CXXFLAGS += $(shell pkg-config --cflags hpx_application_debug) + KOKKOS_CXXLDFLAGS += $(shell pkg-config --libs hpx_application_debug) + KOKKOS_LDFLAGS += $(shell pkg-config --libs hpx_application_debug) + else + KOKKOS_CXXFLAGS += $(shell pkg-config --cflags hpx_application) + KOKKOS_CXXLDFLAGS += $(shell pkg-config --libs hpx_application) + KOKKOS_LDFLAGS += $(shell pkg-config --libs hpx_application) + endif + endif + KOKKOS_TPL_LIBRARY_NAMES += hpx +endif + # Explicitly set the GCC Toolchain for Clang. 
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_INTERNAL_GCC_PATH = $(shell which g++) diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index 44da1e082a..e7d5a3c907 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -30,6 +30,8 @@ Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp +Kokkos_HostSpace_deepcopy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace_deepcopy.cpp ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1) @@ -38,8 +40,8 @@ endif endif ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) -Kokkos_Cuda_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp +Kokkos_Cuda_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Instance.cpp Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp @@ -92,6 +94,13 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) +Kokkos_HPX.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX.cpp + $(CXX) $(KOKKOS_CPPFLAGS) 
$(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX.cpp +Kokkos_HPX_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HPX/Kokkos_HPX_Task.cpp +endif + ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) Kokkos_OpenMPTarget_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp diff --git a/lib/kokkos/algorithms/cmake/Dependencies.cmake b/lib/kokkos/algorithms/cmake/Dependencies.cmake index c36b62523f..1b41310681 100644 --- a/lib/kokkos/algorithms/cmake/Dependencies.cmake +++ b/lib/kokkos/algorithms/cmake/Dependencies.cmake @@ -1,5 +1,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( LIB_REQUIRED_PACKAGES KokkosCore KokkosContainers - LIB_OPTIONAL_TPLS Pthread CUDA HWLOC + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC HPX TEST_OPTIONAL_TPLS CUSPARSE ) diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp index 8bdd876723..7fb8505fe5 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -328,6 +328,8 @@ public: parallel_for("Kokkos::Sort::Copy", Kokkos::RangePolicy(0,len),functor); } + + Kokkos::fence(); } template diff --git a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt index f5aa24e9be..e238b37c8e 100644 --- a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt +++ b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt @@ -42,6 +42,12 @@ IF(Kokkos_ENABLE_OpenMP) ) ENDIF() +IF(Kokkos_ENABLE_HPX) + LIST( APPEND SOURCES + TestHPX.cpp + ) +ENDIF() + IF(Kokkos_ENABLE_Serial) LIST( APPEND SOURCES TestSerial.cpp diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile index b5848c451e..3c862d03dc 100644 --- 
a/lib/kokkos/algorithms/unit_tests/Makefile +++ b/lib/kokkos/algorithms/unit_tests/Makefile @@ -49,6 +49,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) TEST_TARGETS += test-openmp endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + OBJ_HPX = TestHPX.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_HPX + TEST_TARGETS += test-hpx +endif + ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o TARGETS += KokkosAlgorithms_UnitTest_Serial @@ -67,6 +73,9 @@ KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_OpenMP +KokkosAlgorithms_UnitTest_HPX: $(OBJ_HPX) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) $(OBJ_HPX) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_HPX + KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Serial @@ -82,6 +91,9 @@ test-threads: KokkosAlgorithms_UnitTest_Threads test-openmp: KokkosAlgorithms_UnitTest_OpenMP ./KokkosAlgorithms_UnitTest_OpenMP +test-hpx: KokkosAlgorithms_UnitTest_HPX + ./KokkosAlgorithms_UnitTest_HPX + test-serial: KokkosAlgorithms_UnitTest_Serial ./KokkosAlgorithms_UnitTest_Serial diff --git a/lib/kokkos/algorithms/unit_tests/TestHPX.cpp b/lib/kokkos/algorithms/unit_tests/TestHPX.cpp new file mode 100644 index 0000000000..e5b7dbdb7a --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestHPX.cpp @@ -0,0 +1,96 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#include +#ifdef KOKKOS_ENABLE_HPX + +#include +#include + +//---------------------------------------------------------------------------- +#include +#include +#include + +namespace Test { + +class hpx : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + } + + static void TearDownTestCase() + { + } +}; + +#define HPX_RANDOM_XORSHIFT64( num_draws ) \ + TEST_F( hpx, Random_XorShift64 ) { \ + Impl::test_random >(num_draws); \ + } + +#define HPX_RANDOM_XORSHIFT1024( num_draws ) \ + TEST_F( hpx, Random_XorShift1024 ) { \ + Impl::test_random >(num_draws); \ + } + +#define HPX_SORT_UNSIGNED( size ) \ + TEST_F( hpx, SortUnsigned ) { \ + Impl::test_sort< Kokkos::Experimental::HPX, unsigned >(size); \ + } + +HPX_RANDOM_XORSHIFT64( 10240000 ) +HPX_RANDOM_XORSHIFT1024( 10130144 ) +HPX_SORT_UNSIGNED(171) + +#undef HPX_RANDOM_XORSHIFT64 +#undef HPX_RANDOM_XORSHIFT1024 +#undef HPX_SORT_UNSIGNED +} // namespace test +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTHPX_PREVENT_LINK_ERROR() {} +#endif + diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp index e0c646c199..5fd7f09b50 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -225,9 +225,9 @@ void test_dynamic_view_sort(unsigned int n ) Kokkos::Random_XorShift64_Pool g(1931); Kokkos::fill_random(keys_view,g,Kokkos::Random_XorShift64_Pool::generator_type::MAX_URAND); - ExecutionSpace::fence(); + ExecutionSpace().fence(); Kokkos::deep_copy(keys,keys_view); - //ExecutionSpace::fence(); + //ExecutionSpace().fence(); double sum_before = 0.0; double sum_after = 0.0; @@ -237,9 +237,9 @@ void test_dynamic_view_sort(unsigned int n ) Kokkos::sort(keys, 0 /* begin */ , n /* end */ ); - ExecutionSpace::fence(); // Need this 
fence to prevent BusError with Cuda + ExecutionSpace().fence(); // Need this fence to prevent BusError with Cuda Kokkos::deep_copy( keys_view , keys ); - //ExecutionSpace::fence(); + //ExecutionSpace().fence(); Kokkos::parallel_reduce(n,sum(keys_view),sum_after); Kokkos::parallel_reduce(n-1,is_sorted_struct(keys_view),sort_fails); diff --git a/lib/kokkos/cmake/kokkos_build.cmake b/lib/kokkos/cmake/kokkos_build.cmake index 8178483d01..f9b995baae 100644 --- a/lib/kokkos/cmake/kokkos_build.cmake +++ b/lib/kokkos/cmake/kokkos_build.cmake @@ -76,8 +76,20 @@ IF(KOKKOS_SEPARATE_LIBS) ) foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES) - if ("${lib}" STREQUAL "cuda") + if (("${lib}" STREQUAL "cuda") AND (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) set(LIB_cuda "-lcuda") + elseif ("${lib}" STREQUAL "hpx") + find_package(HPX REQUIRED) + if(${HPX_FOUND}) + target_link_libraries(kokkoscore PUBLIC ${HPX_LIBRARIES}) + target_link_libraries(kokkoscontainers PUBLIC ${HPX_LIBRARIES}) + target_link_libraries(kokkosalgorithms PUBLIC ${HPX_LIBRARIES}) + target_include_directories(kokkoscore PUBLIC ${HPX_INCLUDE_DIRS}) + target_include_directories(kokkoscontainers PUBLIC ${HPX_INCLUDE_DIRS}) + target_include_directories(kokkosalgorithms PUBLIC ${HPX_INCLUDE_DIRS}) + else() + message(FATAL_ERROR "HPX not found. Check the value of HPX_DIR (= ${HPX_DIR}) or CMAKE_PREFIX_PATH (= ${CMAKE_PREFIX_PATH}).") + endif() else() find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS}) endif() @@ -158,8 +170,16 @@ ELSE() ) foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES) - if ("${lib}" STREQUAL "cuda") + if (("${lib}" STREQUAL "cuda") AND (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")) set(LIB_cuda "-lcuda") + elseif ("${lib}" STREQUAL "hpx") + find_package(HPX REQUIRED) + if(${HPX_FOUND}) + target_link_libraries(kokkos PUBLIC ${HPX_LIBRARIES}) + target_include_directories(kokkos PUBLIC ${HPX_INCLUDE_DIRS}) + else() + message(FATAL_ERROR "HPX not found.
Check the value of HPX_DIR (= ${HPX_DIR}) or CMAKE_PREFIX_PATH (= ${CMAKE_PREFIX_PATH}).") + endif() else() find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS}) endif() diff --git a/lib/kokkos/cmake/kokkos_functions.cmake b/lib/kokkos/cmake/kokkos_functions.cmake index bc490115af..616618753b 100644 --- a/lib/kokkos/cmake/kokkos_functions.cmake +++ b/lib/kokkos/cmake/kokkos_functions.cmake @@ -95,7 +95,7 @@ function(set_kokkos_cxx_compiler) message(FATAL_ERROR "Compiling CUDA code directly with Clang requires version 4.0.0 or higher.") endif() elseif(NOT INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA) - message(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang.") + message(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang, but compiler ID was ${INTERNAL_CXX_COMPILER_ID}") endif() endif() diff --git a/lib/kokkos/cmake/kokkos_options.cmake b/lib/kokkos/cmake/kokkos_options.cmake index be494e5df0..e730a94664 100644 --- a/lib/kokkos/cmake/kokkos_options.cmake +++ b/lib/kokkos/cmake/kokkos_options.cmake @@ -14,6 +14,7 @@ list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST OpenMP Pthread Qthread + HPX Cuda ROCm HWLOC @@ -23,6 +24,7 @@ list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST Cuda_Relocatable_Device_Code Cuda_UVM Cuda_LDG_Intrinsic + HPX_ASYNC_DISPATCH Debug Debug_DualView_Modify_Check Debug_Bounds_Check @@ -116,6 +118,7 @@ list(APPEND KOKKOS_DEVICES_LIST OpenMP # OpenMP Pthread # pthread Qthreads # qthreads + HPX # HPX Serial # serial ROCm # Relocatable device code ) @@ -173,6 +176,19 @@ set(KOKKOS_INTERNAL_RELOCATABLE_DEVICE_CODE rdc) set(KOKKOS_INTERNAL_LAMBDA enable_lambda) +#------------------------------------------------------------------------------- +# List of possible Options for HPX +#------------------------------------------------------------------------------- +# From Makefile.kokkos: Options: enable_async_dispatch +set(KOKKOS_HPX_OPTIONS_LIST) +list(APPEND KOKKOS_HPX_OPTIONS_LIST + 
ASYNC_DISPATCH # enable_async_dispatch + ) + +# Map of cmake variables to Makefile variables +set(KOKKOS_INTERNAL_ENABLE_ASYNC_DISPATCH enable_async_dispatch) + + #------------------------------------------------------------------------------- #------------------------------- Create doc strings ---------------------------- #------------------------------------------------------------------------------- @@ -202,6 +218,11 @@ set(KOKKOS_SEPARATE_LIBS OFF CACHE BOOL "OFF = kokkos. ON = kokkoscore, kokkosc # Qthreads options. set(KOKKOS_QTHREADS_DIR "" CACHE PATH "Location of Qthreads library.") +# HPX options. +set(KOKKOS_HPX_DIR "" CACHE PATH "Location of HPX library.") + +# Whether to build separate unit test targets or not +set(KOKKOS_SEPARATE_TESTS OFF CACHE BOOL "Provide unit test targets with finer granularity.") #------------------------------------------------------------------------------- #------------------------------- KOKKOS_DEVICES -------------------------------- @@ -215,6 +236,11 @@ IF(Trilinos_ENABLE_Kokkos) ELSE() set_kokkos_default_default(QTHREADS OFF) ENDIF() + IF(TPL_ENABLE_HPX) + set_kokkos_default_default(HPX ON) + ELSE() + set_kokkos_default_default(HPX OFF) + ENDIF() IF(Trilinos_ENABLE_OpenMP) set_kokkos_default_default(OPENMP ${Trilinos_ENABLE_OpenMP}) ELSE() @@ -231,6 +257,7 @@ ELSE() set_kokkos_default_default(OPENMP OFF) set_kokkos_default_default(PTHREAD OFF) set_kokkos_default_default(QTHREAD OFF) + set_kokkos_default_default(HPX OFF) set_kokkos_default_default(CUDA OFF) set_kokkos_default_default(ROCM OFF) ENDIF() @@ -241,6 +268,7 @@ set(KOKKOS_ENABLE_SERIAL ${KOKKOS_INTERNAL_ENABLE_SERIAL_DEFAULT} CACHE BOOL "Wh set(KOKKOS_ENABLE_OPENMP ${KOKKOS_INTERNAL_ENABLE_OPENMP_DEFAULT} CACHE BOOL "Enable OpenMP support in Kokkos."
FORCE) set(KOKKOS_ENABLE_PTHREAD ${KOKKOS_INTERNAL_ENABLE_PTHREAD_DEFAULT} CACHE BOOL "Enable Pthread support in Kokkos.") set(KOKKOS_ENABLE_QTHREADS ${KOKKOS_INTERNAL_ENABLE_QTHREADS_DEFAULT} CACHE BOOL "Enable Qthreads support in Kokkos.") +set(KOKKOS_ENABLE_HPX ${KOKKOS_INTERNAL_ENABLE_HPX_DEFAULT} CACHE BOOL "Enable HPX support in Kokkos.") set(KOKKOS_ENABLE_CUDA ${KOKKOS_INTERNAL_ENABLE_CUDA_DEFAULT} CACHE BOOL "Enable CUDA support in Kokkos.") set(KOKKOS_ENABLE_ROCM ${KOKKOS_INTERNAL_ENABLE_ROCM_DEFAULT} CACHE BOOL "Enable ROCm support in Kokkos.") @@ -343,6 +371,18 @@ set(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE ${KOKKOS_INTERNAL_ENABLE_CUDA_REL set(KOKKOS_ENABLE_CUDA_LAMBDA ${KOKKOS_INTERNAL_ENABLE_CUDA_LAMBDA_DEFAULT} CACHE BOOL "Enable lambdas for CUDA. (cuda option)") +#------------------------------------------------------------------------------- +#------------------------------- KOKKOS_HPX_OPTIONS ---------------------------- +#------------------------------------------------------------------------------- + +# HPX options. 
+# Set Defaults +set_kokkos_default_default(HPX_ASYNC_DISPATCH OFF) + +# Set actual options +set(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH ${KOKKOS_INTERNAL_ENABLE_HPX_ASYNC_DISPATCH_DEFAULT} CACHE BOOL "Enable HPX async dispatch.") + + #------------------------------------------------------------------------------- #----------------------- HOST ARCH AND LEGACY TRIBITS -------------------------- #------------------------------------------------------------------------------- @@ -376,4 +416,3 @@ foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST}) SET(Kokkos_ENABLE_${opt} ${KOKKOS_ENABLE_${OPT}} CACHE BOOL "CamelCase Compatibility setting for KOKKOS_ENABLE_${OPT}") ENDIF() endforeach() - diff --git a/lib/kokkos/cmake/kokkos_settings.cmake b/lib/kokkos/cmake/kokkos_settings.cmake index 387ced6d52..2c622d0de9 100644 --- a/lib/kokkos/cmake/kokkos_settings.cmake +++ b/lib/kokkos/cmake/kokkos_settings.cmake @@ -198,6 +198,8 @@ if(KOKKOS_CMAKE_VERBOSE) message(STATUS " Host Parallel: Pthread") elseif(KOKKOS_ENABLE_QTHREADS) message(STATUS " Host Parallel: Qthreads") + elseif(KOKKOS_ENABLE_HPX) + message(STATUS " Host Parallel: HPX") else() message(STATUS " Host Parallel: None") endif() @@ -244,6 +246,10 @@ if(KOKKOS_CMAKE_VERBOSE) message(STATUS " KOKKOS_MEMKIND_DIR: ${KOKKOS_MEMKIND_DIR}") endif() + if(KOKKOS_HPX_DIR) + message(STATUS " KOKKOS_HPX_DIR: ${KOKKOS_HPX_DIR}") + endif() + message(STATUS "") message(STATUS "Final kokkos settings variable:") message(STATUS " ${KOKKOS_SETTINGS}") diff --git a/lib/kokkos/cmake/tribits.cmake b/lib/kokkos/cmake/tribits.cmake index f8eebc29f8..1f467f0662 100644 --- a/lib/kokkos/cmake/tribits.cmake +++ b/lib/kokkos/cmake/tribits.cmake @@ -9,6 +9,10 @@ IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP) SET(${PROJECT_NAME}_ENABLE_OpenMP OFF) ENDIF() +IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_HPX) + SET(${PROJECT_NAME}_ENABLE_HPX OFF) +ENDIF() + IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG) SET(${PROJECT_NAME}_ENABLE_DEBUG OFF) ENDIF() @@ -309,6 +313,10 @@ 
ENDFUNCTION() FUNCTION(TRIBITS_TPL_TENTATIVELY_ENABLE) ENDFUNCTION() +FUNCTION(TRIBITS_ADD_ADVANCED_TEST) + # TODO Write this +ENDFUNCTION() + FUNCTION(TRIBITS_ADD_EXECUTABLE_AND_TEST EXE_NAME) SET(options STANDARD_PASS_OUTPUT WILL_FAIL) diff --git a/lib/kokkos/containers/cmake/Dependencies.cmake b/lib/kokkos/containers/cmake/Dependencies.cmake index 1d71d8af34..5e29157369 100644 --- a/lib/kokkos/containers/cmake/Dependencies.cmake +++ b/lib/kokkos/containers/cmake/Dependencies.cmake @@ -1,5 +1,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( LIB_REQUIRED_PACKAGES KokkosCore - LIB_OPTIONAL_TPLS Pthread CUDA HWLOC + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC HPX TEST_OPTIONAL_TPLS CUSPARSE ) diff --git a/lib/kokkos/containers/performance_tests/CMakeLists.txt b/lib/kokkos/containers/performance_tests/CMakeLists.txt index 1203a8bd81..3c6584bc34 100644 --- a/lib/kokkos/containers/performance_tests/CMakeLists.txt +++ b/lib/kokkos/containers/performance_tests/CMakeLists.txt @@ -24,6 +24,10 @@ IF(Kokkos_ENABLE_OpenMP) LIST( APPEND SOURCES TestOpenMP.cpp) ENDIF() +IF(Kokkos_ENABLE_HPX) + LIST( APPEND SOURCES TestHPX.cpp) +ENDIF() + # Per #374, we always want to build this test, but we only want to run # it as a PERFORMANCE test. That's why we separate building the test # from running the test. 
diff --git a/lib/kokkos/containers/performance_tests/Makefile b/lib/kokkos/containers/performance_tests/Makefile index ebed75ccd6..f309a220d0 100644 --- a/lib/kokkos/containers/performance_tests/Makefile +++ b/lib/kokkos/containers/performance_tests/Makefile @@ -49,6 +49,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) TEST_TARGETS += test-openmp endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + OBJ_HPX = TestHPX.o TestMain.o gtest-all.o + TARGETS += KokkosContainers_PerformanceTest_HPX + TEST_TARGETS += test-hpx +endif + KokkosContainers_PerformanceTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Cuda @@ -61,6 +67,9 @@ KokkosContainers_PerformanceTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) KokkosContainers_PerformanceTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_OpenMP +KokkosContainers_PerformanceTest_HPX: $(OBJ_HPX) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_HPX) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_HPX + test-cuda: KokkosContainers_PerformanceTest_Cuda ./KokkosContainers_PerformanceTest_Cuda @@ -73,6 +82,9 @@ test-threads: KokkosContainers_PerformanceTest_Threads test-openmp: KokkosContainers_PerformanceTest_OpenMP ./KokkosContainers_PerformanceTest_OpenMP +test-hpx: KokkosContainers_PerformanceTest_HPX + ./KokkosContainers_PerformanceTest_HPX + build_all: $(TARGETS) test: $(TEST_TARGETS) diff --git a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp index 0d2fae32a3..db6274e057 100644 --- a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp +++ b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp @@ -197,7 +197,7 @@ void test_dynrankview_op_perf( const int par_size ) 
timer.reset(); Kokkos::RangePolicy policy(0,par_size); Kokkos::parallel_for( policy , FunctorType(testview) ); - DeviceType::fence(); + DeviceType().fence(); elapsed_time_view = timer.seconds(); std::cout << " View time (init only): " << elapsed_time_view << std::endl; @@ -205,7 +205,7 @@ void test_dynrankview_op_perf( const int par_size ) timer.reset(); Kokkos::View sumview("sumview",par_size); Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testview, sumview) ); - DeviceType::fence(); + DeviceType().fence(); elapsed_time_compview = timer.seconds(); std::cout << " View sum computation time: " << elapsed_time_view << std::endl; @@ -215,7 +215,7 @@ void test_dynrankview_op_perf( const int par_size ) timer.reset(); Kokkos::parallel_for( policy , FunctorStrideType(teststrideview) ); - DeviceType::fence(); + DeviceType().fence(); elapsed_time_strideview = timer.seconds(); std::cout << " Strided View time (init only): " << elapsed_time_strideview << std::endl; } @@ -226,7 +226,7 @@ void test_dynrankview_op_perf( const int par_size ) timer.reset(); Kokkos::RangePolicy policy(0,par_size); Kokkos::parallel_for( policy , FunctorType(testview) ); - DeviceType::fence(); + DeviceType().fence(); elapsed_time_view_rank7 = timer.seconds(); std::cout << " View Rank7 time (init only): " << elapsed_time_view_rank7 << std::endl; } @@ -237,14 +237,14 @@ void test_dynrankview_op_perf( const int par_size ) timer.reset(); Kokkos::RangePolicy policy(0,par_size); Kokkos::parallel_for( policy , FunctorType(testdrview) ); - DeviceType::fence(); + DeviceType().fence(); elapsed_time_drview = timer.seconds(); std::cout << " DynRankView time (init only): " << elapsed_time_drview << std::endl; timer.reset(); Kokkos::DynRankView sumview("sumview",par_size); Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testdrview, sumview) ); - DeviceType::fence(); + DeviceType().fence(); elapsed_time_compdrview = timer.seconds(); std::cout << " DynRankView sum 
computation time: " << elapsed_time_compdrview << std::endl; diff --git a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp index dcaca776be..98997b3239 100644 --- a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp +++ b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp @@ -192,7 +192,7 @@ void test_global_to_local_ids(unsigned num_ids) { generate_ids gen(local_2_global); } - Device::fence(); + Device().fence(); // generate elasped_time = timer.seconds(); std::cout << elasped_time << ", "; @@ -201,7 +201,7 @@ void test_global_to_local_ids(unsigned num_ids) { fill_map fill(global_2_local, local_2_global); } - Device::fence(); + Device().fence(); // fill elasped_time = timer.seconds(); @@ -214,7 +214,7 @@ void test_global_to_local_ids(unsigned num_ids) { find_test find(global_2_local, local_2_global,num_errors); } - Device::fence(); + Device().fence(); // find elasped_time = timer.seconds(); diff --git a/lib/kokkos/containers/performance_tests/TestHPX.cpp b/lib/kokkos/containers/performance_tests/TestHPX.cpp new file mode 100644 index 0000000000..0f43377cee --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestHPX.cpp @@ -0,0 +1,130 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#if defined( KOKKOS_ENABLE_HPX ) + +#include + +#include + +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include + + +namespace Performance { + +class hpx : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + Kokkos::initialize(); + Kokkos::print_configuration( std::cout ); + } + + static void TearDownTestCase() + { + Kokkos::finalize(); + } +}; + +TEST_F( hpx, dynrankview_perf ) +{ + std::cout << "HPX" << std::endl; + std::cout << " DynRankView vs View: Initialization Only " << std::endl; + test_dynrankview_op_perf( 8192 ); +} + +TEST_F( hpx, global_2_local) +{ + std::cout << "HPX" << std::endl; + std::cout << "size, create, generate, fill, find" << std::endl; + for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + test_global_to_local_ids(i); +} + +TEST_F( hpx, unordered_map_performance_near) +{ + unsigned num_hpx = 4; + std::ostringstream base_file_name; + base_file_name << "hpx-" << num_hpx << "-near"; + Perf::run_performance_tests(base_file_name.str()); +} + +TEST_F( hpx, unordered_map_performance_far) +{ + unsigned num_hpx = 4; + std::ostringstream base_file_name; + base_file_name << "hpx-" << num_hpx << "-far"; + Perf::run_performance_tests(base_file_name.str()); +} + +TEST_F( hpx, scatter_view) +{ + std::cout << "ScatterView data-duplicated test:\n"; + Perf::test_scatter_view(10, 1000 * 1000); +//std::cout << "ScatterView atomics test:\n"; +//Perf::test_scatter_view(10, 1000 * 1000); +} + +} // namespace Performance +#else +void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTHPX_PREVENT_EMPTY_LINK_ERROR() {} +#endif + diff --git a/lib/kokkos/containers/performance_tests/TestScatterView.hpp b/lib/kokkos/containers/performance_tests/TestScatterView.hpp index
03129d2b09..bd9121bb82 100644 --- a/lib/kokkos/containers/performance_tests/TestScatterView.hpp +++ b/lib/kokkos/containers/performance_tests/TestScatterView.hpp @@ -83,6 +83,7 @@ void test_scatter_view(int m, int n) for (int k = 0; k < m; ++k) { Kokkos::parallel_for(policy, f2, "hand_coded_duplicate_scatter_view_test"); } + Kokkos::fence(); auto t = timer.seconds(); std::cout << "hand-coded test took " << t << " seconds\n"; } @@ -101,6 +102,7 @@ void test_scatter_view(int m, int n) for (int k = 0; k < m; ++k) { Kokkos::parallel_for(policy, f, "scatter_view_test"); } + Kokkos::fence(); auto t = timer.seconds(); std::cout << "test took " << t << " seconds\n"; } diff --git a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp index e8734b259d..8d09281ed3 100644 --- a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp +++ b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp @@ -108,7 +108,7 @@ struct UnorderedMapTest std::cout << std::setprecision(2) << std::fixed << std::setw(5) << (1e9*(seconds/(inserts))) << "; " << std::flush; histogram.calculate(); - Device::fence(); + Device().fence(); } void print(std::ostream & metrics_out, std::ostream & length_out, std::ostream & distance_out, std::ostream & block_distance_out) @@ -236,7 +236,7 @@ void run_performance_tests(std::string const & base_file_name) uint32_t inserts = static_cast(test_ratios[j]*(capacity)); std::cout << capacity << std::flush; UnorderedMapTest test(capacity, inserts*collisions[i], collisions[i]); - Device::fence(); + Device().fence(); test.print(metrics_out, length_out, distance_out, block_distance_out); } std::cout << "\b\b " << std::endl; diff --git a/lib/kokkos/containers/src/Kokkos_Bitset.hpp b/lib/kokkos/containers/src/Kokkos_Bitset.hpp index bfe8080f3b..4d78430fc6 100644 --- a/lib/kokkos/containers/src/Kokkos_Bitset.hpp +++ 
b/lib/kokkos/containers/src/Kokkos_Bitset.hpp @@ -107,22 +107,20 @@ public: } } - /// assignment - Bitset & operator = (Bitset const & rhs) - { - this->m_size = rhs.m_size; - this->m_last_block_mask = rhs.m_last_block_mask; - this->m_blocks = rhs.m_blocks; + KOKKOS_INLINE_FUNCTION + Bitset (const Bitset&) = default; - return *this; - } + KOKKOS_INLINE_FUNCTION + Bitset& operator= (const Bitset&) = default; - /// copy constructor - Bitset( Bitset const & rhs) - : m_size( rhs.m_size ) - , m_last_block_mask( rhs.m_last_block_mask ) - , m_blocks( rhs.m_blocks ) - {} + KOKKOS_INLINE_FUNCTION + Bitset (Bitset&&) = default; + + KOKKOS_INLINE_FUNCTION + Bitset& operator= (Bitset&&) = default; + + KOKKOS_INLINE_FUNCTION + ~Bitset () = default; /// number of bits in the set /// can be call from the host or the device diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp index f6631a4149..d9b14d67a2 100644 --- a/lib/kokkos/containers/src/Kokkos_DualView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp @@ -484,8 +484,8 @@ public: } } if(std::is_same::value) { - t_dev::execution_space::fence(); - t_host::execution_space::fence(); + typename t_dev::execution_space().fence(); + typename t_host::execution_space().fence(); } } diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp index 3f284e6a8d..d1e6704a57 100644 --- a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -75,7 +75,7 @@ struct DynRankDimTraits { , const size_t N4 , const size_t N5 , const size_t N6 - , const size_t N7 ) + , const size_t /* N7 */) { return ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified && N1 == unspecified && N0 == unspecified) ? 
0 @@ -106,7 +106,7 @@ struct DynRankDimTraits { // Extra overload to match that for specialize types v2 template KOKKOS_INLINE_FUNCTION - static size_t computeRank( const Kokkos::Impl::ViewCtorProp& prop, const Layout& layout ) + static size_t computeRank( const Kokkos::Impl::ViewCtorProp& /* prop */, const Layout& layout ) { return computeRank(layout); } @@ -155,7 +155,7 @@ struct DynRankDimTraits { // Extra overload to match that for specialize types template KOKKOS_INLINE_FUNCTION - static typename std::enable_if< (std::is_same::value || std::is_same::value || std::is_same::value) , typename Traits::array_layout >::type createLayout( const Kokkos::Impl::ViewCtorProp& prop, const typename Traits::array_layout& layout ) + static typename std::enable_if< (std::is_same::value || std::is_same::value || std::is_same::value) , typename Traits::array_layout >::type createLayout( const Kokkos::Impl::ViewCtorProp& /* prop */, const typename Traits::array_layout& layout ) { return createLayout( layout ); } @@ -655,7 +655,7 @@ public: const size_t dim_scalar = m_map.dimension_scalar(); const size_t bytes = this->span() / dim_scalar; - typedef Kokkos::View > tmp_view_type; + typedef Kokkos::View > tmp_view_type; tmp_view_type rankone_view(this->data(), bytes, dim_scalar); return rankone_view(i0); } @@ -1060,7 +1060,7 @@ public: } // Copy the input allocation properties with possibly defaulted properties - alloc_prop prop( arg_prop ); + alloc_prop prop_copy( arg_prop ); //------------------------------------------------------------ #if defined( KOKKOS_ENABLE_CUDA ) @@ -1070,18 +1070,18 @@ public: // Fence using the trait's executon space (which will be Kokkos::Cuda) // to avoid incomplete type errors from usng Kokkos::Cuda directly. 
if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - traits::device_type::memory_space::execution_space::fence(); + typename traits::device_type::memory_space::execution_space().fence(); } #endif //------------------------------------------------------------ Kokkos::Impl::SharedAllocationRecord<> * - record = m_map.allocate_shared( prop , Impl::DynRankDimTraits::template createLayout(arg_prop, arg_layout) ); + record = m_map.allocate_shared( prop_copy, Impl::DynRankDimTraits::template createLayout(arg_prop, arg_layout) ); //------------------------------------------------------------ #if defined( KOKKOS_ENABLE_CUDA ) if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - traits::device_type::memory_space::execution_space::fence(); + typename traits::device_type::memory_space::execution_space().fence(); } #endif //------------------------------------------------------------ @@ -1609,7 +1609,7 @@ struct DynRankViewFill { closure.execute(); - execution_space::fence(); + execution_space().fence(); } }; @@ -1650,6 +1650,7 @@ struct DynRankViewRemap { typedef Kokkos::RangePolicy< ExecSpace > Policy ; const Kokkos::Impl::ParallelFor< DynRankViewRemap , Policy > closure( *this , Policy( 0 , n0 ) ); closure.execute(); + // Kokkos::fence(); // ?? 
} KOKKOS_INLINE_FUNCTION diff --git a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp index ab782a82ad..37d56e7cfb 100644 --- a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp @@ -288,8 +288,8 @@ public: >::type resize_serial( IntType const & n ) { - typedef typename traits::value_type value_type ; - typedef value_type * value_pointer_type ; + typedef typename traits::value_type local_value_type ; + typedef local_value_type * value_pointer_type ; const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ; // New total number of chunks needed for resize @@ -304,8 +304,8 @@ public: if ( *pc < NC ) { while ( *pc < NC ) { m_chunks[*pc] = reinterpret_cast - ( - typename traits::memory_space().allocate( sizeof(value_type) << m_chunk_shift ) + ( + typename traits::memory_space().allocate( sizeof(local_value_type) << m_chunk_shift ) ); ++*pc ; } @@ -314,7 +314,7 @@ public: while ( NC + 1 <= *pc ) { --*pc ; typename traits::memory_space().deallocate( m_chunks[*pc] - , sizeof(value_type) << m_chunk_shift ); + , sizeof(local_value_type) << m_chunk_shift ); m_chunks[*pc] = 0 ; } } @@ -376,8 +376,8 @@ public: closure.execute(); - traits::execution_space::fence(); - //Impl::ChunkArraySpace< typename traits::memory_space >::memory_space::execution_space::fence(); + typename traits::execution_space().fence(); + //Impl::ChunkArraySpace< typename traits::memory_space >::memory_space::execution_space().fence(); } void construct_shared_allocation() diff --git a/lib/kokkos/containers/src/Kokkos_OffsetView.hpp b/lib/kokkos/containers/src/Kokkos_OffsetView.hpp index b614764ee7..4ce1f4d84f 100644 --- a/lib/kokkos/containers/src/Kokkos_OffsetView.hpp +++ b/lib/kokkos/containers/src/Kokkos_OffsetView.hpp @@ -202,8 +202,8 @@ namespace Kokkos { template ::value, iType>::type = 0> KOKKOS_INLINE_FUNCTION - int64_t begin(const iType dimension) const { - return dimension < Rank ? 
m_begins[dimension] : 0; + int64_t begin(const iType local_dimension) const { + return local_dimension < Rank ? m_begins[local_dimension] : 0; } KOKKOS_INLINE_FUNCTION @@ -211,7 +211,9 @@ namespace Kokkos { template ::value, iType>::type = 0> KOKKOS_INLINE_FUNCTION - int64_t end(const iType dimension) const {return begin(dimension) + m_map.extent(dimension);} + int64_t end(const iType local_dimension) const { + return begin(local_dimension) + m_map.extent(local_dimension); + } private: @@ -1068,7 +1070,7 @@ namespace Kokkos { } // Copy the input allocation properties with possibly defaulted properties - alloc_prop prop( arg_prop ); + alloc_prop prop_copy( arg_prop ); //------------------------------------------------------------ #if defined( KOKKOS_ENABLE_CUDA ) @@ -1078,18 +1080,18 @@ namespace Kokkos { // Fence using the trait's executon space (which will be Kokkos::Cuda) // to avoid incomplete type errors from usng Kokkos::Cuda directly. if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - traits::device_type::memory_space::execution_space::fence(); + typename traits::device_type::memory_space::execution_space().fence(); } #endif //------------------------------------------------------------ Kokkos::Impl::SharedAllocationRecord<> * - record = m_map.allocate_shared( prop , arg_layout ); + record = m_map.allocate_shared( prop_copy , arg_layout ); //------------------------------------------------------------ #if defined( KOKKOS_ENABLE_CUDA ) if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - traits::device_type::memory_space::execution_space::fence(); + typename traits::device_type::memory_space::execution_space().fence(); } #endif //------------------------------------------------------------ diff --git a/lib/kokkos/containers/src/Kokkos_ScatterView.hpp b/lib/kokkos/containers/src/Kokkos_ScatterView.hpp index 8e56857887..a8c05e3f36 100644 --- 
a/lib/kokkos/containers/src/Kokkos_ScatterView.hpp +++ b/lib/kokkos/containers/src/Kokkos_ScatterView.hpp @@ -57,9 +57,16 @@ namespace Kokkos { namespace Experimental { -//TODO: replace this enum with the Kokkos::Sum, etc reducers for parallel_reduce +/* + * Reduction Type list + * - These corresponds to subset of the reducers in parallel_reduce + * - See Implementations of ScatterValue for details. + */ enum : int { ScatterSum, + ScatterProd, + ScatterMax, + ScatterMin, }; enum : int { @@ -114,6 +121,21 @@ struct DefaultContribution +struct DefaultDuplication { + enum : int { value = Kokkos::Experimental::ScatterDuplicated }; +}; +template <> +struct DefaultContribution { + enum : int { value = Kokkos::Experimental::ScatterAtomic }; +}; +template <> +struct DefaultContribution { + enum : int { value = Kokkos::Experimental::ScatterNonAtomic }; +}; +#endif + #ifdef KOKKOS_ENABLE_THREADS template <> struct DefaultDuplication { @@ -144,39 +166,277 @@ struct DefaultContribution is the object returned by the access operator() of ScatterAccess, + This class inherits from the Sum<> reducer and it wraps join(dest, src) with convenient operator+=, etc. 
+ Note the addition of update(ValueType const& rhs) and reset() so that all reducers can have common functions + See ReduceDuplicates and ResetDuplicates ) */ template struct ScatterValue; template -struct ScatterValue { +struct ScatterValue : + Sum { public: - KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : value( value_in ) {} - KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : value( other.value ) {} + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Sum(value_in) + {} + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : + Sum(other.reference()) + {} KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) { - value += rhs; + this->join( this->reference(), rhs ); } KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) { - value -= rhs; + this->join( this->reference(), -rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); } - private: - ValueType& value; }; +/* ScatterValue is the object returned by the access operator() + * of ScatterAccess, similar to that returned by an Atomic View, it wraps Kokkos::atomic_add with convenient + operator+=, etc. This version also has the update(rhs) and reset() functions. 
*/ template -struct ScatterValue { +struct ScatterValue : + Sum { public: - KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : value( value_in ) {} + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Sum(value_in) + {} + KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) { - Kokkos::atomic_add(&value, rhs); + this->join(this->reference(), rhs); } KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) { - Kokkos::atomic_add(&value, -rhs); + this->join(this->reference(), -rhs); } - private: - ValueType& value; + + KOKKOS_INLINE_FUNCTION + void join(ValueType& dest, const ValueType& src) const { + Kokkos::atomic_add(&dest, src); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile ValueType& dest, const volatile ValueType& src) const { + Kokkos::atomic_add(&dest, src); + } + + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); + } +}; + +/* ScatterValue is the object returned by the access operator() of ScatterAccess, + This class inherits from the Prod<> reducer and it wraps join(dest, src) with convenient operator*=, etc. 
+ Note the addition of update(ValueType const& rhs) and reset() so that all reducers can have common functions + See ReduceDuplicates and ResetDuplicates ) */ +template +struct ScatterValue : + Prod { + public: + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Prod(value_in) + {} + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : + Prod(other.reference()) + {} + KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) { + this->join( this->reference(), static_cast(1)/rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); + } +}; + +/* ScatterValue is the object returned by the access operator() + * of ScatterAccess, similar to that returned by an Atomic View, it wraps and atomic_prod with convenient + operator*=, etc. atomic_prod uses the atomic_compare_exchange. This version also has the update(rhs) and reset() functions. 
*/ +template +struct ScatterValue : + Prod { + public: + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Prod(value_in) + {} + + KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) { + this->join(this->reference(), rhs); + } + KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) { + this->join(this->reference(), static_cast(1)/rhs); + } + + KOKKOS_FORCEINLINE_FUNCTION + void atomic_prod(ValueType & dest, const ValueType& src) const { + + bool success = false; + while(!success) { + ValueType dest_old = dest; + ValueType dest_new = dest_old * src; + dest_new = Kokkos::atomic_compare_exchange(&dest,dest_old,dest_new); + success = ( (dest_new - dest_old)/dest_old <= 1e-15 ); + } + } + + KOKKOS_INLINE_FUNCTION + void join(ValueType& dest, const ValueType& src) const { + atomic_prod(dest, src); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile ValueType& dest, const volatile ValueType& src) const { + atomic_prod(dest, src); + } + + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); + } + +}; + +/* ScatterValue is the object returned by the access operator() of ScatterAccess, + This class inherits from the Min<> reducer and it wraps join(dest, src) with convenient update(rhs). 
+ Note the addition of update(ValueType const& rhs) and reset() are so that all reducers can have a common update function + See ReduceDuplicates and ResetDuplicates ) */ +template +struct ScatterValue : + Min { + public: + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Min(value_in) + {} + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : + Min(other.reference()) + {} + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); + } +}; + +/* ScatterValue is the object returned by the access operator() + * of ScatterAccess, similar to that returned by an Atomic View, it wraps and atomic_min with the update(rhs) + function. atomic_min uses the atomic_compare_exchange. This version also has the reset() function */ +template +struct ScatterValue : + Min { + public: + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Min(value_in) + {} + + KOKKOS_FORCEINLINE_FUNCTION + void atomic_min(ValueType & dest, const ValueType& src) const { + + bool success = false; + while(!success) { + ValueType dest_old = dest; + ValueType dest_new = ( dest_old > src ) ? 
src : dest_old; + dest_new = Kokkos::atomic_compare_exchange(&dest,dest_old,dest_new); + success = ( (dest_new - dest_old)/dest_old <= 1e-15 ); + } + } + + KOKKOS_INLINE_FUNCTION + void join(ValueType& dest, const ValueType& src) const { + atomic_min(dest, src); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile ValueType& dest, const volatile ValueType& src) const { + atomic_min(dest, src); + } + + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); + } + +}; + +/* ScatterValue is the object returned by the access operataor() of ScatterAccess, + This class inherits from the Max<> reducer and it wraps join(dest, src) with convenient update(rhs). + Note the addition of update(ValueType const& rhs) and reset() are so that all reducers can have a common update function + See ReduceDuplicates and ResetDuplicates ) */ +template +struct ScatterValue : + Max { + public: + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Max(value_in) + {} + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : + Max(other.reference()) + {} + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); + } +}; + +/* ScatterValue is the object returned by the access operator() + * of ScatterAccess, similar to that returned by an Atomic View, it wraps and atomic_max with the update(rhs) + function. atomic_max uses the atomic_compare_exchange. 
This version also has the reset() function */ +template +struct ScatterValue : + Max { + public: + KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : + Max(value_in) + {} + + KOKKOS_FORCEINLINE_FUNCTION + void atomic_max(ValueType & dest, const ValueType& src) const { + + bool success = false; + while(!success) { + ValueType dest_old = dest; + ValueType dest_new = ( dest_old < src ) ? src : dest_old; + dest_new = Kokkos::atomic_compare_exchange(&dest,dest_old,dest_new); + success = ( (dest_new - dest_old)/dest_old <= 1e-15 ); + } + } + + KOKKOS_INLINE_FUNCTION + void join(ValueType& dest, const ValueType& src) const { + atomic_max(dest, src); + } + + KOKKOS_INLINE_FUNCTION + void join(volatile ValueType& dest, const volatile ValueType& src) const { + atomic_max(dest, src); + } + + KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { + this->join( this->reference(), rhs ); + } + KOKKOS_FORCEINLINE_FUNCTION void reset() { + this->init( this->reference() ); + } + }; /* DuplicatedDataType, given a View DataType, will create a new DataType @@ -226,6 +486,18 @@ struct DuplicatedDataType { typedef typename DuplicatedDataType::value_type* value_type; }; +/* Insert integer argument pack into array */ + +template +void args_to_array(size_t* array, int pos, T dim0) { + array[pos] = dim0; +} +template +void args_to_array(size_t* array, int pos, T dim0, Dims ... dims) { + array[pos] = dim0; + args_to_array(array,pos+1,dims...); +} + /* Slice is just responsible for stuffing the correct number of Kokkos::ALL arguments on the correct side of the index in a call to subview() to get a subview where the index specified is the largest-stride one. 
*/ @@ -304,21 +576,26 @@ struct ReduceDuplicatesBase { } }; -template -struct ReduceDuplicates : - public ReduceDuplicatesBase +/* ReduceDuplicates -- Perform reduction on destination array using strided source + * Use ScatterValue<> specific to operation to wrap destination array so that + * the reduction operation can be accessed via the update(rhs) function */ +template +struct ReduceDuplicates : + public ReduceDuplicatesBase { - typedef ReduceDuplicatesBase Base; + typedef ReduceDuplicatesBase Base; ReduceDuplicates(ValueType const* src_in, ValueType* dst_in, size_t stride_in, size_t start_in, size_t n_in, std::string const& name): Base(src_in, dst_in, stride_in, start_in, n_in, name) {} KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const { for (size_t j = Base::start; j < Base::n; ++j) { - Base::dst[i] += Base::src[i + Base::stride * j]; + ScatterValue sv(Base::dst[i]); + sv.update( Base::src[i + Base::stride * j] ); } } }; + template struct ResetDuplicates; @@ -347,19 +624,24 @@ struct ResetDuplicatesBase { } }; -template -struct ResetDuplicates : - public ResetDuplicatesBase +/* ResetDuplicates -- Perform reset on destination array + * Use ScatterValue<> specific to operation to wrap destination array so that + * the reset operation can be accessed via the reset() function */ +template +struct ResetDuplicates : + public ResetDuplicatesBase { - typedef ResetDuplicatesBase Base; + typedef ResetDuplicatesBase Base; ResetDuplicates(ValueType* data_in, size_t size_in, std::string const& name): Base(data_in, size_in, name) {} KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const { - Base::data[i] = Kokkos::reduction_identity::sum(); + ScatterValue sv(Base::data[i]); + sv.reset(); } }; + }}} // Kokkos::Impl::Experimental namespace Kokkos { @@ -519,12 +801,22 @@ public: typedef Kokkos::Impl::Experimental::ScatterValue< original_value_type, Op, override_contribution> value_type; + KOKKOS_INLINE_FUNCTION + ScatterAccess() : + view(view_type()) { + } + 
KOKKOS_INLINE_FUNCTION ScatterAccess(view_type const& view_in) : view(view_in) { } + KOKKOS_INLINE_FUNCTION + ~ScatterAccess() + { + } + template KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args ... args) const { @@ -608,7 +900,7 @@ public: } template - inline + KOKKOS_FORCEINLINE_FUNCTION ScatterAccess access() const { return ScatterAccess{*this}; @@ -729,14 +1021,14 @@ public: : unique_token() { size_t arg_N[8] = { - original_view.extent(0), - original_view.extent(1), - original_view.extent(2), - original_view.extent(3), - original_view.extent(4), - original_view.extent(5), - original_view.extent(6), - 0 + original_view.rank>0?original_view.extent(0):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>1?original_view.extent(1):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>2?original_view.extent(2):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>3?original_view.extent(3):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>4?original_view.extent(4):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>5?original_view.extent(5):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>6?original_view.extent(6):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + KOKKOS_IMPL_CTOR_DEFAULT_ARG }; arg_N[internal_view_type::rank - 1] = unique_token.size(); internal_view = internal_view_type( @@ -748,14 +1040,28 @@ public: } template - ScatterView(std::string const& name, Dims ... dims) - : internal_view(Kokkos::ViewAllocateWithoutInitializing(name), dims ..., unique_token.size()) - { + ScatterView(std::string const& name, Dims ... 
dims) { + original_view_type original_view; + size_t arg_N[8] = { + original_view.rank>0?original_view.static_extent(0):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>1?original_view.static_extent(1):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>2?original_view.static_extent(2):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>3?original_view.static_extent(3):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>4?original_view.static_extent(4):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>5?original_view.static_extent(5):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + original_view.rank>6?original_view.static_extent(6):KOKKOS_IMPL_CTOR_DEFAULT_ARG, + KOKKOS_IMPL_CTOR_DEFAULT_ARG + }; + Kokkos::Impl::Experimental::args_to_array(arg_N,0,dims ...); + arg_N[internal_view_type::rank - 1] = unique_token.size(); + internal_view = internal_view_type(Kokkos::ViewAllocateWithoutInitializing(name), + arg_N[0], arg_N[1], arg_N[2], arg_N[3], + arg_N[4], arg_N[5], arg_N[6], arg_N[7]); reset(); } template - inline + KOKKOS_FORCEINLINE_FUNCTION ScatterAccess access() const { return ScatterAccess{*this}; @@ -770,9 +1076,13 @@ public: } template - void contribute_into(View const& dest) const + void contribute_into(View const& dest) const { - typedef View dest_type; + typedef View dest_type; + static_assert(std::is_same< + typename dest_type::value_type, + typename original_view_type::non_const_value_type>::value, + "ScatterView deep_copy destination has wrong value_type"); static_assert(std::is_same< typename dest_type::array_layout, Kokkos::LayoutLeft>::value, @@ -891,12 +1201,14 @@ public: typedef Kokkos::Impl::Experimental::ScatterValue< original_value_type, Op, override_contribution> value_type; - inline ScatterAccess(view_type const& view_in) + KOKKOS_FORCEINLINE_FUNCTION + ScatterAccess(view_type const& view_in) : view(view_in) , thread_id(view_in.unique_token.acquire()) { } - inline ~ScatterAccess() { + KOKKOS_FORCEINLINE_FUNCTION + ~ScatterAccess() { if (thread_id != 
~thread_id_type(0)) view.unique_token.release(thread_id); } @@ -926,8 +1238,9 @@ private: public: // do need to allow moves though, for the common // auto b = a.access(); - // that assignments turns into a move constructor call - inline ScatterAccess(ScatterAccess&& other) + // that assignments turns into a move constructor call + KOKKOS_FORCEINLINE_FUNCTION + ScatterAccess(ScatterAccess&& other) : view(other.view) , thread_id(other.thread_id) { diff --git a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp index 64601e6b59..aed723288f 100644 --- a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp +++ b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp @@ -437,9 +437,9 @@ public: { bool result = !erasable(); if (is_insertable_map && result) { - execution_space::fence(); + execution_space().fence(); set_flag(erasable_idx); - execution_space::fence(); + execution_space().fence(); } return result; } @@ -448,10 +448,10 @@ public: { bool result = erasable(); if (is_insertable_map && result) { - execution_space::fence(); + execution_space().fence(); Impl::UnorderedMapErase f(*this); f.apply(); - execution_space::fence(); + execution_space().fence(); reset_flag(erasable_idx); } return result; diff --git a/lib/kokkos/containers/src/Kokkos_Vector.hpp b/lib/kokkos/containers/src/Kokkos_Vector.hpp index 76c515941e..9b151d9505 100644 --- a/lib/kokkos/containers/src/Kokkos_Vector.hpp +++ b/lib/kokkos/containers/src/Kokkos_Vector.hpp @@ -121,12 +121,12 @@ public: if( DV::template need_sync() ) { set_functor_host f(DV::h_view,val); parallel_for(n,f); - DV::t_host::execution_space::fence(); + typename DV::t_host::execution_space().fence(); DV::template modify(); } else { set_functor f(DV::d_view,val); parallel_for(n,f); - DV::t_dev::execution_space::fence(); + typename DV::t_dev::execution_space().fence(); DV::template modify(); } } diff --git a/lib/kokkos/containers/unit_tests/CMakeLists.txt 
b/lib/kokkos/containers/unit_tests/CMakeLists.txt index 0f94afec8c..8564bd9ddd 100644 --- a/lib/kokkos/containers/unit_tests/CMakeLists.txt +++ b/lib/kokkos/containers/unit_tests/CMakeLists.txt @@ -86,6 +86,31 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( ) ENDIF() +IF(Kokkos_ENABLE_HPX) +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_HPX + SOURCES + UnitTestMain.cpp + hpx/TestHPX_BitSet.cpp + hpx/TestHPX_DualView.cpp + hpx/TestHPX_DynamicView.cpp + hpx/TestHPX_DynRankViewAPI_generic.cpp + hpx/TestHPX_DynRankViewAPI_rank12345.cpp + hpx/TestHPX_DynRankViewAPI_rank67.cpp + hpx/TestHPX_ErrorReporter.cpp + hpx/TestHPX_OffsetView.cpp + hpx/TestHPX_ScatterView.cpp + hpx/TestHPX_StaticCrsGraph.cpp + hpx/TestHPX_UnorderedMap.cpp + hpx/TestHPX_Vector.cpp + hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) +ENDIF() + IF(Kokkos_ENABLE_Cuda) TRIBITS_ADD_EXECUTABLE_AND_TEST( UnitTest_Cuda diff --git a/lib/kokkos/containers/unit_tests/Makefile b/lib/kokkos/containers/unit_tests/Makefile index c0e5d2820c..a7e0233f8a 100644 --- a/lib/kokkos/containers/unit_tests/Makefile +++ b/lib/kokkos/containers/unit_tests/Makefile @@ -4,6 +4,7 @@ GTEST_PATH = ../../TPL/gtest vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/openmp +vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/hpx vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/serial vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/threads vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/rocm @@ -106,6 +107,25 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) TEST_TARGETS += test-openmp endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + OBJ_HPX = UnitTestMain.o gtest-all.o + OBJ_HPX += TestHPX_BitSet.o + OBJ_HPX += TestHPX_DualView.o + OBJ_HPX += TestHPX_DynamicView.o + OBJ_HPX += TestHPX_DynRankViewAPI_generic.o + OBJ_HPX += TestHPX_DynRankViewAPI_rank12345.o + OBJ_HPX += 
TestHPX_DynRankViewAPI_rank67.o + OBJ_HPX += TestHPX_ErrorReporter.o + OBJ_HPX += TestHPX_OffsetView.o + OBJ_HPX += TestHPX_ScatterView.o + OBJ_HPX += TestHPX_StaticCrsGraph.o + OBJ_HPX += TestHPX_UnorderedMap.o + OBJ_HPX += TestHPX_Vector.o + OBJ_HPX += TestHPX_ViewCtorPropEmbeddedDim.o + TARGETS += KokkosContainers_UnitTest_HPX + TEST_TARGETS += test-hpx +endif + ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) OBJ_SERIAL = UnitTestMain.o gtest-all.o OBJ_SERIAL += TestSerial_BitSet.o @@ -137,6 +157,9 @@ KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) KokkosContainers_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_OpenMP +KokkosContainers_UnitTest_HPX: $(OBJ_HPX) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) $(OBJ_HPX) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_HPX + KokkosContainers_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Serial @@ -152,6 +175,9 @@ test-threads: KokkosContainers_UnitTest_Threads test-openmp: KokkosContainers_UnitTest_OpenMP ./KokkosContainers_UnitTest_OpenMP +test-hpx: KokkosContainers_UnitTest_HPX + ./KokkosContainers_UnitTest_HPX + test-serial: KokkosContainers_UnitTest_Serial ./KokkosContainers_UnitTest_Serial diff --git a/lib/kokkos/containers/unit_tests/TestBitset.hpp b/lib/kokkos/containers/unit_tests/TestBitset.hpp index 6200124644..371c0288b1 100644 --- a/lib/kokkos/containers/unit_tests/TestBitset.hpp +++ b/lib/kokkos/containers/unit_tests/TestBitset.hpp @@ -66,7 +66,7 @@ struct TestBitset unsigned testit(unsigned collisions) { - execution_space::fence(); + execution_space().fence(); unsigned count = 0; Kokkos::parallel_reduce( m_bitset.size()*collisions, *this, count); @@ -114,7 +114,7 @@ struct TestBitsetTest unsigned testit() { - 
execution_space::fence(); + execution_space().fence(); unsigned count = 0; Kokkos::parallel_reduce( m_bitset.size(), *this, count); @@ -151,7 +151,7 @@ struct TestBitsetAny unsigned testit() { - execution_space::fence(); + execution_space().fence(); unsigned count = 0; Kokkos::parallel_reduce( m_bitset.size(), *this, count); diff --git a/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp b/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp index 6684a55452..13e56c9f8d 100644 --- a/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp +++ b/lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp @@ -1276,6 +1276,7 @@ public: Kokkos::deep_copy( dx , hx ); Kokkos::deep_copy( dy , dx ); Kokkos::deep_copy( hy , dy ); + Kokkos::fence(); for ( size_t ip = 0 ; ip < N0 ; ++ip ) { for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { @@ -1286,6 +1287,7 @@ public: Kokkos::deep_copy( dx , T(0) ); Kokkos::deep_copy( hx , dx ); + Kokkos::fence(); for ( size_t ip = 0 ; ip < N0 ; ++ip ) { for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { diff --git a/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp b/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp index ecb7542232..7e48089b43 100644 --- a/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp +++ b/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp @@ -162,6 +162,7 @@ struct ErrorReporterDriver : public ErrorReporterDriverBase void execute(int reporter_capacity, int test_size) { Kokkos::parallel_for(Kokkos::RangePolicy(0,test_size), *this); + Kokkos::fence(); driver_base::check_expectations(reporter_capacity, test_size); } @@ -194,6 +195,7 @@ struct ErrorReporterDriverUseLambda : public ErrorReporterDriverBase driver_base::m_errorReporter.add_report(work_idx, report); } }); + Kokkos::fence(); driver_base::check_expectations(reporter_capacity, test_size); } diff --git a/lib/kokkos/containers/unit_tests/TestScatterView.hpp b/lib/kokkos/containers/unit_tests/TestScatterView.hpp index d402a91b9f..a9d97b32f3 100644 --- 
a/lib/kokkos/containers/unit_tests/TestScatterView.hpp +++ b/lib/kokkos/containers/unit_tests/TestScatterView.hpp @@ -48,79 +48,387 @@ namespace Test { +template +struct test_scatter_view_impl_cls; + template -void test_scatter_view_config(int n) +struct test_scatter_view_impl_cls { - Kokkos::View original_view("original_view", n); - { - auto scatter_view = Kokkos::Experimental::create_scatter_view - < Kokkos::Experimental::ScatterSum - , duplication - , contribution - > (original_view); -#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) - auto policy = Kokkos::RangePolicy(0, n); - auto f = KOKKOS_LAMBDA(int i) { +public: + + typedef Kokkos::Experimental::ScatterView + < double*[3] + , Layout + , ExecSpace + , Kokkos::Experimental::ScatterSum + , duplication + , contribution + > scatter_view_type; + + typedef Kokkos::View orig_view_type; + + + scatter_view_type scatter_view; + int scatterSize; + + test_scatter_view_impl_cls(const scatter_view_type& view){ + scatter_view = view; + scatterSize = 0; + } + + void initialize(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + host_view(i, 0) = 0.0; + host_view(i, 1) = 0.0; + host_view(i, 2) = 0.0; + } + Kokkos::fence(); + Kokkos::deep_copy(orig, host_view); + } + + void run_parallel(int n) { + scatterSize = n; + auto policy = Kokkos::RangePolicy(0, n); + Kokkos::parallel_for(policy, *this, "scatter_view_test: Sum"); + } + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { auto scatter_access = scatter_view.access(); auto scatter_access_atomic = scatter_view.template access(); for (int j = 0; j < 10; ++j) { - auto k = (i + j) % n; + auto k = (i + j) % scatterSize; scatter_access(k, 0) += 4.2; scatter_access_atomic(k, 1) += 2.0; scatter_access(k, 2) += 1.0; } - }; - Kokkos::parallel_for(policy, f, "scatter_view_test"); -#endif - 
Kokkos::Experimental::contribute(original_view, scatter_view); - scatter_view.reset_except(original_view); -#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) - Kokkos::parallel_for(policy, f, "scatter_view_test"); -#endif - Kokkos::Experimental::contribute(original_view, scatter_view); - } -#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) - Kokkos::fence(); - auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), original_view); - Kokkos::fence(); - for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { - auto val0 = host_view(i, 0); - auto val1 = host_view(i, 1); - auto val2 = host_view(i, 2); - EXPECT_TRUE(std::fabs((val0 - 84.0) / 84.0) < 1e-15); - EXPECT_TRUE(std::fabs((val1 - 40.0) / 40.0) < 1e-15); - EXPECT_TRUE(std::fabs((val2 - 20.0) / 20.0) < 1e-15); - } -#endif - { - Kokkos::Experimental::ScatterView - < double*[3] - , Layout - , ExecSpace - , Kokkos::Experimental::ScatterSum - , duplication - , contribution - > - persistent_view("persistent", n); - auto result_view = persistent_view.subview(); - contribute(result_view, persistent_view); - } -} + } -template + void validateResults(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + auto val0 = host_view(i, 0); + auto val1 = host_view(i, 1); + auto val2 = host_view(i, 2); + EXPECT_TRUE(std::fabs((val0 - 84.0) / 84.0) < 1e-14); + EXPECT_TRUE(std::fabs((val1 - 40.0) / 40.0) < 1e-14); + EXPECT_TRUE(std::fabs((val2 - 20.0) / 20.0) < 1e-14); + } + } +}; + + +template +struct test_scatter_view_impl_cls +{ +public: + + typedef Kokkos::Experimental::ScatterView + < double*[3] + , Layout + , ExecSpace + , Kokkos::Experimental::ScatterProd + , duplication + , contribution + > scatter_view_type; + + typedef Kokkos::View orig_view_type; + + + scatter_view_type scatter_view; + int scatterSize; + + 
test_scatter_view_impl_cls(const scatter_view_type& view){ + scatter_view = view; + scatterSize = 0; + } + + void initialize(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + host_view(i, 0) = 1.0; + host_view(i, 1) = 1.0; + host_view(i, 2) = 1.0; + } + Kokkos::fence(); + Kokkos::deep_copy(orig, host_view); + } + + void run_parallel(int n) { + scatterSize = n; + auto policy = Kokkos::RangePolicy(0, n); + Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); + } + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + auto scatter_access = scatter_view.access(); + auto scatter_access_atomic = scatter_view.template access(); + for (int j = 0; j < 4; ++j) { + auto k = (i + j) % scatterSize; + scatter_access(k, 0) *= 4.0; + scatter_access_atomic(k, 1) *= 2.0; + scatter_access(k, 2) *= 1.0; + } + } + + void validateResults(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + auto val0 = host_view(i, 0); + auto val1 = host_view(i, 1); + auto val2 = host_view(i, 2); + EXPECT_TRUE(std::fabs((val0 - 65536.0) / 65536.0) < 1e-14); + EXPECT_TRUE(std::fabs((val1 - 256.0) / 256.0) < 1e-14); + EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14); + } + } +}; + + +template +struct test_scatter_view_impl_cls +{ +public: + + typedef Kokkos::Experimental::ScatterView + < double*[3] + , Layout + , ExecSpace + , Kokkos::Experimental::ScatterMin + , duplication + , contribution + > scatter_view_type; + + typedef Kokkos::View orig_view_type; + + + scatter_view_type scatter_view; + int scatterSize; + + test_scatter_view_impl_cls(const scatter_view_type& view){ + scatter_view = view; + scatterSize = 0; + } + + void initialize(orig_view_type orig) { + auto 
host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + host_view(i, 0) = 999999.0; + host_view(i, 1) = 999999.0; + host_view(i, 2) = 999999.0; + } + Kokkos::fence(); + Kokkos::deep_copy(orig, host_view); + } + + void run_parallel(int n) { + scatterSize = n; + auto policy = Kokkos::RangePolicy(0, n); + Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); + } + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + auto scatter_access = scatter_view.access(); + auto scatter_access_atomic = scatter_view.template access(); + for (int j = 0; j < 4; ++j) { + auto k = (i + j) % scatterSize; + scatter_access(k, 0).update((double)(j+1)*4); + scatter_access_atomic(k, 1).update((double)(j+1)*2.0); + scatter_access(k, 2).update((double)(j+1)*1.0); + } + } + + void validateResults(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + auto val0 = host_view(i, 0); + auto val1 = host_view(i, 1); + auto val2 = host_view(i, 2); + EXPECT_TRUE(std::fabs((val0 - 4.0) / 4.0) < 1e-14); + EXPECT_TRUE(std::fabs((val1 - 2.0) / 2.0) < 1e-14); + EXPECT_TRUE(std::fabs((val2 - 1.0) / 1.0) < 1e-14); + } + } +}; + + +template +struct test_scatter_view_impl_cls +{ +public: + + typedef Kokkos::Experimental::ScatterView + < double*[3] + , Layout + , ExecSpace + , Kokkos::Experimental::ScatterMax + , duplication + , contribution + > scatter_view_type; + + typedef Kokkos::View orig_view_type; + + + scatter_view_type scatter_view; + int scatterSize; + + test_scatter_view_impl_cls(const scatter_view_type& view){ + scatter_view = view; + scatterSize = 0; + } + + void initialize(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for 
(typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + host_view(i, 0) = 0.0; + host_view(i, 1) = 0.0; + host_view(i, 2) = 0.0; + } + Kokkos::fence(); + Kokkos::deep_copy(orig, host_view); + } + + void run_parallel(int n) { + scatterSize = n; + auto policy = Kokkos::RangePolicy(0, n); + Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); + } + + KOKKOS_INLINE_FUNCTION + void operator()(int i) const { + auto scatter_access = scatter_view.access(); + auto scatter_access_atomic = scatter_view.template access(); + for (int j = 0; j < 4; ++j) { + auto k = (i + j) % scatterSize; + scatter_access(k, 0).update((double)(j+1)*4); + scatter_access_atomic(k, 1).update((double)(j+1)*2.0); + scatter_access(k, 2).update((double)(j+1)*1.0); + } + } + + void validateResults(orig_view_type orig) { + auto host_view = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), orig); + Kokkos::fence(); + for (typename decltype(host_view)::size_type i = 0; i < host_view.extent(0); ++i) { + auto val0 = host_view(i, 0); + auto val1 = host_view(i, 1); + auto val2 = host_view(i, 2); + EXPECT_TRUE(std::fabs((val0 - 16.0) / 16.0) < 1e-14); + EXPECT_TRUE(std::fabs((val1 - 8.0) / 8.0) < 1e-14); + EXPECT_TRUE(std::fabs((val2 - 4.0) / 4.0) < 1e-14); + } + } +}; + + + +template +struct test_scatter_view_config +{ + public: + typedef typename test_scatter_view_impl_cls::scatter_view_type scatter_view_def; + typedef typename test_scatter_view_impl_cls::orig_view_type orig_view_def; + + test_scatter_view_config() { + } + + void run_test(int n) + { + //Test creation via create_scatter_view + { + orig_view_def original_view("original_view", n); + scatter_view_def scatter_view = Kokkos::Experimental::create_scatter_view + < op + , duplication + , contribution + > (original_view); + + test_scatter_view_impl_cls scatter_view_test_impl(scatter_view); + scatter_view_test_impl.initialize(original_view); + scatter_view_test_impl.run_parallel(n); + + 
Kokkos::Experimental::contribute(original_view, scatter_view); + scatter_view.reset_except(original_view); + + scatter_view_test_impl.run_parallel(n); + + Kokkos::Experimental::contribute(original_view, scatter_view); + Kokkos::fence(); + + scatter_view_test_impl.validateResults(original_view); + + { + scatter_view_def persistent_view("persistent", n); + auto result_view = persistent_view.subview(); + contribute(result_view, persistent_view); + Kokkos::fence(); + } + } + //Test creation via constructor + { + orig_view_def original_view("original_view", n); + scatter_view_def scatter_view(original_view); + + test_scatter_view_impl_cls scatter_view_test_impl(scatter_view); + scatter_view_test_impl.initialize(original_view); + scatter_view_test_impl.run_parallel(n); + + Kokkos::Experimental::contribute(original_view, scatter_view); + scatter_view.reset_except(original_view); + + scatter_view_test_impl.run_parallel(n); + + Kokkos::Experimental::contribute(original_view, scatter_view); + Kokkos::fence(); + + scatter_view_test_impl.validateResults(original_view); + + { + scatter_view_def persistent_view("persistent", n); + auto result_view = persistent_view.subview(); + contribute(result_view, persistent_view); + Kokkos::fence(); + } + } + } + +}; + + +template struct TestDuplicatedScatterView { TestDuplicatedScatterView(int n) { + // ScatterSum test test_scatter_view_config(n); + Kokkos::Experimental::ScatterNonAtomic, + ScatterType> test_sv_right_config; + test_sv_right_config.run_test(n); + test_scatter_view_config test_sv_left_config; + test_sv_left_config.run_test(n); } }; #ifdef KOKKOS_ENABLE_CUDA // disable duplicated instantiation with CUDA until // UniqueToken can support it -template <> -struct TestDuplicatedScatterView { +template +struct TestDuplicatedScatterView { TestDuplicatedScatterView(int) { } }; @@ -129,14 +437,14 @@ struct TestDuplicatedScatterView { #ifdef KOKKOS_ENABLE_ROCM // disable duplicated instantiation with ROCm until // UniqueToken can 
support it -template <> -struct TestDuplicatedScatterView { +template +struct TestDuplicatedScatterView { TestDuplicatedScatterView(int) { } }; #endif -template +template void test_scatter_view(int n) { // all of these configurations should compile okay, but only some of them are @@ -149,29 +457,47 @@ void test_scatter_view(int n) if (unique_token.size() == 1) { test_scatter_view_config(n); + Kokkos::Experimental::ScatterNonAtomic, + ScatterType> test_sv_config; + test_sv_config.run_test(n); } #ifdef KOKKOS_ENABLE_SERIAL if (!std::is_same::value) { #endif test_scatter_view_config(n); + Kokkos::Experimental::ScatterAtomic, + ScatterType> test_sv_config; + test_sv_config.run_test(n); #ifdef KOKKOS_ENABLE_SERIAL } #endif - - TestDuplicatedScatterView duptest(n); + // with hundreds of threads we were running out of memory. + // limit (n) so that duplication doesn't exceed 8GB + constexpr std::size_t maximum_allowed_total_bytes = 8ull * 1024ull * 1024ull * 1024ull; + std::size_t const maximum_allowed_copy_bytes = maximum_allowed_total_bytes / std::size_t(unique_token.size()); + constexpr std::size_t bytes_per_value = sizeof(double) * 3; + std::size_t const maximum_allowed_copy_values = maximum_allowed_copy_bytes / bytes_per_value; + n = std::min(n, int(maximum_allowed_copy_values)); + TestDuplicatedScatterView duptest(n); } TEST_F( TEST_CATEGORY, scatterview) { #ifndef KOKKOS_ENABLE_ROCM - test_scatter_view(10); + test_scatter_view(10); + test_scatter_view(10); + test_scatter_view(10); + test_scatter_view(10); + // tests were timing out in DEBUG mode, reduce the amount of work #ifdef KOKKOS_ENABLE_DEBUG - test_scatter_view(100000); + int big_n = 100 * 1000; #else - test_scatter_view(10000000); + int big_n = 10 * 1000 * 1000; #endif + test_scatter_view(big_n); + test_scatter_view(big_n); + test_scatter_view(big_n); + test_scatter_view(big_n); #endif } diff --git a/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp 
b/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp index 426db1dbf0..2d34267df3 100644 --- a/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp +++ b/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp @@ -69,7 +69,7 @@ struct TestInsert void testit( bool rehash_on_fail = true ) { - execution_space::fence(); + execution_space().fence(); uint32_t failed_count = 0; do { @@ -82,7 +82,7 @@ struct TestInsert } } while (rehash_on_fail && failed_count > 0u); - execution_space::fence(); + execution_space().fence(); } @@ -122,9 +122,9 @@ struct TestInsert void testit() { - execution_space::fence(); + execution_space().fence(); Kokkos::parallel_for(m_num_erase, *this); - execution_space::fence(); + execution_space().fence(); } KOKKOS_INLINE_FUNCTION @@ -161,9 +161,9 @@ struct TestInsert void testit(value_type &errors) { - execution_space::execution_space::fence(); + execution_space().fence(); Kokkos::parallel_reduce(m_map.capacity(), *this, errors); - execution_space::execution_space::fence(); + execution_space().fence(); } KOKKOS_INLINE_FUNCTION @@ -247,7 +247,7 @@ void test_failed_insert( uint32_t num_nodes) map_type map(num_nodes); Impl::TestInsert test_insert(map, 2u*num_nodes, 1u); test_insert.testit(false /*don't rehash on fail*/); - Device::execution_space::fence(); + typename Device::execution_space().fence(); EXPECT_TRUE( map.failed_insert() ); } diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_BitSet.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_BitSet.cpp new file mode 100644 index 0000000000..cec24e00c7 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_BitSet.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_Category.hpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_Category.hpp new file mode 100644 index 0000000000..358b42d1aa --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_Category.hpp @@ -0,0 +1,65 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_HPX_HPP +#define KOKKOS_TEST_HPX_HPP + +#include + +namespace Test { + +class hpx : public ::testing::Test { +protected: + static void SetUpTestCase() { + } + + static void TearDownTestCase() { + } +}; + +} // namespace Test + +#define TEST_CATEGORY hpx +#define TEST_EXECSPACE Kokkos::Experimental::HPX + +#endif diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_DualView.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DualView.cpp new file mode 100644 index 0000000000..80af9dc33a --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DualView.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_generic.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_generic.cpp new file mode 100644 index 0000000000..95d49c8acf --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_generic.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank12345.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank12345.cpp new file mode 100644 index 0000000000..72e0bc6616 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank12345.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank67.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank67.cpp new file mode 100644 index 0000000000..5a104f0de2 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank67.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynamicView.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynamicView.cpp new file mode 100644 index 0000000000..718b322684 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_DynamicView.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_ErrorReporter.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_ErrorReporter.cpp new file mode 100644 index 0000000000..ea819ae343 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_ErrorReporter.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_OffsetView.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_OffsetView.cpp new file mode 100644 index 0000000000..4d3684923f --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_OffsetView.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_ScatterView.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_ScatterView.cpp new file mode 100644 index 0000000000..6a871cc121 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_ScatterView.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_StaticCrsGraph.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_StaticCrsGraph.cpp new file mode 100644 index 0000000000..fbb70a762b --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_StaticCrsGraph.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_UnorderedMap.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_UnorderedMap.cpp new file mode 100644 index 0000000000..7e7aad309f --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_UnorderedMap.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_Vector.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_Vector.cpp new file mode 100644 index 0000000000..5fb3664197 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_Vector.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/containers/unit_tests/hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp b/lib/kokkos/containers/unit_tests/hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp new file mode 100644 index 0000000000..fb9c263c83 --- /dev/null +++ b/lib/kokkos/containers/unit_tests/hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/core/cmake/Dependencies.cmake b/lib/kokkos/core/cmake/Dependencies.cmake index 8d9872725e..9ad7660bdf 100644 --- a/lib/kokkos/core/cmake/Dependencies.cmake +++ b/lib/kokkos/core/cmake/Dependencies.cmake @@ -1,5 +1,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( - LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREADS DLlib + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREADS DLlib HPX TEST_OPTIONAL_TPLS CUSPARSE ) diff --git a/lib/kokkos/core/perf_test/CMakeLists.txt b/lib/kokkos/core/perf_test/CMakeLists.txt index d9c0f89413..d92462a357 100644 --- a/lib/kokkos/core/perf_test/CMakeLists.txt +++ b/lib/kokkos/core/perf_test/CMakeLists.txt @@ -47,6 +47,7 @@ TRIBITS_ADD_EXECUTABLE( PerformanceTest_TaskDAG SOURCES test_taskdag.cpp COMM serial mpi + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) TRIBITS_ADD_TEST( diff --git a/lib/kokkos/core/perf_test/Makefile b/lib/kokkos/core/perf_test/Makefile index a0ce1e2c31..ca98ca6dde 100644 --- a/lib/kokkos/core/perf_test/Makefile +++ b/lib/kokkos/core/perf_test/Makefile @@ -30,6 +30,7 @@ TARGETS = # OBJ_PERF = PerfTestMain.o gtest-all.o +OBJ_PERF += PerfTest_ExecSpacePartitioning.o OBJ_PERF += PerfTestGramSchmidt.o OBJ_PERF += PerfTestHexGrad.o OBJ_PERF += PerfTest_CustomReduction.o diff --git a/lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp b/lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp index bb2fb5fce5..ff9bf5a91b 100644 --- a/lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp +++ b/lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp @@ -44,6 +44,8 @@ #ifndef KOKKOS_BLAS_KERNELS_HPP #define KOKKOS_BLAS_KERNELS_HPP +#include + namespace Kokkos { template< class ConstVectorType , @@ -123,15 +125,10 @@ struct Dot { typedef typename Device::execution_space execution_space ; - typedef typename - Impl::StaticAssertSame< Impl::unsigned_< 1 > , - Impl::unsigned_< Type::Rank > 
>::type ok_rank ; + static_assert( static_cast(Type::Rank) == static_cast(1), + "Dot static_assert Fail: Rank != 1"); -/* typedef typename - Impl::StaticAssertSame< execution_space , - typename Type::execution_space >::type ok_device ;*/ - typedef double value_type ; #if 1 @@ -164,13 +161,8 @@ struct DotSingle { typedef typename Device::execution_space execution_space ; - typedef typename - Impl::StaticAssertSame< Impl::unsigned_< 1 > , - Impl::unsigned_< Type::Rank > >::type ok_rank ; - -/* typedef typename - Impl::StaticAssertSame< execution_space , - typename Type::execution_space >::type ok_device ;*/ + static_assert( static_cast(Type::Rank) == static_cast(1), + "DotSingle static_assert Fail: Rank != 1"); typedef double value_type ; @@ -204,25 +196,11 @@ struct Scale { typedef typename Device::execution_space execution_space ; -/* typedef typename - Impl::StaticAssertSame< execution_space , - typename ScalarType::execution_space >::type - ok_scalar_device ; + static_assert( static_cast(ScalarType::Rank) == static_cast(0), + "Scale static_assert Fail: ScalarType::Rank != 0"); - typedef typename - Impl::StaticAssertSame< execution_space , - typename VectorType::execution_space >::type - ok_vector_device ;*/ - - typedef typename - Impl::StaticAssertSame< Impl::unsigned_< 0 > , - Impl::unsigned_< ScalarType::Rank > >::type - ok_scalar_rank ; - - typedef typename - Impl::StaticAssertSame< Impl::unsigned_< 1 > , - Impl::unsigned_< VectorType::Rank > >::type - ok_vector_rank ; + static_assert( static_cast(VectorType::Rank) == static_cast(1), + "Scale static_assert Fail: VectorType::Rank != 1"); #if 1 typename ScalarType::const_type alpha ; @@ -251,35 +229,14 @@ struct AXPBY { typedef typename Device::execution_space execution_space ; -/* typedef typename - Impl::StaticAssertSame< execution_space , - typename ScalarType::execution_space >::type - ok_scalar_device ; + static_assert( static_cast(ScalarType::Rank) == static_cast(0), + "AXPBY static_assert Fail: 
ScalarType::Rank != 0"); - typedef typename - Impl::StaticAssertSame< execution_space , - typename ConstVectorType::execution_space >::type - ok_const_vector_device ; + static_assert( static_cast(ConstVectorType::Rank) == static_cast(1), + "AXPBY static_assert Fail: ConstVectorType::Rank != 1"); - typedef typename - Impl::StaticAssertSame< execution_space , - typename VectorType::execution_space >::type - ok_vector_device ;*/ - - typedef typename - Impl::StaticAssertSame< Impl::unsigned_< 0 > , - Impl::unsigned_< ScalarType::Rank > >::type - ok_scalar_rank ; - - typedef typename - Impl::StaticAssertSame< Impl::unsigned_< 1 > , - Impl::unsigned_< ConstVectorType::Rank > >::type - ok_const_vector_rank ; - - typedef typename - Impl::StaticAssertSame< Impl::unsigned_< 1 > , - Impl::unsigned_< VectorType::Rank > >::type - ok_vector_rank ; + static_assert( static_cast(VectorType::Rank) == static_cast(1), + "AXPBY static_assert Fail: VectorType::Rank != 1"); #if 1 typename ScalarType::const_type alpha , beta ; diff --git a/lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp b/lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp index b169b02903..d812b16d85 100644 --- a/lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp +++ b/lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp @@ -183,7 +183,7 @@ struct ModifiedGramSchmidt } } - execution_space::fence(); + execution_space().fence(); return timer.seconds(); } diff --git a/lib/kokkos/core/perf_test/PerfTestHexGrad.cpp b/lib/kokkos/core/perf_test/PerfTestHexGrad.cpp index b228dd2e2e..03285a375c 100644 --- a/lib/kokkos/core/perf_test/PerfTestHexGrad.cpp +++ b/lib/kokkos/core/perf_test/PerfTestHexGrad.cpp @@ -253,12 +253,12 @@ struct HexGrad double dt_min = 0 ; Kokkos::parallel_for( count , Init( coord ) ); - execution_space::fence(); + execution_space().fence(); for ( int i = 0 ; i < iter ; ++i ) { Kokkos::Timer timer ; Kokkos::parallel_for( count , HexGrad( coord , grad ) ); - execution_space::fence(); + execution_space().fence(); 
const double dt = timer.seconds(); if ( 0 == i ) dt_min = dt ; else dt_min = dt < dt_min ? dt : dt_min ; diff --git a/lib/kokkos/core/perf_test/PerfTestMDRange.hpp b/lib/kokkos/core/perf_test/PerfTestMDRange.hpp index 51affa6a2e..f433451f78 100644 --- a/lib/kokkos/core/perf_test/PerfTestMDRange.hpp +++ b/lib/kokkos/core/perf_test/PerfTestMDRange.hpp @@ -125,15 +125,15 @@ struct MultiDimRangePerf3D Kokkos::MDRangePolicy, execution_space > policy(point_type{{0,0,0}},point_type{{icount,jcount,kcount}},tile_type{{Ti,Tj,Tk}} ); Kokkos::parallel_for( policy_initA, Init(Atest, icount, jcount, kcount) ); - execution_space::fence(); + execution_space().fence(); Kokkos::parallel_for( policy_initB, Init(Btest, icount+2, jcount+2, kcount+2) ); - execution_space::fence(); + execution_space().fence(); for (int i = 0; i < iter; ++i) { Kokkos::Timer timer; Kokkos::parallel_for( policy, FunctorType(Atest, Btest, icount, jcount, kcount) ); - execution_space::fence(); + execution_space().fence(); const double dt = timer.seconds(); if ( 0 == i ) dt_min = dt ; else dt_min = dt < dt_min ? dt : dt_min ; @@ -189,15 +189,15 @@ struct MultiDimRangePerf3D Kokkos::MDRangePolicy, execution_space > policy({{0,0,0}},{{icount,jcount,kcount}},{{Ti,Tj,Tk}} ); Kokkos::parallel_for( policy_initA, Init(Atest, icount, jcount, kcount) ); - execution_space::fence(); + execution_space().fence(); Kokkos::parallel_for( policy_initB, Init(Btest, icount+2, jcount+2, kcount+2) ); - execution_space::fence(); + execution_space().fence(); for (int i = 0; i < iter; ++i) { Kokkos::Timer timer; Kokkos::parallel_for( policy, FunctorType(Atest, Btest, icount, jcount, kcount) ); - execution_space::fence(); + execution_space().fence(); const double dt = timer.seconds(); if ( 0 == i ) dt_min = dt ; else dt_min = dt < dt_min ? 
dt : dt_min ; @@ -368,15 +368,15 @@ struct RangePolicyCollapseTwo double dt_min = 0; Kokkos::parallel_for( policy, Init(Atest,icount,jcount,kcount) ); - execution_space::fence(); + execution_space().fence(); Kokkos::parallel_for( policy_initB, Init(Btest,icount+2,jcount+2,kcount+2) ); - execution_space::fence(); + execution_space().fence(); for (int i = 0; i < iter; ++i) { Kokkos::Timer timer; Kokkos::parallel_for(policy, FunctorType(Atest, Btest, icount, jcount, kcount)); - execution_space::fence(); + execution_space().fence(); const double dt = timer.seconds(); if ( 0 == i ) dt_min = dt ; else dt_min = dt < dt_min ? dt : dt_min ; @@ -513,15 +513,15 @@ struct RangePolicyCollapseAll double dt_min = 0; Kokkos::parallel_for( policy, Init(Atest,icount,jcount,kcount) ); - execution_space::fence(); + execution_space().fence(); Kokkos::parallel_for( policy_initB, Init(Btest,icount+2,jcount+2,kcount+2) ); - execution_space::fence(); + execution_space().fence(); for (int i = 0; i < iter; ++i) { Kokkos::Timer timer; Kokkos::parallel_for(policy, FunctorType(Atest, Btest, icount, jcount, kcount)); - execution_space::fence(); + execution_space().fence(); const double dt = timer.seconds(); if ( 0 == i ) dt_min = dt ; else dt_min = dt < dt_min ? 
dt : dt_min ; diff --git a/lib/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp b/lib/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp new file mode 100644 index 0000000000..2fc889beed --- /dev/null +++ b/lib/kokkos/core/perf_test/PerfTest_ExecSpacePartitioning.cpp @@ -0,0 +1,564 @@ +#include +#include +#include + + +namespace Test { + +namespace { + template + struct SpaceInstance { + static ExecSpace create() { + return ExecSpace(); + } + static void destroy(ExecSpace&) { + } + static bool overlap() { + return false; + } + }; + + #ifndef KOKKOS_ENABLE_DEBUG + #ifdef KOKKOS_ENABLE_CUDA + template<> + struct SpaceInstance { + static Kokkos::Cuda create() { + cudaStream_t stream; + cudaStreamCreate(&stream); + return Kokkos::Cuda(stream); + } + static void destroy(Kokkos::Cuda& space) { + cudaStream_t stream = space.cuda_stream(); + cudaStreamDestroy(stream); + } + static bool overlap() { + bool value = true; + auto local_rank_str = std::getenv("CUDA_LAUNCH_BLOCKING"); + if(local_rank_str) { + value = (std::atoi(local_rank_str)==0); + } + return value; + } + }; + #endif + #endif +} + +struct FunctorRange { + int M,R; + Kokkos::View a; + FunctorRange(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} + KOKKOS_INLINE_FUNCTION + void operator() (const int i) const { + for(int r=0;r a; + FunctorMDRange(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} + KOKKOS_INLINE_FUNCTION + void operator() (const int i, const int) const { + for(int j=0;j a; + FunctorTeam(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} + KOKKOS_INLINE_FUNCTION + void operator() (const Kokkos::TeamPolicy::member_type& team) const { + int i = team.league_rank(); + for(int r=0;r a; + FunctorRangeReduce(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} + KOKKOS_INLINE_FUNCTION + void operator() (const int i, double& tmp) const { + for(int r=0;r a; + FunctorMDRangeReduce(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} + KOKKOS_INLINE_FUNCTION + void operator() (const int 
i, const int, double& tmp) const { + for(int j=0;j a; + FunctorTeamReduce(int M_, int R_, Kokkos::View a_):M(M_),R(R_),a(a_){} + KOKKOS_INLINE_FUNCTION + void operator() (const Kokkos::TeamPolicy::member_type& team, double& tmp) const { + int i = team.league_rank(); + for(int r=0;r::create(); + TEST_EXECSPACE space2 = SpaceInstance::create(); + + Kokkos::View a("A",N,M); + FunctorRange f(M,R,a); + FunctorRangeReduce fr(M,R,a); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel0", + Kokkos::RangePolicy(0,N), FunctorRange(M,R,a)); + + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel1", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space1,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel2", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space2,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + + Kokkos::Timer timer; + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel3", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel4", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + + timer.reset(); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel5", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space1,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorRange(M,R,a)); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel6", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space2,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorRange(M,R,a)); + Kokkos::fence(); + double time_overlap = timer.seconds(); + + timer.reset(); + 
Kokkos::parallel_for("default_exec::overlap_range_policy::kernel7", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel8", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + double time_end = timer.seconds(); + + if(SpaceInstance::overlap()) { + ASSERT_TRUE( (time_end > 1.5*time_overlap) ); + } + printf("Time RangePolicy: NonOverlap: %lf Time Overlap: %lf\n",time_end,time_overlap); + + Kokkos::View result("result"); + Kokkos::View result1("result1"); + Kokkos::View result2("result2"); + Kokkos::View h_result("h_result"); + Kokkos::View h_result1("h_result1"); + Kokkos::View h_result2("h_result2"); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::fence(); + double time_fenced = timer.seconds(); + Kokkos::deep_copy(h_result,result); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + double time_not_fenced = timer.seconds(); + Kokkos::fence(); + if(SpaceInstance::overlap()) { + ASSERT_TRUE(time_fenced>2.0*time_not_fenced); + } + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space,0,N), + 
Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::fence(); + double time_no_overlapped_reduce = timer.seconds(); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space1,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result1); + Kokkos::parallel_reduce("default_exec::overlap_range_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::RangePolicy(space2,0,N), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result2); + Kokkos::fence(); + double time_overlapped_reduce = timer.seconds(); + + Kokkos::deep_copy(h_result2,result2); + Kokkos::deep_copy(h_result1,result1); + + ASSERT_EQ(h_result1(),h_result()); + ASSERT_EQ(h_result2(),h_result()); + + if(SpaceInstance::overlap()) { + ASSERT_TRUE(time_overlapped_reduce < 1.5*time_no_overlapped_reduce); + } + printf("Time RangePolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",time_no_overlapped_reduce,time_overlapped_reduce); + SpaceInstance::destroy(space1); + SpaceInstance::destroy(space2); +} + +TEST_F( default_exec, overlap_mdrange_policy ) { + int N = 200; + int M = 10000; + int R = 10; + + TEST_EXECSPACE space; + TEST_EXECSPACE space1 = SpaceInstance::create(); + TEST_EXECSPACE space2 = SpaceInstance::create(); + + Kokkos::View a("A",N,M); + FunctorMDRange f(M,R,a); + FunctorMDRangeReduce fr(M,R,a); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel0", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>({0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorMDRange(M,R,a)); + + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel1", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space1,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel2", + 
Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space2,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + + Kokkos::Timer timer; + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel3", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel4", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + + timer.reset(); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel5", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space1,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorMDRange(M,R,a)); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel6", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space2,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorMDRange(M,R,a)); + Kokkos::fence(); + double time_overlap = timer.seconds(); + + timer.reset(); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel7", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel8", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + double time_end = timer.seconds(); + + if(SpaceInstance::overlap()) { + ASSERT_TRUE( (time_end > 1.5*time_overlap) ); + } + printf("Time MDRangePolicy: NonOverlap: %lf Time Overlap: %lf\n",time_end,time_overlap); + + Kokkos::View result("result"); + Kokkos::View result1("result1"); + Kokkos::View result2("result2"); + Kokkos::View 
h_result("h_result"); + Kokkos::View h_result1("h_result1"); + Kokkos::View h_result2("h_result2"); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::fence(); + double time_fenced = timer.seconds(); + Kokkos::deep_copy(h_result,result); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + double time_not_fenced = timer.seconds(); + Kokkos::fence(); + if(SpaceInstance::overlap()) { + ASSERT_TRUE(time_fenced>2.0*time_not_fenced); + } + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::fence(); + double time_no_overlapped_reduce = timer.seconds(); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space1,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result1); + Kokkos::parallel_reduce("default_exec::overlap_mdrange_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::MDRangePolicy>(space2,{0,0},{N,R}), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result2); + Kokkos::fence(); + double time_overlapped_reduce = timer.seconds(); + + 
Kokkos::deep_copy(h_result2,result2); + Kokkos::deep_copy(h_result1,result1); + + ASSERT_EQ(h_result1(),h_result()); + ASSERT_EQ(h_result2(),h_result()); + + if(SpaceInstance::overlap()) { + ASSERT_TRUE(time_overlapped_reduce < 1.5*time_no_overlapped_reduce); + } + printf("Time MDRangePolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",time_no_overlapped_reduce,time_overlapped_reduce); + SpaceInstance::destroy(space2); + SpaceInstance::destroy(space1); + +} + +TEST_F( default_exec, overlap_team_policy ) { + int N = 20; + int M = 1000000; + int R = 10; + + TEST_EXECSPACE space; + TEST_EXECSPACE space1 = SpaceInstance::create(); + TEST_EXECSPACE space2 = SpaceInstance::create(); + + Kokkos::View a("A",N,M); + FunctorTeam f(M,R,a); + FunctorTeamReduce fr(M,R,a); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel0", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorTeam(M,R,a)); + + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel1", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space1,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel2", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space2,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + + Kokkos::Timer timer; + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel3", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel4", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + + timer.reset(); + 
Kokkos::parallel_for("default_exec::overlap_range_policy::kernel5", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space1,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorTeam(M,R,a)); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel6", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space2,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , FunctorTeam(M,R,a)); + Kokkos::fence(); + double time_overlap = timer.seconds(); + + timer.reset(); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel7", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::parallel_for("default_exec::overlap_range_policy::kernel8", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , f); + Kokkos::fence(); + double time_end = timer.seconds(); + + if(SpaceInstance::overlap()) { + ASSERT_TRUE( (time_end > 1.5*time_overlap) ); + } + printf("Time TeamPolicy: NonOverlap: %lf Time Overlap: %lf\n",time_end,time_overlap); + + Kokkos::View result("result"); + Kokkos::View result1("result1"); + Kokkos::View result2("result2"); + Kokkos::View h_result("h_result"); + Kokkos::View h_result1("h_result1"); + Kokkos::View h_result2("h_result2"); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::fence(); + double time_fenced = timer.seconds(); + Kokkos::deep_copy(h_result,result); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) 
+ , fr, result); + double time_not_fenced = timer.seconds(); + Kokkos::fence(); + if(SpaceInstance::overlap()) { + ASSERT_TRUE(time_fenced>2.0*time_not_fenced); + } + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result); + Kokkos::fence(); + double time_no_overlapped_reduce = timer.seconds(); + + timer.reset(); + Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space1,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result1); + Kokkos::parallel_reduce("default_exec::overlap_team_policy::kernel_reduce", + Kokkos::Experimental::require( + Kokkos::TeamPolicy(space2,N,Kokkos::AUTO), + Kokkos::Experimental::WorkItemProperty::HintLightWeight) + , fr, result2); + Kokkos::fence(); + double time_overlapped_reduce = timer.seconds(); + + Kokkos::deep_copy(h_result2,result2); + Kokkos::deep_copy(h_result1,result1); + + ASSERT_EQ(h_result1(),h_result()); + ASSERT_EQ(h_result2(),h_result()); + + if(SpaceInstance::overlap()) { + ASSERT_TRUE(time_overlapped_reduce < 1.5*time_no_overlapped_reduce); + } + printf("Time TeamPolicy Reduce: NonOverlap: %lf Time Overlap: %lf\n",time_no_overlapped_reduce,time_overlapped_reduce); + SpaceInstance::destroy(space1); + SpaceInstance::destroy(space2); +} +} diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewAllocate.cpp b/lib/kokkos/core/perf_test/PerfTest_ViewAllocate.cpp index 7d64591d9f..685194c150 100644 --- a/lib/kokkos/core/perf_test/PerfTest_ViewAllocate.cpp +++ 
b/lib/kokkos/core/perf_test/PerfTest_ViewAllocate.cpp @@ -121,6 +121,7 @@ void run_allocateview_tests(int N, int R) { Kokkos::parallel_for(N8, KOKKOS_LAMBDA (const int& i) { a_ptr[i] = 0.0; }); + Kokkos::fence(); Kokkos::kokkos_free(a_ptr); } time_raw = timer.seconds()/R; diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewCopy.hpp b/lib/kokkos/core/perf_test/PerfTest_ViewCopy.hpp index 3f46187957..eff31c69bb 100644 --- a/lib/kokkos/core/perf_test/PerfTest_ViewCopy.hpp +++ b/lib/kokkos/core/perf_test/PerfTest_ViewCopy.hpp @@ -95,6 +95,7 @@ void run_deepcopyview_tests123(int N, int R) { a_ptr[i] = b_ptr[i]; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -135,6 +136,7 @@ void run_deepcopyview_tests45(int N, int R) { a_ptr[i] = b_ptr[i]; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -169,6 +171,7 @@ void run_deepcopyview_tests6(int N, int R) { a_ptr[i] = b_ptr[i]; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -202,6 +205,7 @@ void run_deepcopyview_tests7(int N, int R) { a_ptr[i] = b_ptr[i]; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -235,6 +239,7 @@ void run_deepcopyview_tests8(int N, int R) { a_ptr[i] = b_ptr[i]; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewFill.hpp b/lib/kokkos/core/perf_test/PerfTest_ViewFill.hpp index c50d13d1ed..b17356f0c8 100644 --- a/lib/kokkos/core/perf_test/PerfTest_ViewFill.hpp +++ b/lib/kokkos/core/perf_test/PerfTest_ViewFill.hpp @@ -90,6 +90,7 @@ void run_fillview_tests123(int N, int R) { a_ptr[i] = 1.1; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -126,6 +127,7 @@ void run_fillview_tests45(int N, int R) { a_ptr[i] = 1.1; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -157,6 +159,7 @@ void run_fillview_tests6(int N, int R) { a_ptr[i] = 1.1; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -187,6 +190,7 @@ void 
run_fillview_tests7(int N, int R) { a_ptr[i] = 1.1; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -217,6 +221,7 @@ void run_fillview_tests8(int N, int R) { a_ptr[i] = 1.1; }); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif diff --git a/lib/kokkos/core/perf_test/PerfTest_ViewResize.hpp b/lib/kokkos/core/perf_test/PerfTest_ViewResize.hpp index 2720f4855c..b5019b467a 100644 --- a/lib/kokkos/core/perf_test/PerfTest_ViewResize.hpp +++ b/lib/kokkos/core/perf_test/PerfTest_ViewResize.hpp @@ -95,7 +95,9 @@ void run_resizeview_tests123(int N, int R) { Kokkos::parallel_for(N8, KOKKOS_LAMBDA (const int& i) { a1_ptr[i] = a_ptr[i]; }); + Kokkos::fence(); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -143,7 +145,9 @@ void run_resizeview_tests45(int N, int R) { Kokkos::parallel_for(N8, KOKKOS_LAMBDA (const int& i) { a1_ptr[i] = a_ptr[i]; }); + Kokkos::fence(); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -181,7 +185,9 @@ void run_resizeview_tests6(int N, int R) { Kokkos::parallel_for(N8, KOKKOS_LAMBDA (const int& i) { a1_ptr[i] = a_ptr[i]; }); + Kokkos::fence(); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -218,7 +224,9 @@ void run_resizeview_tests7(int N, int R) { Kokkos::parallel_for(N8, KOKKOS_LAMBDA (const int& i) { a1_ptr[i] = a_ptr[i]; }); + Kokkos::fence(); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif @@ -255,7 +263,9 @@ void run_resizeview_tests8(int N, int R) { Kokkos::parallel_for(N8, KOKKOS_LAMBDA (const int& i) { a1_ptr[i] = a_ptr[i]; }); + Kokkos::fence(); } + Kokkos::fence(); time_raw = timer.seconds()/R; } #endif diff --git a/lib/kokkos/core/perf_test/test_atomic.cpp b/lib/kokkos/core/perf_test/test_atomic.cpp index 6bb22e4e30..24e4f015d3 100644 --- a/lib/kokkos/core/perf_test/test_atomic.cpp +++ b/lib/kokkos/core/perf_test/test_atomic.cpp @@ -69,7 +69,7 @@ typedef Kokkos::DefaultExecutionSpace exec_space; #define WHITE 8 void textcolor(int attr, int fg, int bg) -{ 
char command[13]; +{ char command[40]; /* Command is the control command to the terminal */ sprintf(command, "%c[%d;%d;%dm", 0x1B, attr, fg + 30, bg + 40); @@ -85,7 +85,7 @@ struct ZeroFunctor{ typedef typename Kokkos::View::HostMirror h_type; type data; KOKKOS_INLINE_FUNCTION - void operator()(int i) const { + void operator()(int) const { data() = 0; } }; @@ -101,7 +101,7 @@ struct AddFunctor{ type data; KOKKOS_INLINE_FUNCTION - void operator()(int i) const { + void operator()(int) const { Kokkos::atomic_fetch_add(&data(),(T)1); } }; @@ -113,12 +113,12 @@ T AddLoop(int loop) { typename ZeroFunctor::h_type h_data("HData"); f_zero.data = data; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); struct AddFunctor f_add; f_add.data = data; Kokkos::parallel_for(loop,f_add); - exec_space::fence(); + exec_space().fence(); Kokkos::deep_copy(h_data,data); T val = h_data(); @@ -132,7 +132,7 @@ struct AddNonAtomicFunctor{ type data; KOKKOS_INLINE_FUNCTION - void operator()(int i) const { + void operator()(int) const { data()+=(T)1; } }; @@ -145,12 +145,12 @@ T AddLoopNonAtomic(int loop) { f_zero.data = data; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); struct AddNonAtomicFunctor f_add; f_add.data = data; Kokkos::parallel_for(loop,f_add); - exec_space::fence(); + exec_space().fence(); Kokkos::deep_copy(h_data,data); T val = h_data(); @@ -178,7 +178,7 @@ struct CASFunctor{ type data; KOKKOS_INLINE_FUNCTION - void operator()(int i) const { + void operator()(int) const { T old = data(); T newval, assumed; do { @@ -197,12 +197,12 @@ T CASLoop(int loop) { typename ZeroFunctor::h_type h_data("HData"); f_zero.data = data; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); struct CASFunctor f_cas; f_cas.data = data; Kokkos::parallel_for(loop,f_cas); - exec_space::fence(); + exec_space().fence(); Kokkos::deep_copy(h_data,data); T val = h_data(); @@ -217,7 +217,7 @@ struct CASNonAtomicFunctor{ type 
data; KOKKOS_INLINE_FUNCTION - void operator()(int i) const { + void operator()(int) const { volatile T assumed; volatile T newval; bool fail=1; @@ -240,12 +240,12 @@ T CASLoopNonAtomic(int loop) { typename ZeroFunctor::h_type h_data("HData"); f_zero.data = data; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); struct CASNonAtomicFunctor f_cas; f_cas.data = data; Kokkos::parallel_for(loop,f_cas); - exec_space::fence(); + exec_space().fence(); Kokkos::deep_copy(h_data,data); T val = h_data(); @@ -296,19 +296,19 @@ T ExchLoop(int loop) { typename ZeroFunctor::h_type h_data("HData"); f_zero.data = data; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); typename ZeroFunctor::type data2("Data"); typename ZeroFunctor::h_type h_data2("HData"); f_zero.data = data2; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); struct ExchFunctor f_exch; f_exch.data = data; f_exch.data2 = data2; Kokkos::parallel_for(loop,f_exch); - exec_space::fence(); + exec_space().fence(); Kokkos::deep_copy(h_data,data); Kokkos::deep_copy(h_data2,data2); @@ -339,19 +339,19 @@ T ExchLoopNonAtomic(int loop) { typename ZeroFunctor::h_type h_data("HData"); f_zero.data = data; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); typename ZeroFunctor::type data2("Data"); typename ZeroFunctor::h_type h_data2("HData"); f_zero.data = data2; Kokkos::parallel_for(1,f_zero); - exec_space::fence(); + exec_space().fence(); struct ExchNonAtomicFunctor f_exch; f_exch.data = data; f_exch.data2 = data2; Kokkos::parallel_for(loop,f_exch); - exec_space::fence(); + exec_space().fence(); Kokkos::deep_copy(h_data,data); Kokkos::deep_copy(h_data2,data2); diff --git a/lib/kokkos/core/perf_test/test_mempool.cpp b/lib/kokkos/core/perf_test/test_mempool.cpp index 9fd58eda91..c47730ec69 100644 --- a/lib/kokkos/core/perf_test/test_mempool.cpp +++ b/lib/kokkos/core/perf_test/test_mempool.cpp @@ -153,6 +153,7 @@ struct 
TestFunctor { typedef Kokkos::RangePolicy< ExecSpace , TagDel > policy ; Kokkos::parallel_for( policy(0,range_iter), *this ); + Kokkos::fence(); } //---------------------------------------- diff --git a/lib/kokkos/core/perf_test/test_taskdag.cpp b/lib/kokkos/core/perf_test/test_taskdag.cpp index 8d5e1c475f..41198edfe1 100644 --- a/lib/kokkos/core/perf_test/test_taskdag.cpp +++ b/lib/kokkos/core/perf_test/test_taskdag.cpp @@ -92,27 +92,26 @@ long fib_alloc_count( long n ) return count[ n & mask ]; } -template< class Space > +template< class Scheduler > struct TestFib { - using Scheduler = Kokkos::TaskScheduler< Space > ; using MemorySpace = typename Scheduler::memory_space ; using MemberType = typename Scheduler::member_type ; - using FutureType = Kokkos::Future< long , Space > ; + using FutureType = Kokkos::BasicFuture< long , Scheduler > ; typedef long value_type ; - Scheduler sched ; FutureType dep[2] ; const value_type n ; KOKKOS_INLINE_FUNCTION - TestFib( const Scheduler & arg_sched , const value_type arg_n ) - : sched( arg_sched ), dep{} , n( arg_n ) {} + TestFib( const value_type arg_n ) + : dep{} , n( arg_n ) {} KOKKOS_INLINE_FUNCTION - void operator()( const MemberType & , value_type & result ) noexcept + void operator()( MemberType & member, value_type & result ) noexcept { + auto& sched = member.scheduler(); if ( n < 2 ) { result = n ; } @@ -126,13 +125,13 @@ struct TestFib { dep[1] = Kokkos::task_spawn ( Kokkos::TaskSingle( sched, Kokkos::TaskPriority::High ) - , TestFib( sched, n - 2 ) ); + , TestFib( n - 2 ) ); dep[0] = Kokkos::task_spawn ( Kokkos::TaskSingle( sched ) - , TestFib( sched, n - 1 ) ); + , TestFib( n - 1 ) ); - Kokkos::Future< ExecSpace > fib_all = Kokkos::when_all( dep, 2 ); + auto fib_all = sched.when_all( dep, 2 ); if ( ! dep[0].is_null() && ! dep[1].is_null() && ! fib_all.is_null() ) { // High priority to retire this branch. 
@@ -202,13 +201,15 @@ int main( int argc , char* argv[] ) return -1; } - typedef TestFib< ExecSpace > Functor ; + using Scheduler = Kokkos::TaskSchedulerMultiple; + + typedef TestFib< Scheduler > Functor ; Kokkos::initialize(argc,argv); { - Functor::Scheduler sched( Functor::MemorySpace() + Scheduler sched( Functor::MemorySpace() , total_alloc_size , min_block_size , max_block_size @@ -217,21 +218,21 @@ int main( int argc , char* argv[] ) Functor::FutureType f = Kokkos::host_spawn( Kokkos::TaskSingle( sched ) - , Functor( sched , fib_input ) + , Functor( fib_input ) ); Kokkos::wait( sched ); test_result = f.get(); - task_count_max = sched.allocated_task_count_max(); - task_count_accum = sched.allocated_task_count_accum(); + //task_count_max = sched.allocated_task_count_max(); + //task_count_accum = sched.allocated_task_count_accum(); - if ( number_alloc != task_count_accum ) { - std::cout << " number_alloc( " << number_alloc << " )" - << " != task_count_accum( " << task_count_accum << " )" - << std::endl ; - } + //if ( number_alloc != task_count_accum ) { + // std::cout << " number_alloc( " << number_alloc << " )" + // << " != task_count_accum( " << task_count_accum << " )" + // << std::endl ; + //} if ( fib_output != test_result ) { std::cout << " answer( " << fib_output << " )" @@ -239,7 +240,7 @@ int main( int argc , char* argv[] ) << std::endl ; } - if ( fib_output != test_result || number_alloc != task_count_accum ) { + if ( fib_output != test_result) { // || number_alloc != task_count_accum ) { printf(" TEST FAILED\n"); return -1; } @@ -252,7 +253,7 @@ int main( int argc , char* argv[] ) Functor::FutureType ftmp = Kokkos::host_spawn( Kokkos::TaskSingle( sched ) - , Functor( sched , fib_input ) + , Functor( fib_input ) ); Kokkos::wait( sched ); diff --git a/lib/kokkos/core/src/CMakeLists.txt b/lib/kokkos/core/src/CMakeLists.txt index ab7f3f55c7..a941c5da0c 100644 --- a/lib/kokkos/core/src/CMakeLists.txt +++ b/lib/kokkos/core/src/CMakeLists.txt @@ -61,6 +61,16 
@@ IF(KOKKOS_LEGACY_TRIBITS) #----------------------------------------------------------------------------- + FILE(GLOB HEADERS_HPX HPX/*.hpp) + FILE(GLOB SOURCES_HPX HPX/*.cpp) + + LIST(APPEND HEADERS_PRIVATE ${HEADERS_HPX} ) + LIST(APPEND SOURCES ${SOURCES_HPX} ) + + INSTALL(FILES ${HEADERS_HPX} DESTINATION ${TRILINOS_INCDIR}/HPX/) + + #----------------------------------------------------------------------------- + FILE(GLOB HEADERS_CUDA Cuda/*.hpp) FILE(GLOB SOURCES_CUDA Cuda/*.cpp) diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp deleted file mode 100644 index c31b7f5b5d..0000000000 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp +++ /dev/null @@ -1,419 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_CUDAEXEC_HPP -#define KOKKOS_CUDAEXEC_HPP - -#include -#ifdef KOKKOS_ENABLE_CUDA - -#include -#include -#include -#include -#include -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -struct CudaTraits { - enum { WarpSize = 32 /* 0x0020 */ }; - enum { WarpIndexMask = 0x001f /* Mask for warpindex */ }; - enum { WarpIndexShift = 5 /* WarpSize == 1 << WarpShift */ }; - - enum { SharedMemoryBanks = 32 /* Compute device 2.0 */ }; - enum { SharedMemoryCapacity = 0x0C000 /* 48k shared / 16k L1 Cache */ }; - enum { SharedMemoryUsage = 0x04000 /* 16k shared / 48k L1 Cache */ }; - - enum { UpperBoundGridCount = 65535 /* Hard upper bound */ }; - enum { ConstantMemoryCapacity = 0x010000 /* 64k bytes */ }; - enum { ConstantMemoryUsage = 0x008000 /* 32k bytes */ }; - enum { ConstantMemoryCache = 0x002000 /* 8k bytes */ }; - - typedef unsigned long - ConstantGlobalBufferType[ ConstantMemoryUsage / sizeof(unsigned long) ]; - - enum { ConstantMemoryUseThreshold = 0x000200 /* 512 bytes */ }; - - KOKKOS_INLINE_FUNCTION static - CudaSpace::size_type warp_count( CudaSpace::size_type i ) - { return ( i + 
WarpIndexMask ) >> WarpIndexShift ; } - - KOKKOS_INLINE_FUNCTION static - CudaSpace::size_type warp_align( CudaSpace::size_type i ) - { - enum { Mask = ~CudaSpace::size_type( WarpIndexMask ) }; - return ( i + WarpIndexMask ) & Mask ; - } -}; - -//---------------------------------------------------------------------------- - -CudaSpace::size_type cuda_internal_multiprocessor_count(); -CudaSpace::size_type cuda_internal_maximum_warp_count(); -CudaSpace::size_type cuda_internal_maximum_grid_count(); -CudaSpace::size_type cuda_internal_maximum_shared_words(); - -CudaSpace::size_type cuda_internal_maximum_concurrent_block_count(); - -CudaSpace::size_type * cuda_internal_scratch_flags( const CudaSpace::size_type size ); -CudaSpace::size_type * cuda_internal_scratch_space( const CudaSpace::size_type size ); -CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type size ); - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#if defined( __CUDACC__ ) - -/** \brief Access to constant memory on the device */ -#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE - -__device__ __constant__ -extern unsigned long kokkos_impl_cuda_constant_memory_buffer[] ; - -#else - -__device__ __constant__ -unsigned long kokkos_impl_cuda_constant_memory_buffer[ Kokkos::Impl::CudaTraits::ConstantMemoryUsage / sizeof(unsigned long) ] ; - -#endif - -namespace Kokkos { -namespace Impl { - void* cuda_resize_scratch_space(std::int64_t bytes, bool force_shrink = false); -} -} - -template< typename T > -inline -__device__ -T * kokkos_impl_cuda_shared_memory() -{ extern __shared__ Kokkos::CudaSpace::size_type sh[]; return (T*) sh ; } - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- -// See section B.17 of Cuda C Programming Guide Version 3.2 -// for 
discussion of -// __launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor) -// function qualifier which could be used to improve performance. -//---------------------------------------------------------------------------- -// Maximize L1 cache and minimize shared memory: -// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferL1 ); -// For 2.0 capability: 48 KB L1 and 16 KB shared -//---------------------------------------------------------------------------- - -template< class DriverType> -__global__ -static void cuda_parallel_launch_constant_memory() -{ - const DriverType & driver = - *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer ); - - driver(); -} - -template< class DriverType, unsigned int maxTperB, unsigned int minBperSM > -__global__ -__launch_bounds__(maxTperB, minBperSM) -static void cuda_parallel_launch_constant_memory() -{ - const DriverType & driver = - *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer ); - - driver(); -} - -template< class DriverType> -__global__ -static void cuda_parallel_launch_local_memory( const DriverType driver ) -{ - driver(); -} - -template< class DriverType, unsigned int maxTperB, unsigned int minBperSM > -__global__ -__launch_bounds__(maxTperB, minBperSM) -static void cuda_parallel_launch_local_memory( const DriverType driver ) -{ - driver(); -} - -template < class DriverType - , class LaunchBounds = Kokkos::LaunchBounds<> - , bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) ) > -struct CudaParallelLaunch ; - -template < class DriverType - , unsigned int MaxThreadsPerBlock - , unsigned int MinBlocksPerSM > -struct CudaParallelLaunch< DriverType - , Kokkos::LaunchBounds< MaxThreadsPerBlock - , MinBlocksPerSM > - , true > -{ - inline - CudaParallelLaunch( const DriverType & driver - , const dim3 & grid - , const dim3 & block - , const int shmem - , const cudaStream_t stream = 0 ) - { - if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { - - if ( 
sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) < - sizeof( DriverType ) ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: Functor is too large") ); - } - - // Fence before changing settings and copying closure - Kokkos::Cuda::fence(); - - if ( CudaTraits::SharedMemoryCapacity < shmem ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); - } - #ifndef KOKKOS_ARCH_KEPLER - // On Kepler the L1 has no benefit since it doesn't cache reads - else { - CUDA_SAFE_CALL( - cudaFuncSetCacheConfig - ( cuda_parallel_launch_constant_memory - < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > - , ( shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) - ) ); - } - #endif - - // Copy functor to constant memory on the device - cudaMemcpyToSymbol( - kokkos_impl_cuda_constant_memory_buffer, &driver, sizeof(DriverType) ); - - KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); - - // Invoke the driver function on the device - cuda_parallel_launch_constant_memory - < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > - <<< grid , block , shmem , stream >>>(); - -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) - CUDA_SAFE_CALL( cudaGetLastError() ); - Kokkos::Cuda::fence(); -#endif - } - } -}; - -template < class DriverType > -struct CudaParallelLaunch< DriverType - , Kokkos::LaunchBounds<> - , true > -{ - inline - CudaParallelLaunch( const DriverType & driver - , const dim3 & grid - , const dim3 & block - , const int shmem - , const cudaStream_t stream = 0 ) - { - if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { - - if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) < - sizeof( DriverType ) ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: Functor is too large") ); - } - - // Fence before changing settings and copying closure - Kokkos::Cuda::fence(); - - if ( CudaTraits::SharedMemoryCapacity < shmem ) { - 
Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); - } - #ifndef KOKKOS_ARCH_KEPLER - // On Kepler the L1 has no benefit since it doesn't cache reads - else { - CUDA_SAFE_CALL( - cudaFuncSetCacheConfig - ( cuda_parallel_launch_constant_memory< DriverType > - , ( shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) - ) ); - } - #endif - - // Copy functor to constant memory on the device - cudaMemcpyToSymbol( - kokkos_impl_cuda_constant_memory_buffer, &driver, sizeof(DriverType) ); - - KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); - - // Invoke the driver function on the device - cuda_parallel_launch_constant_memory< DriverType > - <<< grid , block , shmem , stream >>>(); - -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) - CUDA_SAFE_CALL( cudaGetLastError() ); - Kokkos::Cuda::fence(); -#endif - } - } -}; - -template < class DriverType - , unsigned int MaxThreadsPerBlock - , unsigned int MinBlocksPerSM > -struct CudaParallelLaunch< DriverType - , Kokkos::LaunchBounds< MaxThreadsPerBlock - , MinBlocksPerSM > - , false > -{ - inline - CudaParallelLaunch( const DriverType & driver - , const dim3 & grid - , const dim3 & block - , const int shmem - , const cudaStream_t stream = 0 ) - { - if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { - - if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) < - sizeof( DriverType ) ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: Functor is too large") ); - } - - if ( CudaTraits::SharedMemoryCapacity < shmem ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); - } - #ifndef KOKKOS_ARCH_KEPLER - // On Kepler the L1 has no benefit since it doesn't cache reads - else { - CUDA_SAFE_CALL( - cudaFuncSetCacheConfig - ( cuda_parallel_launch_local_memory - < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > - , ( shmem ? 
cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) - ) ); - } - #endif - - KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); - - // Invoke the driver function on the device - cuda_parallel_launch_local_memory - < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > - <<< grid , block , shmem , stream >>>( driver ); - -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) - CUDA_SAFE_CALL( cudaGetLastError() ); - Kokkos::Cuda::fence(); -#endif - } - } -}; - -template < class DriverType > -struct CudaParallelLaunch< DriverType - , Kokkos::LaunchBounds<> - , false > -{ - inline - CudaParallelLaunch( const DriverType & driver - , const dim3 & grid - , const dim3 & block - , const int shmem - , const cudaStream_t stream = 0 ) - { - if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { - - if ( sizeof( Kokkos::Impl::CudaTraits::ConstantGlobalBufferType ) < - sizeof( DriverType ) ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: Functor is too large") ); - } - - if ( CudaTraits::SharedMemoryCapacity < shmem ) { - Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); - } - #ifndef KOKKOS_ARCH_KEPLER - // On Kepler the L1 has no benefit since it doesn't cache reads - else { - CUDA_SAFE_CALL( - cudaFuncSetCacheConfig - ( cuda_parallel_launch_local_memory< DriverType > - , ( shmem ? 
cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) - ) ); - } - #endif - - KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); - - // Invoke the driver function on the device - cuda_parallel_launch_local_memory< DriverType > - <<< grid , block , shmem , stream >>>( driver ); - -#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) - CUDA_SAFE_CALL( cudaGetLastError() ); - Kokkos::Cuda::fence(); -#endif - } - } -}; - -//---------------------------------------------------------------------------- - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* defined( __CUDACC__ ) */ -#endif /* defined( KOKKOS_ENABLE_CUDA ) */ -#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */ - diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp index e13744e327..4c9ed47085 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -55,7 +55,7 @@ #include #include -#include +//#include #include #if defined(KOKKOS_ENABLE_PROFILING) @@ -183,7 +183,7 @@ void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const enum { max_uvm_allocations = 65536 }; - Cuda::fence(); + Cuda::impl_static_fence(); if ( arg_alloc_size > 0 ) { Kokkos::Impl::num_uvm_allocations++; @@ -194,7 +194,7 @@ void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) ); } - Cuda::fence(); + Cuda::impl_static_fence(); return ptr ; } @@ -217,14 +217,14 @@ void CudaSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_all void CudaUVMSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const { - Cuda::fence(); + Cuda::impl_static_fence(); try { if ( arg_alloc_ptr != nullptr ) { Kokkos::Impl::num_uvm_allocations--; CUDA_SAFE_CALL( cudaFree( 
arg_alloc_ptr ) ); } } catch(...) {} - Cuda::fence(); + Cuda::impl_static_fence(); } void CudaHostPinnedSpace::deallocate( void * const arg_alloc_ptr , const size_t /* arg_alloc_size */ ) const @@ -390,7 +390,7 @@ SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: { #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { - Cuda::fence(); //Make sure I can access the label ... + Cuda::impl_static_fence(); //Make sure I can access the label ... Kokkos::Profiling::deallocateData( Kokkos::Profiling::SpaceHandle(Kokkos::CudaUVMSpace::name()),RecordBase::m_alloc_ptr->m_label, data(),size()); diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp new file mode 100644 index 0000000000..9d4bcbc8cf --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics.hpp @@ -0,0 +1,657 @@ +/* +@HEADER +================================================================================ + +ORIGINAL LICENSE +---------------- + +Copyright (c) 2018, NVIDIA Corporation + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +================================================================================ + +LICENSE ASSOCIATED WITH SUBSEQUENT MODIFICATIONS +------------------------------------------------ + +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +@HEADER +*/ + +#include +#if defined(__CUDA_ARCH__) && defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + +#include + +#ifndef _SIMT_DETAILS_CONFIG +#define _SIMT_DETAILS_CONFIG + +namespace Kokkos { +namespace Impl { + + +#ifndef __simt_scope +// Modification: Kokkos GPU atomics should default to `gpu` scope +#define __simt_scope "gpu" +#endif + +#define __simt_fence_signal_() asm volatile("":::"memory") +#define __simt_fence_sc_() asm volatile("fence.sc." __simt_scope ";":::"memory") +#define __simt_fence_() asm volatile("fence." __simt_scope ";":::"memory") + +#define __simt_load_acquire_8_as_32(ptr,ret) asm volatile("ld.acquire." __simt_scope ".b8 %0, [%1];" : "=r"(ret) : "l"(ptr) : "memory") +#define __simt_load_relaxed_8_as_32(ptr,ret) asm volatile("ld.relaxed." __simt_scope ".b8 %0, [%1];" : "=r"(ret) : "l"(ptr) : "memory") +#define __simt_store_release_8_as_32(ptr,desired) asm volatile("st.release." __simt_scope ".b8 [%0], %1;" :: "l"(ptr), "r"(desired) : "memory") +#define __simt_store_relaxed_8_as_32(ptr,desired) asm volatile("st.relaxed." __simt_scope ".b8 [%0], %1;" :: "l"(ptr), "r"(desired) : "memory") + +#define __simt_load_acquire_16(ptr,ret) asm volatile("ld.acquire." 
__simt_scope ".b16 %0, [%1];" : "=h"(ret) : "l"(ptr) : "memory") +#define __simt_load_relaxed_16(ptr,ret) asm volatile("ld.relaxed." __simt_scope ".b16 %0, [%1];" : "=h"(ret) : "l"(ptr) : "memory") +#define __simt_store_release_16(ptr,desired) asm volatile("st.release." __simt_scope ".b16 [%0], %1;" :: "l"(ptr), "h"(desired) : "memory") +#define __simt_store_relaxed_16(ptr,desired) asm volatile("st.relaxed." __simt_scope ".b16 [%0], %1;" :: "l"(ptr), "h"(desired) : "memory") + +#define __simt_load_acquire_32(ptr,ret) asm volatile("ld.acquire." __simt_scope ".b32 %0, [%1];" : "=r"(ret) : "l"(ptr) : "memory") +#define __simt_load_relaxed_32(ptr,ret) asm volatile("ld.relaxed." __simt_scope ".b32 %0, [%1];" : "=r"(ret) : "l"(ptr) : "memory") +#define __simt_store_release_32(ptr,desired) asm volatile("st.release." __simt_scope ".b32 [%0], %1;" :: "l"(ptr), "r"(desired) : "memory") +#define __simt_store_relaxed_32(ptr,desired) asm volatile("st.relaxed." __simt_scope ".b32 [%0], %1;" :: "l"(ptr), "r"(desired) : "memory") +#define __simt_exch_release_32(ptr,old,desired) asm volatile("atom.exch.release." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(desired) : "memory") +#define __simt_exch_acquire_32(ptr,old,desired) asm volatile("atom.exch.acquire." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(desired) : "memory") +#define __simt_exch_acq_rel_32(ptr,old,desired) asm volatile("atom.exch.acq_rel." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(desired) : "memory") +#define __simt_exch_relaxed_32(ptr,old,desired) asm volatile("atom.exch.relaxed." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(desired) : "memory") +#define __simt_cas_release_32(ptr,old,expected,desired) asm volatile("atom.cas.release." __simt_scope ".b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +#define __simt_cas_acquire_32(ptr,old,expected,desired) asm volatile("atom.cas.acquire." 
__simt_scope ".b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +#define __simt_cas_acq_rel_32(ptr,old,expected,desired) asm volatile("atom.cas.acq_rel." __simt_scope ".b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +#define __simt_cas_relaxed_32(ptr,old,expected,desired) asm volatile("atom.cas.relaxed." __simt_scope ".b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +#define __simt_add_release_32(ptr,old,addend) asm volatile("atom.add.release." __simt_scope ".u32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(addend) : "memory") +#define __simt_add_acquire_32(ptr,old,addend) asm volatile("atom.add.acquire." __simt_scope ".u32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(addend) : "memory") +#define __simt_add_acq_rel_32(ptr,old,addend) asm volatile("atom.add.acq_rel." __simt_scope ".u32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(addend) : "memory") +#define __simt_add_relaxed_32(ptr,old,addend) asm volatile("atom.add.relaxed." __simt_scope ".u32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(addend) : "memory") +#define __simt_and_release_32(ptr,old,andend) asm volatile("atom.and.release." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(andend) : "memory") +#define __simt_and_acquire_32(ptr,old,andend) asm volatile("atom.and.acquire." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(andend) : "memory") +#define __simt_and_acq_rel_32(ptr,old,andend) asm volatile("atom.and.acq_rel." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(andend) : "memory") +#define __simt_and_relaxed_32(ptr,old,andend) asm volatile("atom.and.relaxed." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(andend) : "memory") +#define __simt_or_release_32(ptr,old,orend) asm volatile("atom.or.release." 
__simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(orend) : "memory") +#define __simt_or_acquire_32(ptr,old,orend) asm volatile("atom.or.acquire." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(orend) : "memory") +#define __simt_or_acq_rel_32(ptr,old,orend) asm volatile("atom.or.acq_rel." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(orend) : "memory") +#define __simt_or_relaxed_32(ptr,old,orend) asm volatile("atom.or.relaxed." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(orend) : "memory") +#define __simt_xor_release_32(ptr,old,xorend) asm volatile("atom.xor.release." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(xorend) : "memory") +#define __simt_xor_acquire_32(ptr,old,xorend) asm volatile("atom.xor.acquire." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(xorend) : "memory") +#define __simt_xor_acq_rel_32(ptr,old,xorend) asm volatile("atom.xor.acq_rel." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(xorend) : "memory") +#define __simt_xor_relaxed_32(ptr,old,xorend) asm volatile("atom.xor.relaxed." __simt_scope ".b32 %0, [%1], %2;" : "=r"(old) : "l"(ptr), "r"(xorend) : "memory") + +#define __simt_load_acquire_64(ptr,ret) asm volatile("ld.acquire." __simt_scope ".b64 %0, [%1];" : "=l"(ret) : "l"(ptr) : "memory") +#define __simt_load_relaxed_64(ptr,ret) asm volatile("ld.relaxed." __simt_scope ".b64 %0, [%1];" : "=l"(ret) : "l"(ptr) : "memory") +#define __simt_store_release_64(ptr,desired) asm volatile("st.release." __simt_scope ".b64 [%0], %1;" :: "l"(ptr), "l"(desired) : "memory") +#define __simt_store_relaxed_64(ptr,desired) asm volatile("st.relaxed." __simt_scope ".b64 [%0], %1;" :: "l"(ptr), "l"(desired) : "memory") +#define __simt_exch_release_64(ptr,old,desired) asm volatile("atom.exch.release." 
__simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(desired) : "memory") +#define __simt_exch_acquire_64(ptr,old,desired) asm volatile("atom.exch.acquire." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(desired) : "memory") +#define __simt_exch_acq_rel_64(ptr,old,desired) asm volatile("atom.exch.acq_rel." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(desired) : "memory") +#define __simt_exch_relaxed_64(ptr,old,desired) asm volatile("atom.exch.relaxed." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(desired) : "memory") +#define __simt_cas_release_64(ptr,old,expected,desired) asm volatile("atom.cas.release." __simt_scope ".b64 %0, [%1], %2, %3;" : "=l"(old) : "l"(ptr), "l"(expected), "l"(desired) : "memory") +#define __simt_cas_acquire_64(ptr,old,expected,desired) asm volatile("atom.cas.acquire." __simt_scope ".b64 %0, [%1], %2, %3;" : "=l"(old) : "l"(ptr), "l"(expected), "l"(desired) : "memory") +#define __simt_cas_acq_rel_64(ptr,old,expected,desired) asm volatile("atom.cas.acq_rel." __simt_scope ".b64 %0, [%1], %2, %3;" : "=l"(old) : "l"(ptr), "l"(expected), "l"(desired) : "memory") +#define __simt_cas_relaxed_64(ptr,old,expected,desired) asm volatile("atom.cas.relaxed." __simt_scope ".b64 %0, [%1], %2, %3;" : "=l"(old) : "l"(ptr), "l"(expected), "l"(desired) : "memory") +#define __simt_add_release_64(ptr,old,addend) asm volatile("atom.add.release." __simt_scope ".u64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(addend) : "memory") +#define __simt_add_acquire_64(ptr,old,addend) asm volatile("atom.add.acquire." __simt_scope ".u64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(addend) : "memory") +#define __simt_add_acq_rel_64(ptr,old,addend) asm volatile("atom.add.acq_rel." __simt_scope ".u64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(addend) : "memory") +#define __simt_add_relaxed_64(ptr,old,addend) asm volatile("atom.add.relaxed." 
__simt_scope ".u64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(addend) : "memory") +#define __simt_and_release_64(ptr,old,andend) asm volatile("atom.and.release." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(andend) : "memory") +#define __simt_and_acquire_64(ptr,old,andend) asm volatile("atom.and.acquire." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(andend) : "memory") +#define __simt_and_acq_rel_64(ptr,old,andend) asm volatile("atom.and.acq_rel." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(andend) : "memory") +#define __simt_and_relaxed_64(ptr,old,andend) asm volatile("atom.and.relaxed." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(andend) : "memory") +#define __simt_or_release_64(ptr,old,orend) asm volatile("atom.or.release." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(orend) : "memory") +#define __simt_or_acquire_64(ptr,old,orend) asm volatile("atom.or.acquire." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(orend) : "memory") +#define __simt_or_acq_rel_64(ptr,old,orend) asm volatile("atom.or.acq_rel." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(orend) : "memory") +#define __simt_or_relaxed_64(ptr,old,orend) asm volatile("atom.or.relaxed." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(orend) : "memory") +#define __simt_xor_release_64(ptr,old,xorend) asm volatile("atom.xor.release." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(xorend) : "memory") +#define __simt_xor_acquire_64(ptr,old,xorend) asm volatile("atom.xor.acquire." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(xorend) : "memory") +#define __simt_xor_acq_rel_64(ptr,old,xorend) asm volatile("atom.xor.acq_rel." __simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(xorend) : "memory") +#define __simt_xor_relaxed_64(ptr,old,xorend) asm volatile("atom.xor.relaxed." 
__simt_scope ".b64 %0, [%1], %2;" : "=l"(old) : "l"(ptr), "l"(xorend) : "memory") + +#define __simt_nanosleep(timeout) asm volatile("nanosleep.u32 %0;" :: "r"(unsigned(timeout)) : ) + +/* + definitions +*/ + +#ifndef __GCC_ATOMIC_BOOL_LOCK_FREE +#define __GCC_ATOMIC_BOOL_LOCK_FREE 2 +#define __GCC_ATOMIC_CHAR_LOCK_FREE 2 +#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2 +#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2 +#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2 +#define __GCC_ATOMIC_SHORT_LOCK_FREE 2 +#define __GCC_ATOMIC_INT_LOCK_FREE 2 +#define __GCC_ATOMIC_LONG_LOCK_FREE 2 +#define __GCC_ATOMIC_LLONG_LOCK_FREE 2 +#define __GCC_ATOMIC_POINTER_LOCK_FREE 2 +#endif + +#ifndef __ATOMIC_RELAXED +#define __ATOMIC_RELAXED 0 +#define __ATOMIC_CONSUME 1 +#define __ATOMIC_ACQUIRE 2 +#define __ATOMIC_RELEASE 3 +#define __ATOMIC_ACQ_REL 4 +#define __ATOMIC_SEQ_CST 5 +#endif + +inline __device__ int __stronger_order_simt_(int a, int b) { + if (b == __ATOMIC_SEQ_CST) return __ATOMIC_SEQ_CST; + if (b == __ATOMIC_RELAXED) return a; + switch (a) { + case __ATOMIC_SEQ_CST: + case __ATOMIC_ACQ_REL: return a; + case __ATOMIC_CONSUME: + case __ATOMIC_ACQUIRE: if (b != __ATOMIC_ACQUIRE) return __ATOMIC_ACQ_REL; else return __ATOMIC_ACQUIRE; + case __ATOMIC_RELEASE: if (b != __ATOMIC_RELEASE) return __ATOMIC_ACQ_REL; else return __ATOMIC_RELEASE; + case __ATOMIC_RELAXED: return b; + default: assert(0); + } + return __ATOMIC_SEQ_CST; +} + +/* + base +*/ + +#define DO__atomic_load_simt_(bytes, bits) \ +template::type = 0> \ +void __device__ __atomic_load_simt_ (const type *ptr, type *ret, int memorder) { \ + int##bits##_t tmp = 0; \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_load_acquire_##bits(ptr, tmp); break; \ + case __ATOMIC_RELAXED: __simt_load_relaxed_##bits(ptr, tmp); break; \ + default: assert(0); \ + } \ + memcpy(ret, &tmp, bytes); \ +} +DO__atomic_load_simt_(1,32) +DO__atomic_load_simt_(2,16) 
+DO__atomic_load_simt_(4,32) +DO__atomic_load_simt_(8,64) + +template +type __device__ __atomic_load_n_simt_(const type *ptr, int memorder) { + type ret; + __atomic_load_simt_(ptr, &ret, memorder); + return ret; +} + +#define DO__atomic_store_simt_(bytes, bits) \ +template::type = 0> \ +void __device__ __atomic_store_simt_ (type *ptr, type *val, int memorder) { \ + int##bits##_t tmp = 0; \ + memcpy(&tmp, val, bytes); \ + switch (memorder) { \ + case __ATOMIC_RELEASE: __simt_store_release_##bits(ptr, tmp); break; \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_RELAXED: __simt_store_relaxed_##bits(ptr, tmp); break; \ + default: assert(0); \ + } \ +} +DO__atomic_store_simt_(1,32) +DO__atomic_store_simt_(2,16) +DO__atomic_store_simt_(4,32) +DO__atomic_store_simt_(8,64) + +template +void __device__ __atomic_store_n_simt_(type *ptr, type val, int memorder) { + __atomic_store_simt_(ptr, &val, memorder); +} + +#define DO__atomic_compare_exchange_simt_(bytes, bits) \ +template::type = 0> \ +bool __device__ __atomic_compare_exchange_simt_ (type *ptr, type *expected, const type *desired, bool, int success_memorder, int failure_memorder) { \ + int##bits##_t tmp = 0, old = 0, old_tmp; \ + memcpy(&tmp, desired, bytes); \ + memcpy(&old, expected, bytes); \ + old_tmp = old; \ + switch (__stronger_order_simt_(success_memorder, failure_memorder)) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_cas_acquire_##bits(ptr, old, old_tmp, tmp); break; \ + case __ATOMIC_ACQ_REL: __simt_cas_acq_rel_##bits(ptr, old, old_tmp, tmp); break; \ + case __ATOMIC_RELEASE: __simt_cas_release_##bits(ptr, old, old_tmp, tmp); break; \ + case __ATOMIC_RELAXED: __simt_cas_relaxed_##bits(ptr, old, old_tmp, tmp); break; \ + default: assert(0); \ + } \ + bool const ret = old == old_tmp; \ + memcpy(expected, &old, bytes); \ + return ret; \ +} +DO__atomic_compare_exchange_simt_(4, 32) +DO__atomic_compare_exchange_simt_(8, 64) + 
+template::type = 0> \ +bool __device__ __atomic_compare_exchange_simt_(type *ptr, type *expected, const type *desired, bool, int success_memorder, int failure_memorder) { + + using R = typename std::conditional::value, volatile uint32_t, uint32_t>::type; + auto const aligned = (R*)((intptr_t)ptr & ~(sizeof(uint32_t) - 1)); + auto const offset = uint32_t((intptr_t)ptr & (sizeof(uint32_t) - 1)) * 8; + auto const mask = ((1 << sizeof(type)*8) - 1) << offset; + + uint32_t old = *expected << offset, old_value; + while (1) { + old_value = (old & mask) >> offset; + if (old_value != *expected) + break; + uint32_t const attempt = (old & ~mask) | (*desired << offset); + if (__atomic_compare_exchange_simt_ (aligned, &old, &attempt, true, success_memorder, failure_memorder)) + return true; + } + *expected = old_value; + return false; +} + +template +bool __device__ __atomic_compare_exchange_n_simt_(type *ptr, type *expected, type desired, bool weak, int success_memorder, int failure_memorder) { + return __atomic_compare_exchange_simt_(ptr, expected, &desired, weak, success_memorder, failure_memorder); +} + +#define DO__atomic_exchange_simt_(bytes, bits) \ +template::type = 0> \ +void __device__ __atomic_exchange_simt_ (type *ptr, type *val, type *ret, int memorder) { \ + int##bits##_t tmp = 0; \ + memcpy(&tmp, val, bytes); \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_exch_acquire_##bits(ptr, tmp, tmp); break; \ + case __ATOMIC_ACQ_REL: __simt_exch_acq_rel_##bits(ptr, tmp, tmp); break; \ + case __ATOMIC_RELEASE: __simt_exch_release_##bits(ptr, tmp, tmp); break; \ + case __ATOMIC_RELAXED: __simt_exch_relaxed_##bits(ptr, tmp, tmp); break; \ + default: assert(0); \ + } \ + memcpy(ret, &tmp, bytes); \ +} +DO__atomic_exchange_simt_(4,32) +DO__atomic_exchange_simt_(8,64) + +template::type = 0> +void __device__ __atomic_exchange_simt_ (type *ptr, type *val, type *ret, int memorder) { + + type 
expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); + while(!__atomic_compare_exchange_simt_(ptr, &expected, val, true, memorder, memorder)) + ; + *ret = expected; +} + +template +type __device__ __atomic_exchange_n_simt_(type *ptr, type val, int memorder) { + type ret; + __atomic_exchange_simt_(ptr, &val, &ret, memorder); + return ret; +} + +#define DO__atomic_fetch_add_simt_(bytes, bits) \ +template::type = 0> \ +type __device__ __atomic_fetch_add_simt_ (type *ptr, delta val, int memorder) { \ + type ret; \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_add_acquire_##bits(ptr, ret, val); break; \ + case __ATOMIC_ACQ_REL: __simt_add_acq_rel_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELEASE: __simt_add_release_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELAXED: __simt_add_relaxed_##bits(ptr, ret, val); break; \ + default: assert(0); \ + } \ + return ret; \ +} +DO__atomic_fetch_add_simt_(4, 32) +DO__atomic_fetch_add_simt_(8, 64) + +template::type = 0> +type __device__ __atomic_fetch_add_simt_ (type *ptr, delta val, int memorder) { + + type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); + type const desired = expected + val; + while(!__atomic_compare_exchange_simt_(ptr, &expected, &desired, true, memorder, memorder)) + ; + return expected; +} + +#define DO__atomic_fetch_sub_simt_(bytes, bits) \ +template::type = 0> \ +type __device__ __atomic_fetch_sub_simt_ (type *ptr, delta val, int memorder) { \ + type ret; \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_add_acquire_##bits(ptr, ret, -val); break; \ + case __ATOMIC_ACQ_REL: __simt_add_acq_rel_##bits(ptr, ret, -val); break; \ + case __ATOMIC_RELEASE: __simt_add_release_##bits(ptr, ret, -val); break; \ + case __ATOMIC_RELAXED: __simt_add_relaxed_##bits(ptr, ret, -val); break; \ + default: assert(0); \ + } \ + return ret; \ +} 
+DO__atomic_fetch_sub_simt_(4,32) +DO__atomic_fetch_sub_simt_(8,64) + +template::type = 0> +type __device__ __atomic_fetch_sub_simt_ (type *ptr, delta val, int memorder) { + + type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); + type const desired = expected - val; + while(!__atomic_compare_exchange_simt_(ptr, &expected, &desired, true, memorder, memorder)) + ; + return expected; +} + +#define DO__atomic_fetch_and_simt_(bytes, bits) \ +template::type = 0> \ +type __device__ __atomic_fetch_and_simt_ (type *ptr, type val, int memorder) { \ + type ret; \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_and_acquire_##bits(ptr, ret, val); break; \ + case __ATOMIC_ACQ_REL: __simt_and_acq_rel_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELEASE: __simt_and_release_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELAXED: __simt_and_relaxed_##bits(ptr, ret, val); break; \ + default: assert(0); \ + } \ + return ret; \ +} +DO__atomic_fetch_and_simt_(4,32) +DO__atomic_fetch_and_simt_(8,64) + +template::type = 0> +type __device__ __atomic_fetch_and_simt_ (type *ptr, delta val, int memorder) { + + type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); + type const desired = expected & val; + while(!__atomic_compare_exchange_simt_(ptr, &expected, &desired, true, memorder, memorder)) + ; + return expected; +} + +#define DO__atomic_fetch_xor_simt_(bytes, bits) \ +template::type = 0> \ +type __device__ __atomic_fetch_xor_simt_ (type *ptr, type val, int memorder) { \ + type ret; \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_xor_acquire_##bits(ptr, ret, val); break; \ + case __ATOMIC_ACQ_REL: __simt_xor_acq_rel_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELEASE: __simt_xor_release_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELAXED: __simt_xor_relaxed_##bits(ptr, ret, val); break; \ + 
default: assert(0); \ + } \ + return ret; \ +} +DO__atomic_fetch_xor_simt_(4,32) +DO__atomic_fetch_xor_simt_(8,64) + +template::type = 0> +type __device__ __atomic_fetch_xor_simt_ (type *ptr, delta val, int memorder) { + + type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); + type const desired = expected ^ val; + while(!__atomic_compare_exchange_simt_(ptr, &expected, &desired, true, memorder, memorder)) + ; + return expected; +} + +#define DO__atomic_fetch_or_simt_(bytes, bits) \ +template::type = 0> \ +type __device__ __atomic_fetch_or_simt_ (type *ptr, type val, int memorder) { \ + type ret; \ + switch (memorder) { \ + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); \ + case __ATOMIC_CONSUME: \ + case __ATOMIC_ACQUIRE: __simt_or_acquire_##bits(ptr, ret, val); break; \ + case __ATOMIC_ACQ_REL: __simt_or_acq_rel_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELEASE: __simt_or_release_##bits(ptr, ret, val); break; \ + case __ATOMIC_RELAXED: __simt_or_relaxed_##bits(ptr, ret, val); break; \ + default: assert(0); \ + } \ + return ret; \ +} +DO__atomic_fetch_or_simt_(4,32) +DO__atomic_fetch_or_simt_(8,64) + +template::type = 0> +type __device__ __atomic_fetch_or_simt_ (type *ptr, delta val, int memorder) { + + type expected = __atomic_load_n_simt_(ptr, __ATOMIC_RELAXED); + type const desired = expected | val; + while(!__atomic_compare_exchange_simt_(ptr, &expected, &desired, true, memorder, memorder)) + ; + return expected; +} + +template +inline bool __device__ __atomic_test_and_set_simt_(type *ptr, int memorder) { + return __atomic_exchange_n_simt_((char*)ptr, (char)1, memorder) == 1; +} +template +inline void __device__ __atomic_clear_simt_(type *ptr, int memorder) { + return __atomic_store_n_simt_((char*)ptr, (char)0, memorder); +} + +inline constexpr __device__ bool __atomic_always_lock_free_simt_ (size_t size, void *) { + return size <= 8; +} +inline __device__ bool __atomic_is_lock_free_simt_(size_t size, void * ptr) { + return 
__atomic_always_lock_free_simt_(size, ptr); +} + +/* + fences +*/ + +inline void __device__ __atomic_thread_fence_simt(int memorder) { + switch (memorder) { + case __ATOMIC_SEQ_CST: __simt_fence_sc_(); break; + case __ATOMIC_CONSUME: + case __ATOMIC_ACQUIRE: + case __ATOMIC_ACQ_REL: + case __ATOMIC_RELEASE: __simt_fence_(); break; + case __ATOMIC_RELAXED: break; + default: assert(0); + } +} +inline void __device__ __atomic_signal_fence_simt(int memorder) { + __atomic_thread_fence_simt(memorder); +} + +/* + non-volatile +*/ + +template type __device__ __atomic_load_n_simt(const type *ptr, int memorder) { + return __atomic_load_n_simt_(const_cast(ptr), memorder); +} +template void __device__ __atomic_load_simt(const type *ptr, type *ret, int memorder) { + __atomic_load_simt_(const_cast(ptr), ret, memorder); +} +template void __device__ __atomic_store_n_simt(type *ptr, type val, int memorder) { + __atomic_store_n_simt_(const_cast(ptr), val, memorder); +} +template void __device__ __atomic_store_simt(type *ptr, type *val, int memorder) { + __atomic_store_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_exchange_n_simt(type *ptr, type val, int memorder) { + return __atomic_exchange_n_simt_(const_cast(ptr), val, memorder); +} +template void __device__ __atomic_exchange_simt(type *ptr, type *val, type *ret, int memorder) { + __atomic_exchange_simt_(const_cast(ptr), val, ret, memorder); +} +template bool __device__ __atomic_compare_exchange_n_simt(type *ptr, type *expected, type desired, bool weak, int success_memorder, int failure_memorder) { + return __atomic_compare_exchange_n_simt_(const_cast(ptr), expected, desired, weak, success_memorder, failure_memorder); +} +template bool __device__ __atomic_compare_exchange_simt(type *ptr, type *expected, type *desired, bool weak, int success_memorder, int failure_memorder) { + return __atomic_compare_exchange_simt_(const_cast(ptr), expected, desired, weak, success_memorder, failure_memorder); +} 
+template type __device__ __atomic_fetch_add_simt(type *ptr, delta val, int memorder) { + return __atomic_fetch_add_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_sub_simt(type *ptr, delta val, int memorder) { + return __atomic_fetch_sub_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_and_simt(type *ptr, type val, int memorder) { + return __atomic_fetch_and_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_xor_simt(type *ptr, type val, int memorder) { + return __atomic_fetch_xor_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_or_simt(type *ptr, type val, int memorder) { + return __atomic_fetch_or_simt_(const_cast(ptr), val, memorder); +} +template bool __device__ __atomic_test_and_set_simt(void *ptr, int memorder) { + return __atomic_test_and_set_simt_(const_cast(ptr), memorder); +} +template void __device__ __atomic_clear_simt(void *ptr, int memorder) { + return __atomic_clear_simt_(const_cast(ptr), memorder); +} +inline bool __device__ __atomic_always_lock_free_simt(size_t size, void *ptr) { + return __atomic_always_lock_free_simt_(size, const_cast(ptr)); +} +inline bool __device__ __atomic_is_lock_free_simt(size_t size, void *ptr) { + return __atomic_is_lock_free_simt_(size, const_cast(ptr)); +} + +/* + volatile +*/ + +template type __device__ __atomic_load_n_simt(const volatile type *ptr, int memorder) { + return __atomic_load_n_simt_(const_cast(ptr), memorder); +} +template void __device__ __atomic_load_simt(const volatile type *ptr, type *ret, int memorder) { + __atomic_load_simt_(const_cast(ptr), ret, memorder); +} +template void __device__ __atomic_store_n_simt(volatile type *ptr, type val, int memorder) { + __atomic_store_n_simt_(const_cast(ptr), val, memorder); +} +template void __device__ __atomic_store_simt(volatile type *ptr, type *val, int memorder) { + __atomic_store_simt_(const_cast(ptr), val, memorder); +} 
+template type __device__ __atomic_exchange_n_simt(volatile type *ptr, type val, int memorder) { + return __atomic_exchange_n_simt_(const_cast(ptr), val, memorder); +} +template void __device__ __atomic_exchange_simt(volatile type *ptr, type *val, type *ret, int memorder) { + __atomic_exchange_simt_(const_cast(ptr), val, ret, memorder); +} +template bool __device__ __atomic_compare_exchange_n_simt(volatile type *ptr, type *expected, type desired, bool weak, int success_memorder, int failure_memorder) { + return __atomic_compare_exchange_n_simt_(const_cast(ptr), expected, desired, weak, success_memorder, failure_memorder); +} +template bool __device__ __atomic_compare_exchange_simt(volatile type *ptr, type *expected, type *desired, bool weak, int success_memorder, int failure_memorder) { + return __atomic_compare_exchange_simt_(const_cast(ptr), expected, desired, weak, success_memorder, failure_memorder); +} +template type __device__ __atomic_fetch_add_simt(volatile type *ptr, delta val, int memorder) { + return __atomic_fetch_add_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_sub_simt(volatile type *ptr, delta val, int memorder) { + return __atomic_fetch_sub_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_and_simt(volatile type *ptr, type val, int memorder) { + return __atomic_fetch_and_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_xor_simt(volatile type *ptr, type val, int memorder) { + return __atomic_fetch_xor_simt_(const_cast(ptr), val, memorder); +} +template type __device__ __atomic_fetch_or_simt(volatile type *ptr, type val, int memorder) { + return __atomic_fetch_or_simt_(const_cast(ptr), val, memorder); +} +template bool __device__ __atomic_test_and_set_simt(volatile void *ptr, int memorder) { + return __atomic_test_and_set_simt_(const_cast(ptr), memorder); +} +template void __device__ __atomic_clear_simt(volatile void *ptr, int memorder) { + return 
__atomic_clear_simt_(const_cast(ptr), memorder); +} + + + +} // end namespace Impl +} // end namespace Kokkos + +#endif //_SIMT_DETAILS_CONFIG + +#ifndef KOKKOS_SIMT_ATOMIC_BUILTIN_REPLACEMENTS_DEFINED +/* + builtins +*/ + +#define __atomic_load_n __atomic_load_n_simt +#define __atomic_load __atomic_load_simt +#define __atomic_store_n __atomic_store_n_simt +#define __atomic_store __atomic_store_simt +#define __atomic_exchange_n __atomic_exchange_n_simt +#define __atomic_exchange __atomic_exchange_simt +#define __atomic_compare_exchange_n __atomic_compare_exchange_n_simt +#define __atomic_compare_exchange __atomic_compare_exchange_simt +#define __atomic_fetch_add __atomic_fetch_add_simt +#define __atomic_fetch_sub __atomic_fetch_sub_simt +#define __atomic_fetch_and __atomic_fetch_and_simt +#define __atomic_fetch_xor __atomic_fetch_xor_simt +#define __atomic_fetch_or __atomic_fetch_or_simt +#define __atomic_test_and_set __atomic_test_and_set_simt +#define __atomic_clear __atomic_clear_simt +#define __atomic_always_lock_free __atomic_always_lock_free_simt +#define __atomic_is_lock_free __atomic_is_lock_free_simt +#define __atomic_thread_fence __atomic_thread_fence_simt +#define __atomic_signal_fence __atomic_signal_fence_simt + +#define KOKKOS_SIMT_ATOMIC_BUILTIN_REPLACEMENTS_DEFINED + +#endif //__CUDA_ARCH__ && KOKKOS_ENABLE_CUDA_ASM_ATOMICS +#endif // KOKKOS_SIMT_ATOMIC_BUILTIN_REPLACEMENTS_DEFINED diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics_Restore_Builtins.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics_Restore_Builtins.hpp new file mode 100644 index 0000000000..bedb147227 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Atomic_Intrinsics_Restore_Builtins.hpp @@ -0,0 +1,68 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifdef KOKKOS_SIMT_ATOMIC_BUILTIN_REPLACEMENTS_DEFINED + +#undef __atomic_load_n +#undef __atomic_load +#undef __atomic_store_n +#undef __atomic_store +#undef __atomic_exchange_n +#undef __atomic_exchange +#undef __atomic_compare_exchange_n +#undef __atomic_compare_exchange +#undef __atomic_fetch_add +#undef __atomic_fetch_sub +#undef __atomic_fetch_and +#undef __atomic_fetch_xor +#undef __atomic_fetch_or +#undef __atomic_test_and_set +#undef __atomic_clear +#undef __atomic_always_lock_free +#undef __atomic_is_lock_free +#undef __atomic_thread_fence +#undef __atomic_signal_fence + +#undef KOKKOS_SIMT_ATOMIC_BUILTIN_REPLACEMENTS_DEFINED + +#endif // KOKKOS_SIMT_ATOMIC_BUILTIN_REPLACEMENTS_DEFINED diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp similarity index 69% rename from lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp rename to lib/kokkos/core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp index 145d93ed76..932bde2b37 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp @@ -58,7 +58,68 @@ struct CudaGetMaxBlockSize; template int cuda_get_max_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { - return CudaGetMaxBlockSize::get_block_size(f,vector_length, shmem_extra_block,shmem_extra_thread); + return CudaGetMaxBlockSize::get_block_size(f,vector_length, shmem_extra_block,shmem_extra_thread); +} + +template +int cuda_get_max_block_size(const CudaInternal* cuda_instance, const cudaFuncAttributes& attr, const FunctorType& f, const size_t vector_length, + const size_t shmem_block, const size_t shmem_thread) { + + const int min_blocks_per_sm = LaunchBounds::minBperSM == 0 ? 
+ 1 : LaunchBounds::minBperSM ; + const int max_threads_per_block = LaunchBounds::maxTperB == 0 ? + cuda_instance->m_maxThreadsPerBlock : LaunchBounds::maxTperB ; + + const int regs_per_thread = attr.numRegs; + const int regs_per_sm = cuda_instance->m_regsPerSM; + const int shmem_per_sm = cuda_instance->m_shmemPerSM; + const int max_shmem_per_block = cuda_instance->m_maxShmemPerBlock; + const int max_blocks_per_sm = cuda_instance->m_maxBlocksPerSM; + const int max_threads_per_sm = cuda_instance->m_maxThreadsPerSM; + + int block_size = std::min(attr.maxThreadsPerBlock,max_threads_per_block); + + int functor_shmem = FunctorTeamShmemSize< FunctorType >::value( f , block_size/vector_length ); + int total_shmem = shmem_block + shmem_thread*(block_size/vector_length) + functor_shmem + attr.sharedSizeBytes; + int max_blocks_regs = regs_per_sm/(regs_per_thread*block_size); + int max_blocks_shmem = (total_shmem0?shmem_per_sm/total_shmem:max_blocks_regs):0; + int blocks_per_sm = std::min(max_blocks_regs,max_blocks_shmem); + int threads_per_sm = blocks_per_sm * block_size; + if(threads_per_sm > max_threads_per_sm) { + blocks_per_sm = max_threads_per_sm/block_size; + threads_per_sm = blocks_per_sm * block_size; + } + int opt_block_size = (blocks_per_sm>=min_blocks_per_sm) ? 
block_size : 0; + int opt_threads_per_sm = threads_per_sm; + //printf("BlockSizeMax: %i Shmem: %i %i %i %i Regs: %i %i Blocks: %i %i Achieved: %i %i Opt: %i %i\n",block_size, + // shmem_per_sm,max_shmem_per_block,functor_shmem,total_shmem, + // regs_per_sm,regs_per_thread,max_blocks_shmem,max_blocks_regs,blocks_per_sm,threads_per_sm,opt_block_size,opt_threads_per_sm); + block_size-=32; + while ((blocks_per_sm==0) && (block_size>=32)) { + functor_shmem = FunctorTeamShmemSize< FunctorType >::value( f , block_size/vector_length ); + total_shmem = shmem_block + shmem_thread*(block_size/vector_length) + functor_shmem + attr.sharedSizeBytes; + max_blocks_regs = regs_per_sm/(regs_per_thread*block_size); + max_blocks_shmem = (total_shmem0?shmem_per_sm/total_shmem:max_blocks_regs):0; + blocks_per_sm = std::min(max_blocks_regs,max_blocks_shmem); + threads_per_sm = blocks_per_sm * block_size; + if(threads_per_sm > max_threads_per_sm) { + blocks_per_sm = max_threads_per_sm/block_size; + threads_per_sm = blocks_per_sm * block_size; + } + if((blocks_per_sm >= min_blocks_per_sm) && (blocks_per_sm <= max_blocks_per_sm)) { + if(threads_per_sm>=opt_threads_per_sm) { + opt_block_size = block_size; + opt_threads_per_sm = threads_per_sm; + } + } + //printf("BlockSizeMax: %i Shmem: %i %i %i %i Regs: %i %i Blocks: %i %i Achieved: %i %i Opt: %i %i\n",block_size, + // shmem_per_sm,max_shmem_per_block,functor_shmem,total_shmem, + // regs_per_sm,regs_per_thread,max_blocks_shmem,max_blocks_regs,blocks_per_sm,threads_per_sm,opt_block_size,opt_threads_per_sm); + block_size-=32; + } + return opt_block_size; } @@ -241,11 +302,71 @@ struct CudaGetOptBlockSize; template int cuda_get_opt_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { - return CudaGetOptBlockSize::get_block_size(f,vector_length,shmem_extra_block,shmem_extra_thread); + return 
CudaGetOptBlockSize::get_block_size(f,vector_length,shmem_extra_block,shmem_extra_thread); +} + +template +int cuda_get_opt_block_size(const CudaInternal* cuda_instance, const cudaFuncAttributes& attr, const FunctorType& f, const size_t vector_length, + const size_t shmem_block, const size_t shmem_thread) { + + const int min_blocks_per_sm = LaunchBounds::minBperSM == 0 ? + 1 : LaunchBounds::minBperSM ; + const int max_threads_per_block = LaunchBounds::maxTperB == 0 ? + cuda_instance->m_maxThreadsPerBlock : LaunchBounds::maxTperB ; + + const int regs_per_thread = attr.numRegs; + const int regs_per_sm = cuda_instance->m_regsPerSM; + const int shmem_per_sm = cuda_instance->m_shmemPerSM; + const int max_shmem_per_block = cuda_instance->m_maxShmemPerBlock; + const int max_blocks_per_sm = cuda_instance->m_maxBlocksPerSM; + const int max_threads_per_sm = cuda_instance->m_maxThreadsPerSM; + + int block_size = std::min(attr.maxThreadsPerBlock,max_threads_per_block); + + int functor_shmem = FunctorTeamShmemSize< FunctorType >::value( f , block_size/vector_length ); + int total_shmem = shmem_block + shmem_thread*(block_size/vector_length) + functor_shmem + attr.sharedSizeBytes; + int max_blocks_regs = regs_per_sm/(regs_per_thread*block_size); + int max_blocks_shmem = (total_shmem0?shmem_per_sm/total_shmem:max_blocks_regs):0; + int blocks_per_sm = std::min(max_blocks_regs,max_blocks_shmem); + int threads_per_sm = blocks_per_sm * block_size; + if(threads_per_sm > max_threads_per_sm) { + blocks_per_sm = max_threads_per_sm/block_size; + threads_per_sm = blocks_per_sm * block_size; + } + int opt_block_size = (blocks_per_sm>=min_blocks_per_sm) ? 
block_size : 0; + int opt_threads_per_sm = threads_per_sm; + + block_size-=32; + while ((block_size>=32)) { + functor_shmem = FunctorTeamShmemSize< FunctorType >::value( f , block_size/vector_length ); + total_shmem = shmem_block + shmem_thread*(block_size/vector_length) + functor_shmem + attr.sharedSizeBytes; + max_blocks_regs = regs_per_sm/(regs_per_thread*block_size); + max_blocks_shmem = (total_shmem0?shmem_per_sm/total_shmem:max_blocks_regs):0; + blocks_per_sm = std::min(max_blocks_regs,max_blocks_shmem); + threads_per_sm = blocks_per_sm * block_size; + if(threads_per_sm > max_threads_per_sm) { + blocks_per_sm = max_threads_per_sm/block_size; + threads_per_sm = blocks_per_sm * block_size; + } + if((blocks_per_sm >= min_blocks_per_sm) && (blocks_per_sm <= max_blocks_per_sm)) { + if(threads_per_sm>=opt_threads_per_sm) { + opt_block_size = block_size; + opt_threads_per_sm = threads_per_sm; + } + } + block_size-=32; + } + return opt_block_size; } template -struct CudaGetOptBlockSize,true> { +struct CudaGetOptBlockSize,true> { static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { int blockSize=16; @@ -275,7 +396,7 @@ struct CudaGetOptBlockSize,true> { }; template -struct CudaGetOptBlockSize,false> { +struct CudaGetOptBlockSize,false> { static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { int blockSize=16; @@ -305,7 +426,7 @@ struct CudaGetOptBlockSize,false> { }; template -struct CudaGetOptBlockSize,true> { +struct CudaGetOptBlockSize,true> { static int get_block_size(const typename DriverType::functor_type & f, const size_t vector_length, const size_t shmem_extra_block, const size_t shmem_extra_thread) { int blockSize=16; diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp similarity 
index 86% rename from lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp rename to lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp index 4fd7a9c69e..0ca9e3c160 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.cpp @@ -50,7 +50,8 @@ #include #include -#include +#include +#include #include #include #include @@ -217,78 +218,6 @@ const CudaInternalDevices & CudaInternalDevices::singleton() } -//---------------------------------------------------------------------------- - -class CudaInternal { -private: - - CudaInternal( const CudaInternal & ); - CudaInternal & operator = ( const CudaInternal & ); - - -public: - - typedef Cuda::size_type size_type ; - - int m_cudaDev ; - int m_cudaArch ; - unsigned m_multiProcCount ; - unsigned m_maxWarpCount ; - unsigned m_maxBlock ; - unsigned m_maxSharedWords ; - uint32_t m_maxConcurrency ; - size_type m_scratchSpaceCount ; - size_type m_scratchFlagsCount ; - size_type m_scratchUnifiedCount ; - size_type m_scratchUnifiedSupported ; - size_type m_streamCount ; - size_type * m_scratchSpace ; - size_type * m_scratchFlags ; - size_type * m_scratchUnified ; - uint32_t * m_scratchConcurrentBitset ; - cudaStream_t * m_stream ; - - static int was_initialized; - static int was_finalized; - - static CudaInternal & singleton(); - - int verify_is_initialized( const char * const label ) const ; - - int is_initialized() const - { return 0 != m_scratchSpace && 0 != m_scratchFlags ; } - - void initialize( int cuda_device_id , int stream_count ); - void finalize(); - - void print_configuration( std::ostream & ) const ; - - ~CudaInternal(); - - CudaInternal() - : m_cudaDev( -1 ) - , m_cudaArch( -1 ) - , m_multiProcCount( 0 ) - , m_maxWarpCount( 0 ) - , m_maxBlock( 0 ) - , m_maxSharedWords( 0 ) - , m_maxConcurrency( 0 ) - , m_scratchSpaceCount( 0 ) - , m_scratchFlagsCount( 0 ) - , m_scratchUnifiedCount( 0 ) - , m_scratchUnifiedSupported( 0 ) - , m_streamCount( 0 ) - , m_scratchSpace( 0 ) - , 
m_scratchFlags( 0 ) - , m_scratchUnified( 0 ) - , m_scratchConcurrentBitset( 0 ) - , m_stream( 0 ) - {} - - size_type * scratch_space( const size_type size ); - size_type * scratch_flags( const size_type size ); - size_type * scratch_unified( const size_type size ); -}; int CudaInternal::was_initialized = 0; int CudaInternal::was_finalized = 0; @@ -366,8 +295,11 @@ CudaInternal & CudaInternal::singleton() static CudaInternal self ; return self ; } +void CudaInternal::fence() const { + cudaStreamSynchronize(m_stream); +} -void CudaInternal::initialize( int cuda_device_id , int stream_count ) +void CudaInternal::initialize( int cuda_device_id , cudaStream_t stream ) { if ( was_finalized ) Kokkos::abort("Calling Cuda::initialize after Cuda::finalize is illegal\n"); was_initialized = 1; @@ -454,6 +386,15 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) m_maxBlock = cudaProp.maxGridSize[0] ; + m_shmemPerSM = cudaProp.sharedMemPerMultiprocessor ; + m_maxShmemPerBlock = cudaProp.sharedMemPerBlock ; + m_regsPerSM = cudaProp.regsPerMultiprocessor ; + m_maxBlocksPerSM = m_cudaArch < 500 ? 16 : ( + m_cudaArch < 750 ? 32 : ( + m_cudaArch == 750 ? 16 : 32)); + m_maxThreadsPerSM = cudaProp.maxThreadsPerMultiProcessor ; + m_maxThreadsPerBlock = cudaProp.maxThreadsPerBlock ; + //---------------------------------- m_scratchUnifiedSupported = cudaProp.unifiedAddressing ; @@ -482,10 +423,9 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) // Concurrent bitset for obtaining unique tokens from within // an executing kernel. 
{ - const unsigned max_threads_per_sm = 2048 ; // up to capability 7.0 m_maxConcurrency = - max_threads_per_sm * cudaProp.multiProcessorCount ; + m_maxThreadsPerSM * cudaProp.multiProcessorCount ; const int32_t buffer_bound = Kokkos::Impl::concurrent_bitset::buffer_bound( m_maxConcurrency ); @@ -507,11 +447,6 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) } //---------------------------------- - if ( stream_count ) { - m_stream = (cudaStream_t*) ::malloc( stream_count * sizeof(cudaStream_t) ); - m_streamCount = stream_count ; - for ( size_type i = 0 ; i < m_streamCount ; ++i ) m_stream[i] = 0 ; - } } else { @@ -539,7 +474,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) if( Kokkos::show_warnings() && !cuda_launch_blocking() ) { std::cerr << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl; std::cerr << " without setting CUDA_LAUNCH_BLOCKING=1." << std::endl; - std::cerr << " The code must call Cuda::fence() after each kernel" << std::endl; + std::cerr << " The code must call Cuda().fence() after each kernel" << std::endl; std::cerr << " or will likely crash when accessing data on the host." 
<< std::endl; } @@ -568,7 +503,10 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) #endif // Init the array for used for arbitrarily sized atomics - Impl::initialize_host_cuda_lock_arrays(); + if(stream == 0) + Impl::initialize_host_cuda_lock_arrays(); + + m_stream = stream; } //---------------------------------------------------------------------------- @@ -578,7 +516,7 @@ enum { sizeScratchGrain = sizeof(ScratchGrain) }; Cuda::size_type * -CudaInternal::scratch_flags( const Cuda::size_type size ) +CudaInternal::scratch_flags( const Cuda::size_type size ) const { if ( verify_is_initialized("scratch_flags") && m_scratchFlagsCount * sizeScratchGrain < size ) { @@ -587,6 +525,9 @@ CudaInternal::scratch_flags( const Cuda::size_type size ) typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > Record ; + if( m_scratchFlags ) + Record::decrement( Record::get_record( m_scratchFlags ) ); + Record * const r = Record::allocate( Kokkos::CudaSpace() , "InternalScratchFlags" , ( sizeof( ScratchGrain ) * m_scratchFlagsCount ) ); @@ -602,7 +543,7 @@ CudaInternal::scratch_flags( const Cuda::size_type size ) } Cuda::size_type * -CudaInternal::scratch_space( const Cuda::size_type size ) +CudaInternal::scratch_space( const Cuda::size_type size ) const { if ( verify_is_initialized("scratch_space") && m_scratchSpaceCount * sizeScratchGrain < size ) { @@ -610,6 +551,9 @@ CudaInternal::scratch_space( const Cuda::size_type size ) typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > Record ; + if( m_scratchSpace ) + Record::decrement( Record::get_record( m_scratchSpace ) ); + Record * const r = Record::allocate( Kokkos::CudaSpace() , "InternalScratchSpace" , ( sizeof( ScratchGrain ) * m_scratchSpaceCount ) ); @@ -623,7 +567,7 @@ CudaInternal::scratch_space( const Cuda::size_type size ) } Cuda::size_type * -CudaInternal::scratch_unified( const Cuda::size_type size ) +CudaInternal::scratch_unified( const Cuda::size_type size ) 
const { if ( verify_is_initialized("scratch_unified") && m_scratchUnifiedSupported && m_scratchUnifiedCount * sizeScratchGrain < size ) { @@ -632,6 +576,9 @@ CudaInternal::scratch_unified( const Cuda::size_type size ) typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void > Record ; + if( m_scratchUnified ) + Record::decrement( Record::get_record( m_scratchUnified ) ); + Record * const r = Record::allocate( Kokkos::CudaHostPinnedSpace() , "InternalScratchUnified" , ( sizeof( ScratchGrain ) * m_scratchUnifiedCount ) ); @@ -644,6 +591,31 @@ CudaInternal::scratch_unified( const Cuda::size_type size ) return m_scratchUnified ; } +Cuda::size_type * +CudaInternal::scratch_functor( const Cuda::size_type size ) const +{ + if ( verify_is_initialized("scratch_functor") && + m_scratchFunctorSize < size ) { + + m_scratchFunctorSize = size ; + + typedef Kokkos::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > Record ; + + if( m_scratchFunctor ) + Record::decrement( Record::get_record( m_scratchFunctor ) ); + + Record * const r = Record::allocate( Kokkos::CudaSpace() + , "InternalScratchFunctor" + , m_scratchFunctorSize ); + + Record::increment( r ); + + m_scratchFunctor = reinterpret_cast( r->data() ); + } + + return m_scratchFunctor ; +} + //---------------------------------------------------------------------------- void CudaInternal::finalize() @@ -653,13 +625,7 @@ void CudaInternal::finalize() Impl::finalize_host_cuda_lock_arrays(); - if ( m_stream ) { - for ( size_type i = 1 ; i < m_streamCount ; ++i ) { - cudaStreamDestroy( m_stream[i] ); - m_stream[i] = 0 ; - } - ::free( m_stream ); - } + if(m_stream!=0) cudaStreamDestroy(m_stream); typedef Kokkos::Impl::SharedAllocationRecord< CudaSpace > RecordCuda ; typedef Kokkos::Impl::SharedAllocationRecord< CudaHostPinnedSpace > RecordHost ; @@ -668,6 +634,8 @@ void CudaInternal::finalize() RecordCuda::decrement( RecordCuda::get_record( m_scratchSpace ) ); RecordHost::decrement( 
RecordHost::get_record( m_scratchUnified ) ); RecordCuda::decrement( RecordCuda::get_record( m_scratchConcurrentBitset ) ); + if(m_scratchFunctorSize>0) + RecordCuda::decrement( RecordCuda::get_record( m_scratchFunctor ) ); m_cudaDev = -1 ; m_multiProcCount = 0 ; @@ -713,14 +681,14 @@ Cuda::size_type cuda_internal_maximum_grid_count() Cuda::size_type cuda_internal_maximum_shared_words() { return CudaInternal::singleton().m_maxSharedWords ; } -Cuda::size_type * cuda_internal_scratch_space( const Cuda::size_type size ) -{ return CudaInternal::singleton().scratch_space( size ); } +Cuda::size_type * cuda_internal_scratch_space( const Cuda& instance, const Cuda::size_type size ) +{ return instance.impl_internal_space_instance()->scratch_space( size ); } -Cuda::size_type * cuda_internal_scratch_flags( const Cuda::size_type size ) -{ return CudaInternal::singleton().scratch_flags( size ); } +Cuda::size_type * cuda_internal_scratch_flags( const Cuda& instance, const Cuda::size_type size ) +{ return instance.impl_internal_space_instance()->scratch_flags( size ); } -Cuda::size_type * cuda_internal_scratch_unified( const Cuda::size_type size ) -{ return CudaInternal::singleton().scratch_unified( size ); } +Cuda::size_type * cuda_internal_scratch_unified( const Cuda& instance, const Cuda::size_type size ) +{ return instance.impl_internal_space_instance()->scratch_unified( size ); } } // namespace Impl @@ -749,7 +717,7 @@ void Cuda::initialize( const Cuda::SelectDevice config , size_t num_instances ) void Cuda::impl_initialize( const Cuda::SelectDevice config , size_t num_instances ) #endif { - Impl::CudaInternal::singleton().initialize( config.cuda_device_id , num_instances ); + Impl::CudaInternal::singleton().initialize( config.cuda_device_id , 0 ); #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::initialize(); @@ -800,19 +768,17 @@ void Cuda::impl_finalize() } Cuda::Cuda() - : m_device( Impl::CudaInternal::singleton().m_cudaDev ) - , m_stream( 0 ) + : 
m_space_instance( &Impl::CudaInternal::singleton() ) { Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor" ); } -Cuda::Cuda( const int instance_id ) - : m_device( Impl::CudaInternal::singleton().m_cudaDev ) - , m_stream( - Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor" ) - ? Impl::CudaInternal::singleton().m_stream[ instance_id % Impl::CudaInternal::singleton().m_streamCount ] - : 0 ) -{} +Cuda::Cuda(cudaStream_t stream) + : m_space_instance(new Impl::CudaInternal) +{ + Impl::CudaInternal::singleton().verify_is_initialized( "Cuda instance constructor" ); + m_space_instance->initialize(Impl::CudaInternal::singleton().m_cudaDev,stream); +} void Cuda::print_configuration( std::ostream & s , const bool ) { Impl::CudaInternal::singleton().print_configuration( s ); } @@ -823,13 +789,27 @@ bool Cuda::sleep() { return false ; } bool Cuda::wake() { return true ; } #endif -void Cuda::fence() +void Cuda::impl_static_fence() { Kokkos::Impl::cuda_device_synchronize(); } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE +void Cuda::fence() { + impl_static_fence(); +} +#else +void Cuda::fence() const { + m_space_instance->fence(); +} +#endif + const char* Cuda::name() { return "Cuda"; } +cudaStream_t Cuda::cuda_stream() const { return m_space_instance->m_stream ; } +int Cuda::cuda_device() const { return m_space_instance->m_cudaDev ; } + + } // namespace Kokkos namespace Kokkos { diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp new file mode 100644 index 0000000000..f9e333fcf0 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Instance.hpp @@ -0,0 +1,156 @@ +#ifndef KOKKOS_CUDA_INSTANCE_HPP_ +#define KOKKOS_CUDA_INSTANCE_HPP_ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +struct CudaTraits { + enum { 
WarpSize = 32 /* 0x0020 */ }; + enum { WarpIndexMask = 0x001f /* Mask for warpindex */ }; + enum { WarpIndexShift = 5 /* WarpSize == 1 << WarpShift */ }; + + enum { ConstantMemoryUsage = 0x008000 /* 32k bytes */ }; + enum { ConstantMemoryCache = 0x002000 /* 8k bytes */ }; + enum { KernelArgumentLimit = 0x001000 /* 4k bytes */ }; + + typedef unsigned long + ConstantGlobalBufferType[ ConstantMemoryUsage / sizeof(unsigned long) ]; + +#if defined(KOKKOS_ARCH_VOLTA) || \ + defined(KOKKOS_ARCH_PASCAL) + enum { ConstantMemoryUseThreshold = 0x000200 /* 0 bytes -> always use constant (or global)*/ }; +#else + enum { ConstantMemoryUseThreshold = 0x000200 /* 512 bytes */ }; +#endif + + KOKKOS_INLINE_FUNCTION static + CudaSpace::size_type warp_count( CudaSpace::size_type i ) + { return ( i + WarpIndexMask ) >> WarpIndexShift ; } + + KOKKOS_INLINE_FUNCTION static + CudaSpace::size_type warp_align( CudaSpace::size_type i ) + { + enum { Mask = ~CudaSpace::size_type( WarpIndexMask ) }; + return ( i + WarpIndexMask ) & Mask ; + } +}; + +//---------------------------------------------------------------------------- + +CudaSpace::size_type cuda_internal_multiprocessor_count(); +CudaSpace::size_type cuda_internal_maximum_warp_count(); +CudaSpace::size_type cuda_internal_maximum_grid_count(); +CudaSpace::size_type cuda_internal_maximum_shared_words(); + +CudaSpace::size_type cuda_internal_maximum_concurrent_block_count(); + +CudaSpace::size_type * cuda_internal_scratch_flags( const Cuda&, const CudaSpace::size_type size ); +CudaSpace::size_type * cuda_internal_scratch_space( const Cuda&, const CudaSpace::size_type size ); +CudaSpace::size_type * cuda_internal_scratch_unified( const Cuda&, const CudaSpace::size_type size ); + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +namespace Kokkos { +namespace Impl { + +class CudaInternal { +private: + + CudaInternal( const CudaInternal & ); + CudaInternal & operator 
= ( const CudaInternal & ); + + +public: + + typedef Cuda::size_type size_type ; + + int m_cudaDev ; + + // Device Properties + int m_cudaArch ; + unsigned m_multiProcCount ; + unsigned m_maxWarpCount ; + unsigned m_maxBlock ; + unsigned m_maxSharedWords ; + uint32_t m_maxConcurrency ; + int m_shmemPerSM ; + int m_maxShmemPerBlock ; + int m_regsPerSM ; + int m_maxBlocksPerSM ; + int m_maxThreadsPerSM ; + int m_maxThreadsPerBlock ; + + mutable size_type m_scratchSpaceCount ; + mutable size_type m_scratchFlagsCount ; + mutable size_type m_scratchUnifiedCount ; + mutable size_type m_scratchFunctorSize ; + size_type m_scratchUnifiedSupported ; + size_type m_streamCount ; + mutable size_type * m_scratchSpace ; + mutable size_type * m_scratchFlags ; + mutable size_type * m_scratchUnified ; + mutable size_type * m_scratchFunctor ; + uint32_t * m_scratchConcurrentBitset ; + cudaStream_t m_stream ; + + static int was_initialized; + static int was_finalized; + + static CudaInternal & singleton(); + + int verify_is_initialized( const char * const label ) const ; + + int is_initialized() const + { return 0 != m_scratchSpace && 0 != m_scratchFlags ; } + + void initialize( int cuda_device_id , cudaStream_t stream = 0 ); + void finalize(); + + void print_configuration( std::ostream & ) const ; + + void fence() const ; + + ~CudaInternal(); + + CudaInternal() + : m_cudaDev( -1 ) + , m_cudaArch( -1 ) + , m_multiProcCount( 0 ) + , m_maxWarpCount( 0 ) + , m_maxBlock( 0 ) + , m_maxSharedWords( 0 ) + , m_maxConcurrency( 0 ) + , m_shmemPerSM( 0 ) + , m_maxShmemPerBlock( 0 ) + , m_regsPerSM( 0 ) + , m_maxBlocksPerSM( 0 ) + , m_maxThreadsPerSM( 0 ) + , m_maxThreadsPerBlock( 0 ) + , m_scratchSpaceCount( 0 ) + , m_scratchFlagsCount( 0 ) + , m_scratchUnifiedCount( 0 ) + , m_scratchFunctorSize( 0 ) + , m_scratchUnifiedSupported( 0 ) + , m_streamCount( 0 ) + , m_scratchSpace( 0 ) + , m_scratchFlags( 0 ) + , m_scratchUnified( 0 ) + , m_scratchFunctor( 0 ) + , m_scratchConcurrentBitset( 0 ) + , 
m_stream( 0 ) + {} + + size_type * scratch_space( const size_type size ) const ; + size_type * scratch_flags( const size_type size ) const ; + size_type * scratch_unified( const size_type size ) const ; + size_type * scratch_functor( const size_type size ) const ; +}; + +} // Namespace Impl +} // Namespace Kokkos +#endif diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp new file mode 100644 index 0000000000..2ec868c1f1 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp @@ -0,0 +1,579 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDAEXEC_HPP +#define KOKKOS_CUDAEXEC_HPP + +#include +#ifdef KOKKOS_ENABLE_CUDA + +#include +#include +#include +#include +#include +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#if defined( __CUDACC__ ) + +/** \brief Access to constant memory on the device */ +#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE + +__device__ __constant__ +extern unsigned long kokkos_impl_cuda_constant_memory_buffer[] ; + +#else + +__device__ __constant__ +unsigned long kokkos_impl_cuda_constant_memory_buffer[ Kokkos::Impl::CudaTraits::ConstantMemoryUsage / sizeof(unsigned long) ] ; + +#endif + +namespace Kokkos { +namespace Impl { + void* cuda_resize_scratch_space(std::int64_t bytes, bool force_shrink = false); +} +} + +template< typename T > +inline +__device__ +T * kokkos_impl_cuda_shared_memory() +{ extern __shared__ Kokkos::CudaSpace::size_type sh[]; return (T*) sh ; } + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +// See section B.17 of Cuda C Programming Guide Version 3.2 +// for discussion of +// 
__launch_bounds__(maxThreadsPerBlock,minBlocksPerMultiprocessor) +// function qualifier which could be used to improve performance. +//---------------------------------------------------------------------------- +// Maximize L1 cache and minimize shared memory: +// cudaFuncSetCacheConfig(MyKernel, cudaFuncCachePreferL1 ); +// For 2.0 capability: 48 KB L1 and 16 KB shared +//---------------------------------------------------------------------------- + +template< class DriverType> +__global__ +static void cuda_parallel_launch_constant_memory() +{ + const DriverType & driver = + *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer ); + + driver(); +} + +template< class DriverType, unsigned int maxTperB, unsigned int minBperSM > +__global__ +__launch_bounds__(maxTperB, minBperSM) +static void cuda_parallel_launch_constant_memory() +{ + const DriverType & driver = + *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer ); + + driver(); +} + +template< class DriverType> +__global__ +static void cuda_parallel_launch_local_memory( const DriverType driver ) +{ + driver(); +} + +template< class DriverType, unsigned int maxTperB, unsigned int minBperSM > +__global__ +__launch_bounds__(maxTperB, minBperSM) +static void cuda_parallel_launch_local_memory( const DriverType driver ) +{ + driver(); +} + +template< class DriverType> +__global__ +static void cuda_parallel_launch_global_memory( const DriverType* driver ) +{ + driver->operator()(); +} + +template< class DriverType, unsigned int maxTperB, unsigned int minBperSM > +__global__ +__launch_bounds__(maxTperB, minBperSM) +static void cuda_parallel_launch_global_memory( const DriverType* driver ) +{ + driver->operator()(); +} + +template< class DriverType> +__global__ +static void cuda_parallel_launch_constant_or_global_memory( const DriverType* driver_ptr ) +{ + const DriverType & driver = driver_ptr!=NULL ? 
*driver_ptr : + *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer ); + + driver(); +} + +template< class DriverType, unsigned int maxTperB, unsigned int minBperSM > +__global__ +__launch_bounds__(maxTperB, minBperSM) +static void cuda_parallel_launch_constant_or_global_memory( const DriverType* driver_ptr ) +{ + const DriverType & driver = driver_ptr!=NULL ? *driver_ptr : + *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer ); + + driver(); +} + +template< class DriverType > +struct DeduceCudaLaunchMechanism { + constexpr static const Kokkos::Experimental::WorkItemProperty::HintLightWeight_t light_weight = Kokkos::Experimental::WorkItemProperty::HintLightWeight; + constexpr static const Kokkos::Experimental::WorkItemProperty::HintHeavyWeight_t heavy_weight = Kokkos::Experimental::WorkItemProperty::HintHeavyWeight ; + constexpr static const typename DriverType::Policy::work_item_property property = typename DriverType::Policy::work_item_property(); + + static constexpr const Experimental::CudaLaunchMechanism valid_launch_mechanism = + // BuildValidMask + (sizeof(DriverType) + , Experimental::CudaLaunchMechanism LaunchMechanism = + DeduceCudaLaunchMechanism::launch_mechanism > +struct CudaParallelLaunch ; + +template < class DriverType + , unsigned int MaxThreadsPerBlock + , unsigned int MinBlocksPerSM> +struct CudaParallelLaunch< DriverType + , Kokkos::LaunchBounds< MaxThreadsPerBlock + , MinBlocksPerSM > + , Experimental::CudaLaunchMechanism::ConstantMemory> +{ + static_assert(sizeof(DriverType)m_maxShmemPerBlock < shmem ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); + } + #ifndef KOKKOS_ARCH_KEPLER + // On Kepler the L1 has no benefit since it doesn't cache reads + else { + CUDA_SAFE_CALL( + cudaFuncSetCacheConfig + ( cuda_parallel_launch_constant_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > + , ( prefer_shmem ? 
cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) + ) ); + } + #endif + + // Copy functor to constant memory on the device + cudaMemcpyToSymbolAsync( + kokkos_impl_cuda_constant_memory_buffer, &driver, sizeof(DriverType), 0, cudaMemcpyHostToDevice, cudaStream_t(cuda_instance->m_stream)); + + KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); + + // Invoke the driver function on the device + cuda_parallel_launch_constant_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > + <<< grid , block , shmem , cuda_instance->m_stream >>>(); + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + CUDA_SAFE_CALL( cudaGetLastError() ); + Kokkos::Cuda().fence(); +#endif + } + } + + static cudaFuncAttributes get_cuda_func_attributes() { + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr,cuda_parallel_launch_constant_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM >); + return attr; + } +}; + +template < class DriverType> +struct CudaParallelLaunch< DriverType + , Kokkos::LaunchBounds<0,0> + , Experimental::CudaLaunchMechanism::ConstantMemory > +{ + static_assert(sizeof(DriverType)m_maxShmemPerBlock < shmem ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); + } + #ifndef KOKKOS_ARCH_KEPLER + // On Kepler the L1 has no benefit since it doesn't cache reads + else { + CUDA_SAFE_CALL( + cudaFuncSetCacheConfig + ( cuda_parallel_launch_constant_memory< DriverType > + , ( prefer_shmem ? 
cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) + ) ); + } + #endif + + // Copy functor to constant memory on the device + cudaMemcpyToSymbolAsync( + kokkos_impl_cuda_constant_memory_buffer, &driver, sizeof(DriverType), 0, cudaMemcpyHostToDevice, cudaStream_t(cuda_instance->m_stream)); + + KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); + + // Invoke the driver function on the device + cuda_parallel_launch_constant_memory< DriverType > + <<< grid , block , shmem , cuda_instance->m_stream >>>(); + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + CUDA_SAFE_CALL( cudaGetLastError() ); + Kokkos::Cuda().fence(); +#endif + } + } + + static cudaFuncAttributes get_cuda_func_attributes() { + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr,cuda_parallel_launch_constant_memory + < DriverType >); + return attr; + } +}; + +template < class DriverType + , unsigned int MaxThreadsPerBlock + , unsigned int MinBlocksPerSM > +struct CudaParallelLaunch< DriverType + , Kokkos::LaunchBounds< MaxThreadsPerBlock + , MinBlocksPerSM > + , Experimental::CudaLaunchMechanism::LocalMemory > +{ + static_assert(sizeof(DriverType)m_maxShmemPerBlock < shmem ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); + } + #ifndef KOKKOS_ARCH_KEPLER + // On Kepler the L1 has no benefit since it doesn't cache reads + else { + CUDA_SAFE_CALL( + cudaFuncSetCacheConfig + ( cuda_parallel_launch_local_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > + , ( prefer_shmem ? 
cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) + ) ); + } + #endif + + KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); + + // Invoke the driver function on the device + cuda_parallel_launch_local_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > + <<< grid , block , shmem , cuda_instance->m_stream >>>( driver ); + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + CUDA_SAFE_CALL( cudaGetLastError() ); + Kokkos::Cuda().fence(); +#endif + } + } + + static cudaFuncAttributes get_cuda_func_attributes() { + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr,cuda_parallel_launch_local_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM >); + return attr; + } +}; + +template < class DriverType> +struct CudaParallelLaunch< DriverType + , Kokkos::LaunchBounds<0,0> + , Experimental::CudaLaunchMechanism::LocalMemory > +{ + static_assert(sizeof(DriverType)m_maxShmemPerBlock < shmem ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); + } + #ifndef KOKKOS_ARCH_KEPLER + // On Kepler the L1 has no benefit since it doesn't cache reads + else { + CUDA_SAFE_CALL( + cudaFuncSetCacheConfig + ( cuda_parallel_launch_local_memory< DriverType > + , ( prefer_shmem ? 
cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) + ) ); + } + #endif + + KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); + + // Invoke the driver function on the device + cuda_parallel_launch_local_memory< DriverType > + <<< grid , block , shmem , cuda_instance->m_stream >>>( driver ); + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + CUDA_SAFE_CALL( cudaGetLastError() ); + Kokkos::Cuda().fence(); +#endif + } + } + + static cudaFuncAttributes get_cuda_func_attributes() { + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr,cuda_parallel_launch_local_memory + < DriverType >); + return attr; + } +}; + +template < class DriverType + , unsigned int MaxThreadsPerBlock + , unsigned int MinBlocksPerSM> +struct CudaParallelLaunch< DriverType + , Kokkos::LaunchBounds< MaxThreadsPerBlock + , MinBlocksPerSM> + , Experimental::CudaLaunchMechanism::GlobalMemory > +{ + inline + CudaParallelLaunch( const DriverType & driver + , const dim3 & grid + , const dim3 & block + , const int shmem + , CudaInternal* cuda_instance + , const bool prefer_shmem ) + { + if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { + + if ( cuda_instance->m_maxShmemPerBlock < shmem ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); + } + #ifndef KOKKOS_ARCH_KEPLER + // On Kepler the L1 has no benefit since it doesn't cache reads + else { + CUDA_SAFE_CALL( + cudaFuncSetCacheConfig + ( cuda_parallel_launch_global_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > + , ( prefer_shmem ? 
cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) + ) ); + } + #endif + + KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); + + DriverType* driver_ptr = NULL; + driver_ptr = reinterpret_cast(cuda_instance->scratch_functor(sizeof(DriverType))); + cudaMemcpyAsync(driver_ptr,&driver, sizeof(DriverType), cudaMemcpyDefault, cuda_instance->m_stream); + + // Invoke the driver function on the device + cuda_parallel_launch_global_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM > + <<< grid , block , shmem , cuda_instance->m_stream >>>( driver_ptr ); + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + CUDA_SAFE_CALL( cudaGetLastError() ); + Kokkos::Cuda().fence(); +#endif + } + } + static cudaFuncAttributes get_cuda_func_attributes() { + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr,cuda_parallel_launch_global_memory + < DriverType, MaxThreadsPerBlock, MinBlocksPerSM >); + return attr; + } + +}; + +template < class DriverType> +struct CudaParallelLaunch< DriverType + , Kokkos::LaunchBounds<0,0> + , Experimental::CudaLaunchMechanism::GlobalMemory > +{ + inline + CudaParallelLaunch( const DriverType & driver + , const dim3 & grid + , const dim3 & block + , const int shmem + , CudaInternal* cuda_instance + , const bool prefer_shmem) + { + if ( (grid.x != 0) && ( ( block.x * block.y * block.z ) != 0 ) ) { + + if ( cuda_instance->m_maxShmemPerBlock < shmem ) { + Kokkos::Impl::throw_runtime_exception( std::string("CudaParallelLaunch FAILED: shared memory request is too large") ); + } + #ifndef KOKKOS_ARCH_KEPLER + // On Kepler the L1 has no benefit since it doesn't cache reads + else { + CUDA_SAFE_CALL( + cudaFuncSetCacheConfig + ( cuda_parallel_launch_global_memory< DriverType > + , ( prefer_shmem ? 
cudaFuncCachePreferShared : cudaFuncCachePreferL1 ) + ) ); + } + #endif + + KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); + + DriverType* driver_ptr = NULL; + driver_ptr = reinterpret_cast(cuda_instance->scratch_functor(sizeof(DriverType))); + cudaMemcpyAsync(driver_ptr,&driver, sizeof(DriverType), cudaMemcpyDefault, cuda_instance->m_stream); + + cuda_parallel_launch_global_memory< DriverType > + <<< grid , block , shmem , cuda_instance->m_stream >>>( driver_ptr ); + +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + CUDA_SAFE_CALL( cudaGetLastError() ); + Kokkos::Cuda().fence(); +#endif + } + } + + static cudaFuncAttributes get_cuda_func_attributes() { + cudaFuncAttributes attr; + cudaFuncGetAttributes(&attr,cuda_parallel_launch_global_memory + < DriverType >); + return attr; + } +}; +//---------------------------------------------------------------------------- + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* defined( __CUDACC__ ) */ +#endif /* defined( KOKKOS_ENABLE_CUDA ) */ +#endif /* #ifndef KOKKOS_CUDAEXEC_HPP */ + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp index 665d0732a7..c05fbcc6c1 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp @@ -55,9 +55,9 @@ #include #include -#include +#include #include -#include +#include #include #include #include @@ -73,6 +73,9 @@ //---------------------------------------------------------------------------- namespace Kokkos { + +extern bool show_warnings() noexcept; + namespace Impl { template< class ... Properties > @@ -85,10 +88,14 @@ public: typedef PolicyTraits traits; + template< class ExecSpace, class ... 
OtherProperties > + friend class TeamPolicyInternal; + private: enum { MAX_WARP = 8 }; + typename traits::execution_space m_space; int m_league_size ; int m_team_size ; int m_vector_length ; @@ -101,6 +108,19 @@ public: //! Execution space of this execution policy typedef Kokkos::Cuda execution_space ; + template + TeamPolicyInternal( const TeamPolicyInternal& p ) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_vector_length = p.m_vector_length; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + m_space = p.m_space; + } + TeamPolicyInternal& operator = (const TeamPolicyInternal& p) { m_league_size = p.m_league_size; m_team_size = p.m_team_size; @@ -110,6 +130,7 @@ public: m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; m_chunk_size = p.m_chunk_size; + m_space = p.m_space; return *this; } @@ -117,7 +138,7 @@ public: #ifdef KOKKOS_ENABLE_DEPRECATED_CODE template< class FunctorType > - inline static + static inline int team_size_max( const FunctorType & functor ) { int n = MAX_WARP * Impl::CudaTraits::WarpSize ; @@ -128,7 +149,7 @@ public: /* for team reduce */ + ( n + 2 ) * sizeof(double) /* for team shared */ + Impl::FunctorTeamShmemSize< FunctorType >::value( functor , n ); - if ( shmem_size < Impl::CudaTraits::SharedMemoryCapacity ) break ; + if ( shmem_size < typename traits::execution_space().impl_internal_space_instance()->m_maxShmemPerBlock ) break ; } return n ; @@ -138,7 +159,10 @@ public: template int team_size_max( const FunctorType& f, const ParallelForTag& ) const { typedef Impl::ParallelFor< FunctorType , TeamPolicy > closure_type; - int block_size = Kokkos::Impl::cuda_get_max_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(), + 
cudaFuncAttributes attr = CudaParallelLaunch< closure_type, typename traits::launch_bounds >:: + get_cuda_func_attributes(); + int block_size = Kokkos::Impl::cuda_get_max_block_size< FunctorType, typename traits::launch_bounds >( + space().impl_internal_space_instance(),attr,f ,(size_t) vector_length(), (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double) ); return block_size/vector_length(); } @@ -150,7 +174,10 @@ public: typedef Impl::ParallelReduce< FunctorType , TeamPolicy, reducer_type > closure_type; typedef Impl::FunctorValueTraits< FunctorType , typename traits::work_tag > functor_value_traits; - int block_size = Kokkos::Impl::cuda_get_max_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(), + cudaFuncAttributes attr = CudaParallelLaunch< closure_type, typename traits::launch_bounds >:: + get_cuda_func_attributes(); + int block_size = Kokkos::Impl::cuda_get_max_block_size< FunctorType, typename traits::launch_bounds >( + space().impl_internal_space_instance(),attr,f ,(size_t) vector_length(), (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double) + ((functor_value_traits::StaticValueSize!=0)?0:functor_value_traits::value_size( f ))); @@ -178,7 +205,11 @@ public: template int team_size_recommended( const FunctorType& f, const ParallelForTag& ) const { typedef Impl::ParallelFor< FunctorType , TeamPolicy > closure_type; - int block_size = Kokkos::Impl::cuda_get_opt_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(), + cudaFuncAttributes attr = CudaParallelLaunch< closure_type, typename traits::launch_bounds >:: + get_cuda_func_attributes(); + const int block_size = Kokkos::Impl::cuda_get_opt_block_size< FunctorType, typename traits::launch_bounds>( + space().impl_internal_space_instance(), + attr, f , (size_t) vector_length(), (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) 
thread_scratch_size(0) + sizeof(double)); return block_size/vector_length(); } @@ -190,10 +221,18 @@ public: typedef Impl::ParallelReduce< FunctorType , TeamPolicy, reducer_type > closure_type; typedef Impl::FunctorValueTraits< FunctorType , typename traits::work_tag > functor_value_traits; - int block_size = Kokkos::Impl::cuda_get_opt_block_size< closure_type, typename traits::launch_bounds >( f ,(size_t) vector_length(), + cudaFuncAttributes attr = CudaParallelLaunch< closure_type, typename traits::launch_bounds >:: + get_cuda_func_attributes(); + const int block_size = Kokkos::Impl::cuda_get_opt_block_size< FunctorType, typename traits::launch_bounds>( + space().impl_internal_space_instance(), + attr, f , (size_t) vector_length(), (size_t) team_scratch_size(0) + 2*sizeof(double), (size_t) thread_scratch_size(0) + sizeof(double) + ((functor_value_traits::StaticValueSize!=0)?0:functor_value_traits::value_size( f ))); - return block_size/vector_length(); + // Currently we require Power-of-2 team size for reductions. + int p2 = 1; + while(p2<=block_size) p2*=2; + p2/=2; + return p2/vector_length(); } @@ -201,6 +240,25 @@ public: int vector_length_max() { return Impl::CudaTraits::WarpSize; } + inline static + int verify_requested_vector_length( int requested_vector_length ) { + int test_vector_length = std::min( requested_vector_length, vector_length_max() ); + + // Allow only power-of-two vector_length + if ( !(is_integral_power_of_two( test_vector_length ) ) ) { + int test_pow2 = 1; + for (int i = 0; i < 5; i++) { + test_pow2 = test_pow2 << 1; + if (test_pow2 > test_vector_length) { + break; + } + } + test_vector_length = test_pow2 >> 1; + } + + return test_vector_length; + } + inline static int scratch_size_max(int level) { return (level==0? 
@@ -224,9 +282,14 @@ public: return m_thread_scratch_size[level]; } + inline typename traits::execution_space space() const { + return m_space; + } + TeamPolicyInternal() - : m_league_size( 0 ) - , m_team_size( 0 ) + : m_space(typename traits::execution_space()) + , m_league_size( 0 ) + , m_team_size( -1 ) , m_vector_length( 0 ) , m_team_scratch_size {0,0} , m_thread_scratch_size {0,0} @@ -234,22 +297,18 @@ public: {} /** \brief Specify league size, request team size */ - TeamPolicyInternal( execution_space & + TeamPolicyInternal( const execution_space space_ , int league_size_ , int team_size_request , int vector_length_request = 1 ) - : m_league_size( league_size_ ) + : m_space( space_ ) + , m_league_size( league_size_ ) , m_team_size( team_size_request ) - , m_vector_length( vector_length_request ) + , m_vector_length( verify_requested_vector_length(vector_length_request) ) , m_team_scratch_size {0,0} , m_thread_scratch_size {0,0} , m_chunk_size ( 32 ) { - // Allow only power-of-two vector_length - if ( ! 
Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) { - Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy."); - } - // Make sure league size is permissable if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space."); @@ -261,22 +320,18 @@ public: } /** \brief Specify league size, request team size */ - TeamPolicyInternal( execution_space & + TeamPolicyInternal( const execution_space space_ , int league_size_ , const Kokkos::AUTO_t & /* team_size_request */ , int vector_length_request = 1 ) - : m_league_size( league_size_ ) + : m_space( space_ ) + , m_league_size( league_size_ ) , m_team_size( -1 ) - , m_vector_length( vector_length_request ) + , m_vector_length( verify_requested_vector_length(vector_length_request) ) , m_team_scratch_size {0,0} , m_thread_scratch_size {0,0} , m_chunk_size ( 32 ) { - // Allow only power-of-two vector_length - if ( ! Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) { - Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy."); - } - // Make sure league size is permissable if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space."); @@ -285,18 +340,14 @@ public: TeamPolicyInternal( int league_size_ , int team_size_request , int vector_length_request = 1 ) - : m_league_size( league_size_ ) + : m_space( typename traits::execution_space() ) + , m_league_size( league_size_ ) , m_team_size( team_size_request ) - , m_vector_length ( vector_length_request ) + , m_vector_length ( verify_requested_vector_length(vector_length_request) ) , m_team_scratch_size {0,0} , m_thread_scratch_size {0,0} , m_chunk_size ( 32 ) { - // Allow only power-of-two vector_length - if ( ! 
Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) { - Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy."); - } - // Make sure league size is permissable if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space."); @@ -310,18 +361,14 @@ public: TeamPolicyInternal( int league_size_ , const Kokkos::AUTO_t & /* team_size_request */ , int vector_length_request = 1 ) - : m_league_size( league_size_ ) + : m_space( typename traits::execution_space() ) + , m_league_size( league_size_ ) , m_team_size( -1 ) - , m_vector_length ( vector_length_request ) + , m_vector_length ( verify_requested_vector_length(vector_length_request) ) , m_team_scratch_size {0,0} , m_thread_scratch_size {0,0} , m_chunk_size ( 32 ) { - // Allow only power-of-two vector_length - if ( ! Kokkos::Impl::is_integral_power_of_two( vector_length_request ) ) { - Impl::throw_runtime_exception( "Requested non-power-of-two vector length for TeamPolicy."); - } - // Make sure league size is permissable if(league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution space."); @@ -431,9 +478,10 @@ class ParallelFor< FunctorType , Kokkos::Cuda > { +public: + typedef Kokkos::RangePolicy< Traits ... > Policy; private: - typedef Kokkos::RangePolicy< Traits ... 
> Policy; typedef typename Policy::member_type Member ; typedef typename Policy::work_tag WorkTag ; typedef typename Policy::launch_bounds LaunchBounds ; @@ -479,11 +527,17 @@ public: void execute() const { const typename Policy::index_type nwork = m_policy.end() - m_policy.begin(); - const int block_size = Kokkos::Impl::cuda_get_opt_block_size< ParallelFor, LaunchBounds>( m_functor , 1, 0 , 0 ); - const dim3 block( 1 , block_size , 1); - const dim3 grid( std::min( typename Policy::index_type(( nwork + block.y - 1 ) / block.y) , typename Policy::index_type(cuda_internal_maximum_grid_count()) ) , 1 , 1); - CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 ); + cudaFuncAttributes attr = CudaParallelLaunch< ParallelFor, LaunchBounds >:: + get_cuda_func_attributes(); + const int block_size = Kokkos::Impl::cuda_get_opt_block_size< FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), + attr, m_functor , 1, 0 , 0 ); + const dim3 block( 1 , block_size , 1); + const dim3 grid( std::min( typename Policy::index_type(( nwork + block.y - 1 ) / block.y) , + typename Policy::index_type(cuda_internal_maximum_grid_count()) ) , 1 , 1); + + CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 , m_policy.space().impl_internal_space_instance() , false ); } ParallelFor( const FunctorType & arg_functor , @@ -491,6 +545,7 @@ public: : m_functor( arg_functor ) , m_policy( arg_policy ) { } + }; @@ -501,8 +556,9 @@ class ParallelFor< FunctorType , Kokkos::Cuda > { -private: +public: typedef Kokkos::MDRangePolicy< Traits ... 
> Policy ; +private: using RP = Policy; typedef typename Policy::array_index_type array_index_type; typedef typename Policy::index_type index_type; @@ -526,7 +582,7 @@ public: void execute() const { if(m_rp.m_num_tiles==0) return; - const array_index_type maxblocks = static_cast(Kokkos::Impl::CudaTraits::UpperBoundGridCount); + const array_index_type maxblocks = static_cast(m_rp.space().impl_internal_space_instance()->m_maxBlock); if ( RP::rank == 2 ) { const dim3 block( m_rp.m_tile[0] , m_rp.m_tile[1] , 1); @@ -535,7 +591,7 @@ public: , std::min( ( m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1 ) / block.y , maxblocks ) , 1 ); - CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 ); + CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 , m_rp.space().impl_internal_space_instance() , false ); } else if ( RP::rank == 3 ) { @@ -545,7 +601,7 @@ public: , std::min( ( m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1 ) / block.y , maxblocks ) , std::min( ( m_rp.m_upper[2] - m_rp.m_lower[2] + block.z - 1 ) / block.z , maxblocks ) ); - CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 ); + CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 , m_rp.space().impl_internal_space_instance() , false ); } else if ( RP::rank == 4 ) { @@ -557,7 +613,7 @@ public: , std::min( ( m_rp.m_upper[2] - m_rp.m_lower[2] + block.y - 1 ) / block.y , maxblocks ) , std::min( ( m_rp.m_upper[3] - m_rp.m_lower[3] + block.z - 1 ) / block.z , maxblocks ) ); - CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 ); + CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 , m_rp.space().impl_internal_space_instance() , false ); } else if ( RP::rank == 5 ) { @@ -570,7 +626,7 @@ public: , static_cast(maxblocks) ) , std::min( ( m_rp.m_upper[4] - m_rp.m_lower[4] + block.z - 1 ) / block.z , maxblocks ) ); - CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 
); + CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 , m_rp.space().impl_internal_space_instance() , false ); } else if ( RP::rank == 6 ) { @@ -584,7 +640,7 @@ public: , std::min( static_cast( m_rp.m_tile_end[4] * m_rp.m_tile_end[5] ) , static_cast(maxblocks) ) ); - CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 ); + CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 , m_rp.space().impl_internal_space_instance() , false ); } else { @@ -609,9 +665,10 @@ class ParallelFor< FunctorType , Kokkos::Cuda > { +public: + typedef TeamPolicyInternal< Kokkos::Cuda , Properties ... > Policy ; private: - typedef TeamPolicyInternal< Kokkos::Cuda , Properties ... > Policy ; typedef typename Policy::member_type Member ; typedef typename Policy::work_tag WorkTag ; typedef typename Policy::launch_bounds LaunchBounds ; @@ -631,13 +688,14 @@ private: // const FunctorType m_functor ; + const Policy m_policy ; const size_type m_league_size ; - const size_type m_team_size ; + int m_team_size ; const size_type m_vector_size ; - const int m_shmem_begin ; - const int m_shmem_size ; + int m_shmem_begin ; + int m_shmem_size ; void* m_scratch_ptr[2] ; - const int m_scratch_size[2] ; + int m_scratch_size[2] ; template< class TagType > __device__ inline @@ -705,7 +763,7 @@ public: const dim3 grid( int(m_league_size) , 1 , 1 ); const dim3 block( int(m_vector_size) , int(m_team_size) , 1 ); - CudaParallelLaunch< ParallelFor, LaunchBounds >( *this, grid, block, shmem_size_total ); // copy to device and execute + CudaParallelLaunch< ParallelFor, LaunchBounds >( *this, grid, block, shmem_size_total, m_policy.space().impl_internal_space_instance() , true ); // copy to device and execute } @@ -713,26 +771,37 @@ public: , const Policy & arg_policy ) : m_functor( arg_functor ) + , m_policy( arg_policy ) , m_league_size( arg_policy.league_size() ) - , m_team_size( 0 <= arg_policy.team_size() ? 
arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelFor, LaunchBounds >( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length() ) + , m_team_size( arg_policy.team_size() ) , m_vector_size( arg_policy.vector_length() ) - , m_shmem_begin( sizeof(double) * ( m_team_size + 2 ) ) - , m_shmem_size( arg_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( m_functor , m_team_size ) ) - , m_scratch_ptr{NULL,NULL} - , m_scratch_size{arg_policy.scratch_size(0,m_team_size),arg_policy.scratch_size(1,m_team_size)} { + cudaFuncAttributes attr = CudaParallelLaunch< ParallelFor, LaunchBounds >:: + get_cuda_func_attributes(); + m_team_size = m_team_size>=0?m_team_size:Kokkos::Impl::cuda_get_opt_block_size< FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), + attr, m_functor , m_vector_size, + m_policy.team_scratch_size(0), m_policy.thread_scratch_size(0) )/m_vector_size; + + m_shmem_begin = ( sizeof(double) * ( m_team_size + 2 ) ); + m_shmem_size = ( m_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( m_functor , m_team_size ) ); + m_scratch_size[0] = m_policy.scratch_size(0,m_team_size); + m_scratch_size[1] = m_policy.scratch_size(1,m_team_size); + // Functor's reduce memory, team scan memory, and team shared memory depend upon team size. 
- m_scratch_ptr[1] = cuda_resize_scratch_space(m_scratch_size[1]*(Cuda::concurrency()/(m_team_size*m_vector_size))); + m_scratch_ptr[0] = NULL; + m_scratch_ptr[1] = m_team_size<=0?NULL:cuda_resize_scratch_space(static_cast(m_scratch_size[1])*static_cast(Cuda::concurrency()/(m_team_size*m_vector_size))); const int shmem_size_total = m_shmem_begin + m_shmem_size ; - if ( CudaTraits::SharedMemoryCapacity < shmem_size_total ) { + if ( m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < shmem_size_total ) { + printf("%i %i\n",m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock,shmem_size_total); Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelFor< Cuda > insufficient shared memory")); } if ( int(m_team_size) > - int(Kokkos::Impl::cuda_get_max_block_size< ParallelFor, LaunchBounds > - ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length())) { + int(Kokkos::Impl::cuda_get_max_block_size< FunctorType, LaunchBounds > + ( m_policy.space().impl_internal_space_instance(), + attr, arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length())) { Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelFor< Cuda > requested too large team size.")); } } @@ -754,9 +823,10 @@ class ParallelReduce< FunctorType , Kokkos::Cuda > { +public: + typedef Kokkos::RangePolicy< Traits ... > Policy ; private: - typedef Kokkos::RangePolicy< Traits ... 
> Policy ; typedef typename Policy::WorkRange WorkRange ; typedef typename Policy::work_tag WorkTag ; @@ -897,11 +967,16 @@ public: }*/ // Determine block size constrained by shared memory: - static inline + inline unsigned local_block_size( const FunctorType & f ) { unsigned n = CudaTraits::WarpSize * 8 ; - while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem( f , n ) ) { n >>= 1 ; } + int shmem_size = cuda_single_inter_block_reduce_scan_shmem( f , n ); + while ( (n && (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < shmem_size)) || + (n > static_cast(Kokkos::Impl::cuda_get_max_block_size< ParallelReduce, LaunchBounds>( f , 1, shmem_size , 0 )))) { + n >>= 1 ; + shmem_size = cuda_single_inter_block_reduce_scan_shmem( f , n ); + } return n ; } @@ -912,9 +987,9 @@ public: if ( nwork ) { const int block_size = local_block_size( m_functor ); - m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_size /* block_size == max block_count */ ); - m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) ); - m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); + m_scratch_space = cuda_internal_scratch_space( m_policy.space(), ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_size /* block_size == max block_count */ ); + m_scratch_flags = cuda_internal_scratch_flags( m_policy.space(), sizeof(size_type) ); + m_unified_space = cuda_internal_scratch_unified( m_policy.space(), ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); // REQUIRED ( 1 , N , 1 ) const dim3 block( 1 , block_size , 1 ); @@ -923,10 +998,10 @@ public: const int shmem = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem( m_functor , block.y ); - CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, 
shmem ); // copy to device and execute + CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem , m_policy.space().impl_internal_space_instance() , false ); // copy to device and execute if(!m_result_ptr_device_accessible) { - Cuda::fence(); + Cuda().fence(); if ( m_result_ptr ) { if ( m_unified_space ) { @@ -987,9 +1062,10 @@ class ParallelReduce< FunctorType , Kokkos::Cuda > { +public: + typedef Kokkos::MDRangePolicy< Traits ... > Policy ; private: - typedef Kokkos::MDRangePolicy< Traits ... > Policy ; typedef typename Policy::array_index_type array_index_type; typedef typename Policy::index_type index_type; @@ -1121,11 +1197,16 @@ public: } */ // Determine block size constrained by shared memory: - static inline + inline unsigned local_block_size( const FunctorType & f ) { unsigned n = CudaTraits::WarpSize * 8 ; - while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem( f , n ) ) { n >>= 1 ; } + int shmem_size = cuda_single_inter_block_reduce_scan_shmem( f , n ); + while ( (n && (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < shmem_size)) || + (n > static_cast(Kokkos::Impl::cuda_get_max_block_size< ParallelReduce, LaunchBounds>( f , 1, shmem_size , 0 )))) { + n >>= 1 ; + shmem_size = cuda_single_inter_block_reduce_scan_shmem( f , n ); + } return n ; } @@ -1144,9 +1225,9 @@ public: block_size = (block_size > suggested_blocksize) ? 
block_size : suggested_blocksize ; //Note: block_size must be less than or equal to 512 - m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_size /* block_size == max block_count */ ); - m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) ); - m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); + m_scratch_space = cuda_internal_scratch_space( m_policy.space(), ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_size /* block_size == max block_count */ ); + m_scratch_flags = cuda_internal_scratch_flags( m_policy.space(), sizeof(size_type) ); + m_unified_space = cuda_internal_scratch_unified( m_policy.space(), ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); // REQUIRED ( 1 , N , 1 ) const dim3 block( 1 , block_size , 1 ); @@ -1155,10 +1236,10 @@ public: const int shmem = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem( m_functor , block.y ); - CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute + CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem , m_policy.space().impl_internal_space_instance() , false ); // copy to device and execute if(!m_result_ptr_device_accessible) { - Cuda::fence(); + Cuda().fence(); if ( m_result_ptr ) { if ( m_unified_space ) { @@ -1213,8 +1294,6 @@ public: //---------------------------------------------------------------------------- -#if 1 - template< class FunctorType , class ReducerType, class ... Properties > class ParallelReduce< FunctorType , Kokkos::TeamPolicy< Properties ... > @@ -1222,9 +1301,10 @@ class ParallelReduce< FunctorType , Kokkos::Cuda > { +public: + typedef TeamPolicyInternal< Kokkos::Cuda, Properties ... > Policy ; private: - typedef TeamPolicyInternal< Kokkos::Cuda, Properties ... 
> Policy ; typedef typename Policy::member_type Member ; typedef typename Policy::work_tag WorkTag ; typedef typename Policy::launch_bounds LaunchBounds ; @@ -1261,6 +1341,7 @@ private: // const FunctorType m_functor ; + const Policy m_policy ; const ReducerType m_reducer ; const pointer_type m_result_ptr ; const bool m_result_ptr_device_accessible ; @@ -1273,7 +1354,7 @@ private: void* m_scratch_ptr[2] ; int m_scratch_size[2] ; const size_type m_league_size ; - const size_type m_team_size ; + int m_team_size ; const size_type m_vector_size ; template< class TagType > @@ -1412,20 +1493,20 @@ public: const int nwork = m_league_size * m_team_size ; if ( nwork ) { const int block_count = UseShflReduction? std::min( m_league_size , size_type(1024*32) ) - :std::min( m_league_size , m_team_size ); + :std::min( int(m_league_size) , m_team_size ); - m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_count ); - m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) ); - m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); + m_scratch_space = cuda_internal_scratch_space(m_policy.space(), ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_count ); + m_scratch_flags = cuda_internal_scratch_flags(m_policy.space(), sizeof(size_type) ); + m_unified_space = cuda_internal_scratch_unified( m_policy.space(),ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) ); const dim3 block( m_vector_size , m_team_size , 1 ); const dim3 grid( block_count , 1 , 1 ); const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ; - CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem_size_total ); // copy to device and execute + CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem_size_total , 
m_policy.space().impl_internal_space_instance() , true ); // copy to device and execute if(!m_result_ptr_device_accessible) { - Cuda::fence(); + Cuda().fence(); if ( m_result_ptr ) { if ( m_unified_space ) { @@ -1454,6 +1535,7 @@ public: Kokkos::is_view< ViewType >::value ,void*>::type = NULL) : m_functor( arg_functor ) + , m_policy ( arg_policy ) , m_reducer( InvalidType() ) , m_result_ptr( arg_result.data() ) , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ViewType::memory_space>::accessible ) @@ -1464,35 +1546,30 @@ public: , m_shmem_begin( 0 ) , m_shmem_size( 0 ) , m_scratch_ptr{NULL,NULL} - , m_scratch_size{ - arg_policy.scratch_size(0,( 0 <= arg_policy.team_size() ? arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(), - arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / - arg_policy.vector_length() ) - ), arg_policy.scratch_size(1,( 0 <= arg_policy.team_size() ? arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(), - arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / - arg_policy.vector_length() ) - )} , m_league_size( arg_policy.league_size() ) - , m_team_size( 0 <= arg_policy.team_size() ? 
arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(), - arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / - arg_policy.vector_length() ) + , m_team_size( arg_policy.team_size() ) , m_vector_size( arg_policy.vector_length() ) { + cudaFuncAttributes attr = CudaParallelLaunch< ParallelReduce, LaunchBounds >:: + get_cuda_func_attributes(); + m_team_size = m_team_size>=0?m_team_size: + Kokkos::Impl::cuda_get_opt_block_size< FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), + attr, m_functor , m_vector_size, + m_policy.team_scratch_size(0), m_policy.thread_scratch_size(0) )/m_vector_size; + // Return Init value if the number of worksets is zero - if( arg_policy.league_size() == 0) { + if( m_league_size*m_team_size == 0) { ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , arg_result.data() ); return ; } m_team_begin = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem( arg_functor , m_team_size ); m_shmem_begin = sizeof(double) * ( m_team_size + 2 ); - m_shmem_size = arg_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , m_team_size ); - m_scratch_ptr[1] = cuda_resize_scratch_space(static_cast(m_scratch_size[1])*(static_cast(Cuda::concurrency()/(m_team_size*m_vector_size)))); + m_shmem_size = m_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , m_team_size ); m_scratch_size[0] = m_shmem_size; - m_scratch_size[1] = arg_policy.scratch_size(1,m_team_size); + m_scratch_size[1] = m_policy.scratch_size(1,m_team_size); + m_scratch_ptr[1] = m_team_size<=0?NULL:cuda_resize_scratch_space(static_cast(m_scratch_size[1])*(static_cast(Cuda::concurrency()/(m_team_size*m_vector_size)))); // The global parallel_reduce does not support vector_length other than 1 at the moment if( (arg_policy.vector_length() > 1) && !UseShflReduction ) @@ 
-1509,7 +1586,7 @@ public: Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > bad team size")); } - if ( CudaTraits::SharedMemoryCapacity < shmem_size_total ) { + if ( m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < shmem_size_total ) { Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too much L0 scratch memory")); } @@ -1523,6 +1600,7 @@ public: , const Policy & arg_policy , const ReducerType & reducer) : m_functor( arg_functor ) + , m_policy( arg_policy ) , m_reducer( reducer ) , m_result_ptr( reducer.view().data() ) , m_result_ptr_device_accessible(MemorySpaceAccess< Kokkos::CudaSpace , typename ReducerType::result_view_type::memory_space>::accessible ) @@ -1534,12 +1612,17 @@ public: , m_shmem_size( 0 ) , m_scratch_ptr{NULL,NULL} , m_league_size( arg_policy.league_size() ) - , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() : - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds >( arg_functor , arg_policy.vector_length(), - arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / - arg_policy.vector_length() ) + , m_team_size( arg_policy.team_size() ) , m_vector_size( arg_policy.vector_length() ) { + cudaFuncAttributes attr = CudaParallelLaunch< ParallelReduce, LaunchBounds >:: + get_cuda_func_attributes(); + m_team_size = m_team_size>=0?m_team_size: + Kokkos::Impl::cuda_get_opt_block_size< FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), + attr, m_functor , m_vector_size, + m_policy.team_scratch_size(0), m_policy.thread_scratch_size(0) )/m_vector_size; + // Return Init value if the number of worksets is zero if( arg_policy.league_size() == 0) { ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , m_result_ptr ); @@ -1548,10 +1631,10 @@ public: m_team_begin = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem( arg_functor , m_team_size ); m_shmem_begin 
= sizeof(double) * ( m_team_size + 2 ); - m_shmem_size = arg_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , m_team_size ); - m_scratch_ptr[1] = cuda_resize_scratch_space(m_scratch_size[1]*(Cuda::concurrency()/(m_team_size*m_vector_size))); + m_shmem_size = m_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , m_team_size ); m_scratch_size[0] = m_shmem_size; - m_scratch_size[1] = arg_policy.scratch_size(1,m_team_size); + m_scratch_size[1] = m_policy.scratch_size(1,m_team_size); + m_scratch_ptr[1] = m_team_size<=0?NULL:cuda_resize_scratch_space(static_cast(m_scratch_size[1])*static_cast(Cuda::concurrency()/(m_team_size*m_vector_size))); // The global parallel_reduce does not support vector_length other than 1 at the moment if( (arg_policy.vector_length() > 1) && !UseShflReduction ) @@ -1565,7 +1648,7 @@ public: const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ; if ( (! Kokkos::Impl::is_integral_power_of_two( m_team_size ) && !UseShflReduction ) || - CudaTraits::SharedMemoryCapacity < shmem_size_total ) { + m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < shmem_size_total ) { Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > bad team size")); } if ( int(m_team_size) > arg_policy.team_size_max(m_functor,ParallelReduceTag()) ) { @@ -1575,365 +1658,6 @@ public: } }; -//---------------------------------------------------------------------------- -#else -//---------------------------------------------------------------------------- - -template< class FunctorType , class ReducerType, class ... Properties > -class ParallelReduce< FunctorType - , Kokkos::TeamPolicy< Properties ... 
> - , ReducerType - , Kokkos::Cuda - > -{ -private: - - enum : int { align_scratch_value = 0x0100 /* 256 */ }; - enum : int { align_scratch_mask = align_scratch_value - 1 }; - - KOKKOS_INLINE_FUNCTION static constexpr - int align_scratch( const int n ) - { - return ( n & align_scratch_mask ) - ? n + align_scratch_value - ( n & align_scratch_mask ) : n ; - } - - //---------------------------------------- - // Reducer does not wrap a functor - template< class R = ReducerType , class F = void > - struct reducer_type : public R { - - template< class S > - using rebind = reducer_type< typename R::rebind , void > ; - - KOKKOS_INLINE_FUNCTION - reducer_type( FunctorType const * - , ReducerType const * arg_reducer - , typename R::value_type * arg_value ) - : R( *arg_reducer , arg_value ) {} - }; - - // Reducer does wrap a functor - template< class R > - struct reducer_type< R , FunctorType > : public R { - - template< class S > - using rebind = reducer_type< typename R::rebind , FunctorType > ; - - KOKKOS_INLINE_FUNCTION - reducer_type( FunctorType const * arg_functor - , ReducerType const * - , typename R::value_type * arg_value ) - : R( arg_functor , arg_value ) {} - }; - - //---------------------------------------- - - typedef TeamPolicyInternal< Kokkos::Cuda, Properties ... 
> Policy ; - typedef CudaTeamMember Member ; - typedef typename Policy::work_tag WorkTag ; - typedef typename reducer_type<>::pointer_type pointer_type ; - typedef typename reducer_type<>::reference_type reference_type ; - typedef typename reducer_type<>::value_type value_type ; - typedef typename Policy::launch_bounds LaunchBounds ; - - typedef Kokkos::Impl::FunctorAnalysis - < Kokkos::Impl::FunctorPatternInterface::REDUCE - , Policy - , FunctorType - > Analysis ; - -public: - - typedef FunctorType functor_type ; - typedef Cuda::size_type size_type ; - -private: - - const FunctorType m_functor ; - const reducer_type<> m_reducer ; - size_type * m_scratch_space ; - size_type * m_unified_space ; - size_type m_team_begin ; - size_type m_shmem_begin ; - size_type m_shmem_size ; - void* m_scratch_ptr[2] ; - int m_scratch_size[2] ; - const size_type m_league_size ; - const size_type m_team_size ; - const size_type m_vector_size ; - - template< class TagType > - __device__ inline - typename std::enable_if< std::is_same< TagType , void >::value >::type - exec_team( const Member & member , reference_type update ) const - { m_functor( member , update ); } - - template< class TagType > - __device__ inline - typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type - exec_team( const Member & member , reference_type update ) const - { m_functor( TagType() , member , update ); } - - -public: - - __device__ inline - void operator() () const - { - void * const shmem = kokkos_impl_cuda_shared_memory(); - - const bool reduce_to_host = - std::is_same< typename reducer_type<>::memory_space - , Kokkos::HostSpace >::value && - m_reducer.data(); - - value_type value ; - - typename reducer_type<>::rebind< CudaSpace > - reduce( & m_functor , & m_reducer , & value ); - - reduce.init( reduce.data() ); - - // Iterate this block through the league - - for ( int league_rank = blockIdx.x - ; league_rank < m_league_size - ; league_rank += gridDim.x ) { - - // Initialization of team member data: - - const Member member - ( shmem - , m_shmem_team_begin - , m_shmem_team_size - , reinterpret_cast(m_scratch_space) + m_global_team_begin - , m_global_team_size - , league_rank - , m_league_size ); - - ParallelReduce::template - exec_team< WorkTag >( member , reduce.reference() ); - } - - if ( Member::global_reduce( reduce - , m_scratch_space - , reinterpret_cast(m_scratch_space) - + aligned_flag_size - , shmem - , m_shmem_size ) ) { - - // Single thread with data in value - - reduce.final( reduce.data() ); - - if ( reduce_to_host ) { - reducer.copy( m_unified_space , reduce.data() ); - } - } - } - - - inline - void execute() - { - const bool reduce_to_host = - std::is_same< typename reducer_type<>::memory_space - , Kokkos::HostSpace >::value && - m_reducer.data(); - - const bool reduce_to_gpu = - std::is_same< typename reducer_type<>::memory_space - , Kokkos::CudaSpace >::value && - m_reducer.data(); - - if ( m_league_size && m_team_size ) { - - const int value_size = Analysis::value_size( m_functor ); - - m_scratch_space = cuda_internal_scratch_space( m_scratch_size ); - m_unified_space = cuda_internal_scratch_unified( value_size ); - - const dim3 block( m_vector_size , m_team_size , m_team_per_block ); - 
const dim3 grid( m_league_size , 1 , 1 ); - const int shmem = m_shmem_team_begin + m_shmem_team_size ; - - // copy to device and execute - CudaParallelLaunch( *this, grid, block, shmem ); - - Cuda::fence(); - - if ( reduce_to_host ) { - m_reducer.copy( m_reducer.data() , pointer_type(m_unified_space) ); - } - } - else if ( reduce_to_host ) { - m_reducer.init( m_reducer.data() ); - } - else if ( reduce_to_gpu ) { - value_type tmp ; - m_reduce.init( & tmp ); - cudaMemcpy( m_reduce.data() , & tmp , cudaMemcpyHostToDevice ); - } - } - - - /**\brief Set up parameters and allocations for kernel launch. - * - * block = { vector_size , team_size , team_per_block } - * grid = { number_of_teams , 1 , 1 } - * - * shmem = shared memory for: - * [ team_reduce_buffer - * , team_scratch_buffer_level_0 ] - * reused by: - * [ global_reduce_buffer ] - * - * global_scratch for: - * [ global_reduce_flag_buffer - * , global_reduce_value_buffer - * , team_scratch_buffer_level_1 * max_concurrent_team ] - */ - - ParallelReduce( FunctorType && arg_functor - , Policy && arg_policy - , ReducerType const & arg_reducer - ) - : m_functor( arg_functor ) - // the input reducer may wrap the input functor so must - // generate a reducer bound to the copied functor. 
- , m_reducer( & m_functor , & arg_reducer , arg_reducer.data() ) - , m_scratch_space( 0 ) - , m_unified_space( 0 ) - , m_team_begin( 0 ) - , m_shmem_begin( 0 ) - , m_shmem_size( 0 ) - , m_scratch_ptr{NULL,NULL} - , m_league_size( arg_policy.league_size() ) - , m_team_per_block( 0 ) - , m_team_size( arg_policy.team_size() ) - , m_vector_size( arg_policy.vector_length() ) - { - if ( 0 == m_league_size ) return ; - - const int value_size = Analysis::value_size( m_functor ); - - //---------------------------------------- - // Vector length must be <= WarpSize and power of two - - const bool ok_vector = m_vector_size < CudaTraits::WarpSize && - Kokkos::Impl::is_integral_power_of_two( m_vector_size ); - - //---------------------------------------- - - if ( 0 == m_team_size ) { - // Team size is AUTO, use a whole block per team. - // Calculate block size using the occupance calculator. - // Occupancy calculator assumes whole block. - - m_team_size = - Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce, LaunchBounds > - ( arg_functor - , arg_policy.vector_length() - , arg_policy.team_scratch_size(0) - , arg_policy.thread_scratch_size(0) / arg_policy.vector_length() ); - - m_team_per_block = 1 ; - } - - //---------------------------------------- - // How many CUDA threads per team. - // If more than a warp or multiple teams cannot exactly fill a warp - // then only one team per block. 
- - const int team_threads = m_team_size * m_vector_size ; - - if ( ( CudaTraits::WarpSize < team_threads ) || - ( CudaTraits::WarpSize % team_threads ) ) { - m_team_per_block = 1 ; - } - - //---------------------------------------- - // How much team scratch shared memory determined from - // either the functor or the policy: - - if ( CudaTraits::WarpSize < team_threads ) { - // Need inter-warp team reduction (collectives) shared memory - // Speculate an upper bound for the value size - - m_shmem_team_begin = - align_scratch( CudaTraits::warp_count(team_threads) * sizeof(double) ); - } - - m_shmem_team_size = arg_policy.scratch_size(0,m_team_size); - - if ( 0 == m_shmem_team_size ) { - m_shmem_team_size = Analysis::team_shmem_size( m_functor , m_team_size ); - } - - m_shmem_team_size = align_scratch( m_shmem_team_size ); - - // Can fit a team in a block: - - const bool ok_shmem_team = - ( m_shmem_team_begin + m_shmem_team_size ) - < CudaTraits::SharedMemoryCapacity ; - - //---------------------------------------- - - if ( 0 == m_team_per_block ) { - // Potentially more than one team per block. - // Determine number of teams per block based upon - // how much team scratch can fit and exactly filling each warp. - - const int team_per_warp = team_threads / CudaTraits::WarpSize ; - - const int max_team_per_block = - Kokkos::Impl::CudaTraits::SharedMemoryCapacity - / shmem_team_scratch_size ; - - for ( m_team_per_block = team_per_warp ; - m_team_per_block + team_per_warp < max_team_per_block ; - m_team_per_block += team_per_warp ); - } - - //---------------------------------------- - // How much global reduce scratch shared memory. - - int shmem_global_reduce_size = 8 * value_size ; - - //---------------------------------------- - // Global scratch memory requirements. 
- - const int aligned_flag_size = align_scratch( sizeof(int) ); - - const int max_concurrent_block = - cuda_internal_maximum_concurrent_block_count(); - - // Reduce space has claim flag followed by vaue buffer - const int global_reduce_value_size = - max_concurrent_block * - ( aligned_flag_size + align_scratch( value_size ) ); - - // Scratch space has claim flag followed by scratch buffer - const int global_team_scratch_size = - max_concurrent_block * m_team_per_block * - ( aligned_flag_size + - align_scratch( arg_policy.scratch_size(1,m_team_size) / m_vector_size ) - ); - - const int global_size = aligned_flag_size - + global_reduce_value_size - + global_team_scratch_size ; - - m_global_reduce_begin = aligned_flag_size ; - m_global_team_begin = m_global_reduce_begin + global_reduce_value_size ; - m_global_size = m_global_team_begin + global_team_scratch_size ; - } -}; - -#endif - } // namespace Impl } // namespace Kokkos @@ -1949,9 +1673,9 @@ class ParallelScan< FunctorType , Kokkos::Cuda > { -private: - +public: typedef Kokkos::RangePolicy< Traits ... 
> Policy ; +private: typedef typename Policy::member_type Member ; typedef typename Policy::work_tag WorkTag ; typedef typename Policy::WorkRange WorkRange ; @@ -2105,7 +1829,7 @@ public: } // Determine block size constrained by shared memory: - static inline + inline unsigned local_block_size( const FunctorType & f ) { // blockDim.y must be power of two = 128 (4 warps) or 256 (8 warps) or 512 (16 warps) @@ -2114,7 +1838,7 @@ public: // 4 warps was 10% faster than 8 warps and 20% faster than 16 warps in unit testing unsigned n = CudaTraits::WarpSize * 4 ; - while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem( f , n ) ) { n >>= 1 ; } + while ( n && unsigned(m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock) < cuda_single_inter_block_reduce_scan_shmem( f , n ) ) { n >>= 1 ; } return n ; } @@ -2140,18 +1864,18 @@ public: // How many block are really needed for this much work: const int grid_x = ( nwork + work_per_block - 1 ) / work_per_block ; - m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( m_functor ) * grid_x ); - m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) * 1 ); + m_scratch_space = cuda_internal_scratch_space( m_policy.space(), ValueTraits::value_size( m_functor ) * grid_x ); + m_scratch_flags = cuda_internal_scratch_flags( m_policy.space(), sizeof(size_type) * 1 ); const dim3 grid( grid_x , 1 , 1 ); const dim3 block( 1 , block_size , 1 ); // REQUIRED DIMENSIONS ( 1 , N , 1 ) const int shmem = ValueTraits::value_size( m_functor ) * ( block_size + 2 ); m_final = false ; - CudaParallelLaunch< ParallelScan, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute + CudaParallelLaunch< ParallelScan, LaunchBounds >( *this, grid, block, shmem , m_policy.space().impl_internal_space_instance() , false ); // copy to device and execute m_final = true ; - CudaParallelLaunch< ParallelScan, LaunchBounds >( *this, grid, block, shmem ); // copy to device and 
execute + CudaParallelLaunch< ParallelScan, LaunchBounds >( *this, grid, block, shmem , m_policy.space().impl_internal_space_instance() , false ); // copy to device and execute } } @@ -2173,9 +1897,10 @@ class ParallelScanWithTotal< FunctorType , Kokkos::Cuda > { -private: - +public: typedef Kokkos::RangePolicy< Traits ... > Policy ; + +private: typedef typename Policy::member_type Member ; typedef typename Policy::work_tag WorkTag ; typedef typename Policy::WorkRange WorkRange ; @@ -2332,7 +2057,7 @@ public: } // Determine block size constrained by shared memory: - static inline + inline unsigned local_block_size( const FunctorType & f ) { // blockDim.y must be power of two = 128 (4 warps) or 256 (8 warps) or 512 (16 warps) @@ -2341,7 +2066,7 @@ public: // 4 warps was 10% faster than 8 warps and 20% faster than 16 warps in unit testing unsigned n = CudaTraits::WarpSize * 4 ; - while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem( f , n ) ) { n >>= 1 ; } + while ( n && unsigned(m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock) < cuda_single_inter_block_reduce_scan_shmem( f , n ) ) { n >>= 1 ; } return n ; } @@ -2367,18 +2092,18 @@ public: // How many block are really needed for this much work: const int grid_x = ( nwork + work_per_block - 1 ) / work_per_block ; - m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( m_functor ) * grid_x ); - m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) * 1 ); + m_scratch_space = cuda_internal_scratch_space( m_policy.space(), ValueTraits::value_size( m_functor ) * grid_x ); + m_scratch_flags = cuda_internal_scratch_flags( m_policy.space(), sizeof(size_type) * 1 ); const dim3 grid( grid_x , 1 , 1 ); const dim3 block( 1 , block_size , 1 ); // REQUIRED DIMENSIONS ( 1 , N , 1 ) const int shmem = ValueTraits::value_size( m_functor ) * ( block_size + 2 ); m_final = false ; - CudaParallelLaunch< ParallelScanWithTotal, LaunchBounds >( *this, grid, 
block, shmem ); // copy to device and execute + CudaParallelLaunch< ParallelScanWithTotal, LaunchBounds >( *this, grid, block, shmem , m_policy.space().impl_internal_space_instance() , false ); // copy to device and execute m_final = true ; - CudaParallelLaunch< ParallelScanWithTotal, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute + CudaParallelLaunch< ParallelScanWithTotal, LaunchBounds >( *this, grid, block, shmem , m_policy.space().impl_internal_space_instance() , false ); // copy to device and execute const int size = ValueTraits::value_size( m_functor ); DeepCopy( &m_returnvalue, m_scratch_space + (grid_x - 1)*size/sizeof(int), size ); diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp index d09854c3a5..c39dddb198 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp @@ -376,13 +376,13 @@ template< class ReducerType > __device__ inline typename std::enable_if< Kokkos::is_reducer::value >::type cuda_intra_warp_reduction( const ReducerType& reducer, + typename ReducerType::value_type& result, const uint32_t max_active_thread = blockDim.y) { typedef typename ReducerType::value_type ValueType; unsigned int shift = 1; - ValueType result = reducer.reference(); //Reduce over values from threads with different threadIdx.y while(blockDim.x * shift < 32 ) { const ValueType tmp = shfl_down(result, blockDim.x*shift,32u); @@ -400,6 +400,7 @@ template< class ReducerType > __device__ inline typename std::enable_if< Kokkos::is_reducer::value >::type cuda_inter_warp_reduction( const ReducerType& reducer, + typename ReducerType::value_type value, const int max_active_thread = blockDim.y) { typedef typename ReducerType::value_type ValueType; @@ -410,7 +411,6 @@ cuda_inter_warp_reduction( const ReducerType& reducer, // could lead to race conditions __shared__ double sh_result[(sizeof(ValueType)+7)/8*STEP_WIDTH]; 
ValueType* result = (ValueType*) & sh_result; - ValueType value = reducer.reference(); const int step = 32 / blockDim.x; int shift = STEP_WIDTH; const int id = threadIdx.y%step==0?threadIdx.y/step:65000; @@ -438,9 +438,18 @@ template< class ReducerType > __device__ inline typename std::enable_if< Kokkos::is_reducer::value >::type cuda_intra_block_reduction( const ReducerType& reducer, + typename ReducerType::value_type value, const int max_active_thread = blockDim.y) { - cuda_intra_warp_reduction(reducer,max_active_thread); - cuda_inter_warp_reduction(reducer,max_active_thread); + cuda_intra_warp_reduction(reducer,value,max_active_thread); + cuda_inter_warp_reduction(reducer,value,max_active_thread); +} + +template< class ReducerType > +__device__ inline +typename std::enable_if< Kokkos::is_reducer::value >::type +cuda_intra_block_reduction( const ReducerType& reducer, + const int max_active_thread = blockDim.y) { + cuda_intra_block_reduction(reducer,reducer.reference(),max_active_thread); } template< class ReducerType> diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp index ee949583f1..ac36cfd67e 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp @@ -54,194 +54,8 @@ namespace Kokkos { namespace Impl { -template class TaskQueue< Kokkos::Cuda > ; - -//---------------------------------------------------------------------------- - -__device__ -void TaskQueueSpecialization< Kokkos::Cuda >::driver - ( TaskQueueSpecialization< Kokkos::Cuda >::queue_type * const queue - , int32_t shmem_per_warp ) -{ - using Member = TaskExec< Kokkos::Cuda > ; - using Queue = TaskQueue< Kokkos::Cuda > ; - using task_root_type = TaskBase< void , void , void > ; - - extern __shared__ int32_t shmem_all[]; - - task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - - int32_t * const warp_shmem = - shmem_all + ( threadIdx.z * shmem_per_warp ) / sizeof(int32_t); 
- - task_root_type * const task_shmem = (task_root_type *) warp_shmem ; - - const int warp_lane = threadIdx.x + threadIdx.y * blockDim.x ; - - Member single_exec( warp_shmem , 1 ); - Member team_exec( warp_shmem , blockDim.y ); - - task_root_type * task_ptr ; - - // Loop until all queues are empty and no tasks in flight - - do { - - // Each team lead attempts to acquire either a thread team task - // or collection of single thread tasks for the team. - - if ( 0 == warp_lane ) { - - task_ptr = 0 < *((volatile int *) & queue->m_ready_count) ? end : 0 ; - - // Loop by priority and then type - for ( int i = 0 ; i < Queue::NumQueue && end == task_ptr ; ++i ) { - for ( int j = 0 ; j < 2 && end == task_ptr ; ++j ) { - task_ptr = Queue::pop_ready_task( & queue->m_ready[i][j] ); - } - } - -#if 0 -printf("TaskQueue::driver(%d,%d) task(%lx)\n",threadIdx.z,blockIdx.x - , uintptr_t(task_ptr)); -#endif - - } - - // Synchronize warp with memory fence before broadcasting task pointer: - - // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "A" ); - KOKKOS_IMPL_CUDA_SYNCWARP ; - - // Broadcast task pointer: - - ((int*) & task_ptr )[0] = KOKKOS_IMPL_CUDA_SHFL( ((int*) & task_ptr )[0] , 0 , 32 ); - ((int*) & task_ptr )[1] = KOKKOS_IMPL_CUDA_SHFL( ((int*) & task_ptr )[1] , 0 , 32 ); - -#if defined( KOKKOS_DEBUG ) - KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "TaskQueue CUDA task_ptr" ); -#endif - - if ( 0 == task_ptr ) break ; // 0 == queue->m_ready_count - - if ( end != task_ptr ) { - - // Whole warp copy task's closure to/from shared memory. - // Use all threads of warp for coalesced read/write. 
- - int32_t const b = sizeof(task_root_type) / sizeof(int32_t); - int32_t const e = *((int32_t volatile *)( & task_ptr->m_alloc_size )) / sizeof(int32_t); - - int32_t volatile * const task_mem = (int32_t volatile *) task_ptr ; - - // copy task closure from global to shared memory: - - for ( int32_t i = warp_lane ; i < e ; i += CudaTraits::WarpSize ) { - warp_shmem[i] = task_mem[i] ; - } - - // Synchronize threads of the warp and insure memory - // writes are visible to all threads in the warp. - - // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "B" ); - KOKKOS_IMPL_CUDA_SYNCWARP ; - - if ( task_root_type::TaskTeam == task_shmem->m_task_type ) { - // Thread Team Task - (*task_shmem->m_apply)( task_shmem , & team_exec ); - } - else if ( 0 == threadIdx.y ) { - // Single Thread Task - (*task_shmem->m_apply)( task_shmem , & single_exec ); - } - - // Synchronize threads of the warp and insure memory - // writes are visible to all threads in the warp. - - // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "C" ); - KOKKOS_IMPL_CUDA_SYNCWARP ; - - // copy task closure from shared to global memory: - - for ( int32_t i = b + warp_lane ; i < e ; i += CudaTraits::WarpSize ) { - task_mem[i] = warp_shmem[i] ; - } - - // Synchronize threads of the warp and insure memory - // writes are visible to root thread of the warp for - // respawn or completion. 
- - // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "D" ); - KOKKOS_IMPL_CUDA_SYNCWARP ; - - // If respawn requested copy respawn data back to main memory - - if ( 0 == warp_lane ) { - - if ( ((task_root_type *) task_root_type::LockTag) != task_shmem->m_next ) { - ( (volatile task_root_type *) task_ptr )->m_next = task_shmem->m_next ; - ( (volatile task_root_type *) task_ptr )->m_priority = task_shmem->m_priority ; - } - - queue->complete( task_ptr ); - } - } - } while(1); -} - -namespace { - -__global__ -void cuda_task_queue_execute( TaskQueue< Kokkos::Cuda > * queue - , int32_t shmem_size ) -{ TaskQueueSpecialization< Kokkos::Cuda >::driver( queue , shmem_size ); } - -} - -void TaskQueueSpecialization< Kokkos::Cuda >::execute - ( TaskQueue< Kokkos::Cuda > * const queue ) -{ - const int shared_per_warp = 2048 ; - const int warps_per_block = 4 ; - const dim3 grid( Kokkos::Impl::cuda_internal_multiprocessor_count() , 1 , 1 ); - const dim3 block( 1 , Kokkos::Impl::CudaTraits::WarpSize , warps_per_block ); - const int shared_total = shared_per_warp * warps_per_block ; - const cudaStream_t stream = 0 ; - - CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - -#if 0 -printf("cuda_task_queue_execute before\n"); -#endif - - // Query the stack size, in bytes: - - size_t previous_stack_size = 0 ; - CUDA_SAFE_CALL( cudaDeviceGetLimit( & previous_stack_size , cudaLimitStackSize ) ); - - // If not large enough then set the stack size, in bytes: - - const size_t larger_stack_size = 2048 ; - - if ( previous_stack_size < larger_stack_size ) { - CUDA_SAFE_CALL( cudaDeviceSetLimit( cudaLimitStackSize , larger_stack_size ) ); - } - - cuda_task_queue_execute<<< grid , block , shared_total , stream >>>( queue , shared_per_warp ); - - CUDA_SAFE_CALL( cudaGetLastError() ); - - CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - - if ( previous_stack_size < larger_stack_size ) { - CUDA_SAFE_CALL( cudaDeviceSetLimit( cudaLimitStackSize , previous_stack_size ) ); - } - -#if 0 -printf("cuda_task_queue_execute 
after\n"); -#endif - -} +template class TaskQueue< Kokkos::Cuda, Impl::default_tasking_memory_space_for_execution_space_t > ; +template class TaskQueueMultiple< Kokkos::Cuda, Impl::default_tasking_memory_space_for_execution_space_t > ; }} /* namespace Kokkos::Impl */ diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp index 8fa1192567..c35987e49e 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp @@ -50,6 +50,14 @@ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- +#include + +#include +#include // CUDA_SAFE_CALL +#include + +//---------------------------------------------------------------------------- + namespace Kokkos { namespace Impl { namespace { @@ -57,54 +65,498 @@ namespace { template< typename TaskType > __global__ void set_cuda_task_base_apply_function_pointer - ( TaskBase::function_type * ptr ) -{ *ptr = TaskType::apply ; } + ( typename TaskType::function_type * ptr, typename TaskType::destroy_type* dtor ) +{ + *ptr = TaskType::apply; + *dtor = TaskType::destroy; +} + +template< typename Scheduler > +__global__ +void cuda_task_queue_execute( Scheduler scheduler, int32_t shmem_size ) { + TaskQueueSpecialization< Scheduler >::driver( std::move(scheduler) , shmem_size ); +} } -template< class > class TaskExec ; +template class TaskExec ; -template<> -class TaskQueueSpecialization< Kokkos::Cuda > +template +class TaskQueueSpecialization< + SimpleTaskScheduler +> { public: - using execution_space = Kokkos::Cuda ; - using memory_space = Kokkos::CudaUVMSpace ; - using queue_type = TaskQueue< execution_space > ; - using member_type = TaskExec< Kokkos::Cuda > ; + using scheduler_type = SimpleTaskScheduler; + using execution_space = Kokkos::Cuda; + using memory_space = Kokkos::CudaUVMSpace; + using member_type = TaskExec ; + enum : long { 
max_league_size = 16 }; + enum : int { warps_per_block = 4 }; + + KOKKOS_INLINE_FUNCTION static - void iff_single_thread_recursive_execute( queue_type * const ) {} + void iff_single_thread_recursive_execute( scheduler_type const& ) {} + + static int get_max_team_count( + execution_space const& + ) { + return Kokkos::Impl::cuda_internal_multiprocessor_count() * warps_per_block; + } __device__ - static void driver( queue_type * const , int32_t ); + static void driver(scheduler_type scheduler, int32_t shmem_per_warp) + { + using queue_type = typename scheduler_type::task_queue_type; + using task_base_type = typename scheduler_type::task_base_type; + using runnable_task_base_type = typename scheduler_type::runnable_task_base_type; + using scheduling_info_storage_type = + SchedulingInfoStorage< + runnable_task_base_type, + typename scheduler_type::task_scheduling_info_type + >; + + extern __shared__ int32_t shmem_all[]; + + int32_t* const warp_shmem = shmem_all + (threadIdx.z * shmem_per_warp) / sizeof(int32_t); + + task_base_type* const shared_memory_task_copy = (task_base_type*)warp_shmem; + + const int warp_lane = threadIdx.x + threadIdx.y * blockDim.x; + + member_type single_exec(scheduler, warp_shmem, 1); + member_type team_exec(scheduler, warp_shmem, blockDim.y); + + auto& queue = scheduler.queue(); + auto& team_scheduler = team_exec.scheduler(); + + auto current_task = OptionalRef(); + + // Loop until all queues are empty and no tasks in flight + while(not queue.is_done()) { + + if(warp_lane == 0) { // should be (?) 
same as team_exec.team_rank() == 0 + // pop off a task + current_task = queue.pop_ready_task(team_scheduler.team_scheduler_info()); + } + + // Broadcast task pointer: + + // Sync before the broadcast + KOKKOS_IMPL_CUDA_SYNCWARP; + + // pretend it's an int* for shuffle purposes + ((int*) &current_task)[0] = KOKKOS_IMPL_CUDA_SHFL(((int*) &current_task)[0], 0, 32); + ((int*) &current_task)[1] = KOKKOS_IMPL_CUDA_SHFL(((int*) &current_task)[1], 0, 32); + + if(current_task) { + + KOKKOS_ASSERT(!current_task->as_runnable_task().get_respawn_flag()); + + int32_t b = sizeof(scheduling_info_storage_type) / sizeof(int32_t); + static_assert( + sizeof(scheduling_info_storage_type) % sizeof(int32_t) == 0, + "bad task size" + ); + int32_t const e = current_task->get_allocation_size() / sizeof(int32_t); + KOKKOS_ASSERT(current_task->get_allocation_size() % sizeof(int32_t) == 0); + + int32_t volatile* const task_mem = (int32_t volatile*)current_task.get(); + + // do a coordinated copy of the task closure from global to shared memory: + for(int32_t i = warp_lane; i < e; i += CudaTraits::WarpSize) { + warp_shmem[i] = task_mem[i]; + } + + // Synchronize threads of the warp and insure memory + // writes are visible to all threads in the warp. 
+ + KOKKOS_IMPL_CUDA_SYNCWARP; + + //if(warp_lane < b % CudaTraits::WarpSize) b += CudaTraits::WarpSize; + //b -= b % CudaTraits::WarpSize; + + // copy task closure from shared to global memory: + for (int32_t i = b + warp_lane; i < e; i += CudaTraits::WarpSize) { + task_mem[i] = warp_shmem[i]; + } + + // Synchronize threads of the warp and insure memory + // writes are visible to root thread of the warp for + // respawn or completion. + + KOKKOS_IMPL_CUDA_SYNCWARP; + + + if (warp_lane == 0) { + // If respawn requested copy respawn data back to main memory + if(shared_memory_task_copy->as_runnable_task().get_respawn_flag()) { + if(shared_memory_task_copy->as_runnable_task().has_predecessor()) { + // It's not necessary to make this a volatile write because + // the next read of the predecessor is on this thread in complete, + // and the predecessor is cleared there (using a volatile write) + current_task->as_runnable_task().acquire_predecessor_from( + shared_memory_task_copy->as_runnable_task() + ); + } + + // It may not necessary to make this a volatile write, since the + // next read will be done by this thread in complete where the + // rescheduling occurs, but since the task could be stolen later + // before this is written again, we should do the volatile write + // here. (It might not be necessary though because I don't know + // where else the priority would be read after it is scheduled + // by this thread; for now, we leave it volatile, but we should + // benchmark the cost of this.) 
+ current_task.as_volatile()->set_priority(shared_memory_task_copy->get_priority()); + + // It's not necessary to make this a volatile write, since the + // next read of it (if true) will be by this thread in `complete()`, + // which will unset the flag (using volatile) once it has handled + // the respawn + current_task->as_runnable_task().set_respawn_flag(); + + } + + queue.complete( + (*std::move(current_task)).as_runnable_task(), + team_scheduler.team_scheduler_info() + ); + } + + } + } + } static - void execute( queue_type * const ); + void execute(scheduler_type const& scheduler) + { + const int shared_per_warp = 2048 ; + const dim3 grid(Kokkos::Impl::cuda_internal_multiprocessor_count(), 1, 1); + const dim3 block(1, Kokkos::Impl::CudaTraits::WarpSize, warps_per_block); + const int shared_total = shared_per_warp * warps_per_block; + const cudaStream_t stream = nullptr; + + KOKKOS_ASSERT( + static_cast(grid.x * grid.y * grid.z * block.x * block.y * block.z) + == static_cast(get_max_team_count(scheduler.get_execution_space()) * Kokkos::Impl::CudaTraits::WarpSize) + ); + + auto& queue = scheduler.queue(); + + CUDA_SAFE_CALL(cudaDeviceSynchronize()); + + // Query the stack size, in bytes: + + size_t previous_stack_size = 0; + CUDA_SAFE_CALL(cudaDeviceGetLimit(&previous_stack_size, cudaLimitStackSize)); + + // If not large enough then set the stack size, in bytes: + + const size_t larger_stack_size = 1 << 11; + + if (previous_stack_size < larger_stack_size) { + CUDA_SAFE_CALL(cudaDeviceSetLimit(cudaLimitStackSize, larger_stack_size)); + } + + cuda_task_queue_execute<<>>(scheduler, shared_per_warp); + + CUDA_SAFE_CALL(cudaGetLastError()); + + CUDA_SAFE_CALL(cudaDeviceSynchronize()); + + if (previous_stack_size < larger_stack_size) { + CUDA_SAFE_CALL(cudaDeviceSetLimit(cudaLimitStackSize, previous_stack_size)); + } + } + + template + static + // TODO @tasking @optimiazation DSH specialize this for trivially destructible types + void + get_function_pointer( + 
typename TaskType::function_type& ptr, + typename TaskType::destroy_type& dtor + ) + { + using function_type = typename TaskType::function_type; + using destroy_type = typename TaskType::destroy_type; + + // TODO @tasking @minor DSH make sure there aren't any alignment concerns? + void* storage = cuda_internal_scratch_unified( + Kokkos::Cuda(), + sizeof(function_type) + sizeof(destroy_type) + ); + function_type* ptr_ptr = (function_type*)storage; + destroy_type* dtor_ptr = (destroy_type*)((char*)storage + sizeof(function_type)); + + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + + set_cuda_task_base_apply_function_pointer<<<1,1>>>(ptr_ptr, dtor_ptr); + + CUDA_SAFE_CALL( cudaGetLastError() ); + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + + ptr = *ptr_ptr; + dtor = *dtor_ptr; + } +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template +class TaskQueueSpecializationConstrained< + Scheduler, + typename std::enable_if< + std::is_same::value + >::type +> +{ +public: + + using scheduler_type = Scheduler; + using execution_space = Kokkos::Cuda; + using memory_space = Kokkos::CudaUVMSpace; + using member_type = TaskExec ; + + enum : long { max_league_size = 16 }; + + KOKKOS_INLINE_FUNCTION + static + void iff_single_thread_recursive_execute( scheduler_type const& ) {} + + __device__ + static void driver(scheduler_type scheduler, int32_t shmem_per_warp) + { + using queue_type = typename scheduler_type::queue_type; + using task_root_type = TaskBase; + + extern __shared__ int32_t shmem_all[]; + + task_root_type* const end = (task_root_type *) task_root_type::EndTag ; + task_root_type* const no_more_tasks_sentinel = nullptr; + + int32_t * const warp_shmem = + shmem_all + ( threadIdx.z * shmem_per_warp ) / sizeof(int32_t); + + task_root_type * const task_shmem = (task_root_type *) warp_shmem ; + + const int warp_lane = threadIdx.x + threadIdx.y * blockDim.x ; 
+ + member_type single_exec(scheduler, warp_shmem, 1); + member_type team_exec(scheduler, warp_shmem, blockDim.y); + + auto& team_queue = team_exec.scheduler().queue(); + + task_root_type * task_ptr = no_more_tasks_sentinel; + + // Loop until all queues are empty and no tasks in flight + + do { + + // Each team lead attempts to acquire either a thread team task + // or collection of single thread tasks for the team. + + if ( 0 == warp_lane ) { + + if( *((volatile int *) & team_queue.m_ready_count) > 0 ) { + task_ptr = end; + // Attempt to acquire a task + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == task_ptr ; ++i ) { + for ( int j = 0 ; j < 2 && end == task_ptr ; ++j ) { + task_ptr = queue_type::pop_ready_task( & team_queue.m_ready[i][j] ); + } + } + } + else { + // returns nullptr if and only if all other queues have a ready + // count of 0 also. Otherwise, returns a task from another queue + // or `end` if one couldn't be popped + task_ptr = team_queue.attempt_to_steal_task(); + #if 0 + if(task != no_more_tasks_sentinel && task != end) { + std::printf("task stolen on rank %d\n", team_exec.league_rank()); + } + #endif + } + + } + + // Synchronize warp with memory fence before broadcasting task pointer: + + // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "A" ); + KOKKOS_IMPL_CUDA_SYNCWARP ; + + // Broadcast task pointer: + + ((int*) & task_ptr )[0] = KOKKOS_IMPL_CUDA_SHFL( ((int*) & task_ptr )[0] , 0 , 32 ); + ((int*) & task_ptr )[1] = KOKKOS_IMPL_CUDA_SHFL( ((int*) & task_ptr )[1] , 0 , 32 ); + + #if defined( KOKKOS_DEBUG ) + KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "TaskQueue CUDA task_ptr" ); + #endif + + if ( 0 == task_ptr ) break ; // 0 == queue->m_ready_count + + if ( end != task_ptr ) { + + // Whole warp copy task's closure to/from shared memory. + // Use all threads of warp for coalesced read/write. 
+ + int32_t const b = sizeof(task_root_type) / sizeof(int32_t); + int32_t const e = *((int32_t volatile *)( & task_ptr->m_alloc_size )) / sizeof(int32_t); + + int32_t volatile * const task_mem = (int32_t volatile *) task_ptr ; + + KOKKOS_ASSERT(e * sizeof(int32_t) < shmem_per_warp); + + // copy task closure from global to shared memory: + + for ( int32_t i = warp_lane ; i < e ; i += CudaTraits::WarpSize ) { + warp_shmem[i] = task_mem[i] ; + } + + // Synchronize threads of the warp and insure memory + // writes are visible to all threads in the warp. + + // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "B" ); + KOKKOS_IMPL_CUDA_SYNCWARP ; + + if ( task_root_type::TaskTeam == task_shmem->m_task_type ) { + // Thread Team Task + (*task_shmem->m_apply)( task_shmem , & team_exec ); + } + else if ( 0 == threadIdx.y ) { + // Single Thread Task + (*task_shmem->m_apply)( task_shmem , & single_exec ); + } + + // Synchronize threads of the warp and insure memory + // writes are visible to all threads in the warp. + + // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "C" ); + KOKKOS_IMPL_CUDA_SYNCWARP ; + + // copy task closure from shared to global memory: + + for ( int32_t i = b + warp_lane ; i < e ; i += CudaTraits::WarpSize ) { + task_mem[i] = warp_shmem[i] ; + } + + // Synchronize threads of the warp and insure memory + // writes are visible to root thread of the warp for + // respawn or completion. 
+ + // KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN( "D" ); + KOKKOS_IMPL_CUDA_SYNCWARP ; + + // If respawn requested copy respawn data back to main memory + + if ( 0 == warp_lane ) { + + if ( ((task_root_type *) task_root_type::LockTag) != task_shmem->m_next ) { + ( (volatile task_root_type *) task_ptr )->m_next = task_shmem->m_next ; + ( (volatile task_root_type *) task_ptr )->m_priority = task_shmem->m_priority ; + } + + team_queue.complete( task_ptr ); + } + + } + } while(1); + } + + static + void execute(scheduler_type const& scheduler) + { + const int shared_per_warp = 2048 ; + const int warps_per_block = 4 ; + const dim3 grid( Kokkos::Impl::cuda_internal_multiprocessor_count() , 1 , 1 ); + //const dim3 grid( 1 , 1 , 1 ); + const dim3 block( 1 , Kokkos::Impl::CudaTraits::WarpSize , warps_per_block ); + const int shared_total = shared_per_warp * warps_per_block ; + const cudaStream_t stream = 0 ; + + auto& queue = scheduler.queue(); + queue.initialize_team_queues(warps_per_block * grid.x); + + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + + // Query the stack size, in bytes: + + size_t previous_stack_size = 0 ; + CUDA_SAFE_CALL( cudaDeviceGetLimit( & previous_stack_size , cudaLimitStackSize ) ); + + // If not large enough then set the stack size, in bytes: + + const size_t larger_stack_size = 2048 ; + + if ( previous_stack_size < larger_stack_size ) { + CUDA_SAFE_CALL( cudaDeviceSetLimit( cudaLimitStackSize , larger_stack_size ) ); + } + + cuda_task_queue_execute<<< grid , block , shared_total , stream >>>( scheduler , shared_per_warp ); + + CUDA_SAFE_CALL( cudaGetLastError() ); + + CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + + if ( previous_stack_size < larger_stack_size ) { + CUDA_SAFE_CALL( cudaDeviceSetLimit( cudaLimitStackSize , previous_stack_size ) ); + } + + } template< typename TaskType > static - typename TaskType::function_type - get_function_pointer() + void + get_function_pointer( + typename TaskType::function_type& ptr, + typename TaskType::destroy_type& 
dtor + ) { - using function_type = typename TaskType::function_type ; + using function_type = typename TaskType::function_type; + using destroy_type = typename TaskType::destroy_type; - function_type * const ptr = - (function_type*) cuda_internal_scratch_unified( sizeof(function_type) ); + void* storage = cuda_internal_scratch_unified( + Kokkos::Cuda(), + sizeof(function_type) + sizeof(destroy_type) + ); + function_type* ptr_ptr = (function_type*)storage; + destroy_type* dtor_ptr = (destroy_type*)((char*)storage + sizeof(function_type)); CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - set_cuda_task_base_apply_function_pointer<<<1,1>>>(ptr); + set_cuda_task_base_apply_function_pointer<<<1,1>>>(ptr_ptr, dtor_ptr); CUDA_SAFE_CALL( cudaGetLastError() ); CUDA_SAFE_CALL( cudaDeviceSynchronize() ); - return *ptr ; + ptr = *ptr_ptr; + dtor = *dtor_ptr; + } }; -extern template class TaskQueue< Kokkos::Cuda > ; +extern template class TaskQueue< Kokkos::Cuda, default_tasking_memory_space_for_execution_space_t > ; }} /* namespace Kokkos::Impl */ @@ -136,8 +588,8 @@ namespace Impl { * When executing a single thread task the syncwarp or other * warp synchronizing functions must not be called. */ -template<> -class TaskExec< Kokkos::Cuda > +template +class TaskExec { private: @@ -148,24 +600,39 @@ private: TaskExec & operator = ( TaskExec && ) = delete ; TaskExec & operator = ( TaskExec const & ) = delete ; - friend class Kokkos::Impl::TaskQueue< Kokkos::Cuda > ; - friend class Kokkos::Impl::TaskQueueSpecialization< Kokkos::Cuda > ; + friend class Kokkos::Impl::TaskQueue< Kokkos::Cuda, default_tasking_memory_space_for_execution_space_t > ; + template + friend class Kokkos::Impl::TaskQueueSpecializationConstrained; + template + friend class Kokkos::Impl::TaskQueueSpecialization; int32_t * m_team_shmem ; const int m_team_size ; + Scheduler m_scheduler; // If constructed with arg_team_size == 1 the object // can only be used by 0 == threadIdx.y. 
- __device__ - TaskExec( int32_t * arg_team_shmem , int arg_team_size = blockDim.y ) - : m_team_shmem( arg_team_shmem ) - , m_team_size( arg_team_size ) {} + KOKKOS_INLINE_FUNCTION + TaskExec( + Scheduler const& parent_scheduler, + int32_t* arg_team_shmem, + int arg_team_size = blockDim.y + ) + : m_team_shmem(arg_team_shmem), + m_team_size(arg_team_size), + m_scheduler(parent_scheduler.get_team_scheduler(league_rank())) + { } public: + using thread_team_member = TaskExec; + #if defined( __CUDA_ARCH__ ) - __device__ int team_rank() const { return threadIdx.y ; } - __device__ int team_size() const { return m_team_size ; } + __device__ int team_rank() const { return threadIdx.y ; } + __device__ int team_size() const { return m_team_size ; } + //__device__ int league_rank() const { return threadIdx.z; } + __device__ int league_rank() const { return blockIdx.x * blockDim.z + threadIdx.z; } + __device__ int league_size() const { return blockDim.z * gridDim.x; } __device__ void team_barrier() const { @@ -186,13 +653,18 @@ public: } #else - __host__ int team_rank() const { return 0 ; } - __host__ int team_size() const { return 0 ; } + __host__ int team_rank() const { return 0 ; } + __host__ int team_size() const { return 0 ; } + __host__ int league_rank() const { return 0; } + __host__ int league_size() const { return 0; } __host__ void team_barrier() const {} template< class ValueType > __host__ void team_broadcast( ValueType & , const int ) const {} #endif + KOKKOS_INLINE_FUNCTION Scheduler const& scheduler() const noexcept { return m_scheduler; } + KOKKOS_INLINE_FUNCTION Scheduler& scheduler() noexcept { return m_scheduler; } + }; }} /* namespace Kokkos::Impl */ @@ -203,20 +675,22 @@ public: namespace Kokkos { namespace Impl { -template -struct TeamThreadRangeBoundariesStruct > +template +struct TeamThreadRangeBoundariesStruct> { - typedef iType index_type; + using index_type = iType; + using member_type = TaskExec; + const iType start ; const iType end ; const iType 
increment ; - const TaskExec< Kokkos::Cuda > & thread; + member_type const& thread; #if defined( __CUDA_ARCH__ ) __device__ inline TeamThreadRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread, const iType& arg_count) + ( member_type const& arg_thread, const iType& arg_count) : start( threadIdx.y ) , end(arg_count) , increment( blockDim.y ) @@ -225,7 +699,7 @@ struct TeamThreadRangeBoundariesStruct > __device__ inline TeamThreadRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread + ( member_type const& arg_thread , const iType & arg_start , const iType & arg_end ) @@ -238,10 +712,10 @@ struct TeamThreadRangeBoundariesStruct > #else TeamThreadRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread, const iType& arg_count); + ( member_type const& arg_thread, const iType& arg_count); TeamThreadRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread + ( member_type const& arg_thread , const iType & arg_start , const iType & arg_end ); @@ -252,20 +726,22 @@ struct TeamThreadRangeBoundariesStruct > //---------------------------------------------------------------------------- -template -struct ThreadVectorRangeBoundariesStruct > +template +struct ThreadVectorRangeBoundariesStruct > { - typedef iType index_type; + using index_type = iType; + using member_type = TaskExec; + const index_type start ; const index_type end ; const index_type increment ; - const TaskExec< Kokkos::Cuda > & thread; + const member_type& thread; #if defined( __CUDA_ARCH__ ) __device__ inline ThreadVectorRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread, const index_type& arg_count ) + ( member_type const& arg_thread, const index_type& arg_count ) : start( threadIdx.x ) , end(arg_count) , increment( blockDim.x ) @@ -274,9 +750,9 @@ struct ThreadVectorRangeBoundariesStruct > __device__ inline ThreadVectorRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread, const index_type& arg_begin, const 
index_type& arg_end ) + ( member_type const& arg_thread, const index_type& arg_begin, const index_type& arg_end ) : start( arg_begin + threadIdx.x ) - , end(arg_count) + , end(arg_end) , increment( blockDim.x ) , thread(arg_thread) {} @@ -284,10 +760,10 @@ struct ThreadVectorRangeBoundariesStruct > #else ThreadVectorRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread, const index_type& arg_count ); + ( member_type const& arg_thread, const index_type& arg_count ); ThreadVectorRangeBoundariesStruct - ( const TaskExec< Kokkos::Cuda > & arg_thread, const index_type& arg_begin, const index_type& arg_end); + ( member_type const& arg_thread, const index_type& arg_begin, const index_type& arg_end); #endif @@ -299,69 +775,69 @@ struct ThreadVectorRangeBoundariesStruct > namespace Kokkos { -template -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > > -TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread, const iType & count ) -{ - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > >( thread, count ); -} +//template +//KOKKOS_INLINE_FUNCTION +//Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > > +//TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread, const iType & count ) +//{ +// return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > >( thread, count ); +//} +// +//template +//KOKKOS_INLINE_FUNCTION +//Impl::TeamThreadRangeBoundariesStruct +// < typename std::common_type::type +// , Impl::TaskExec< Kokkos::Cuda > > +//TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread +// , const iType1 & begin, const iType2 & end ) +//{ +// typedef typename std::common_type< iType1, iType2 >::type iType; +// return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > >( +// thread, iType(begin), iType(end) ); +//} +// +//template +//KOKKOS_INLINE_FUNCTION 
+//Impl::ThreadVectorRangeBoundariesStruct > +//ThreadVectorRange( const Impl::TaskExec< Kokkos::Cuda > & thread +// , const iType & count ) +//{ +// return Impl::ThreadVectorRangeBoundariesStruct >(thread,count); +//} +// +//template +//KOKKOS_INLINE_FUNCTION +//Impl::ThreadVectorRangeBoundariesStruct > +//ThreadVectorRange( const Impl::TaskExec< Kokkos::Cuda > & thread +// , const iType & arg_begin +// , const iType & arg_end ) +//{ +// return Impl::ThreadVectorRangeBoundariesStruct >(thread,arg_begin,arg_end); +//} -template -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct - < typename std::common_type::type - , Impl::TaskExec< Kokkos::Cuda > > -TeamThreadRange( const Impl::TaskExec< Kokkos::Cuda > & thread - , const iType1 & begin, const iType2 & end ) -{ - typedef typename std::common_type< iType1, iType2 >::type iType; - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Cuda > >( - thread, iType(begin), iType(end) ); -} +// KOKKOS_INLINE_FUNCTION +// Impl::ThreadSingleStruct > +// PerTeam(const Impl::TaskExec< Kokkos::Cuda >& thread) +// { +// return Impl::ThreadSingleStruct >(thread); +// } -template -KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct > -ThreadVectorRange( const Impl::TaskExec< Kokkos::Cuda > & thread - , const iType & count ) -{ - return Impl::ThreadVectorRangeBoundariesStruct >(thread,count); -} - -template -KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct > -ThreadVectorRange( const Impl::TaskExec< Kokkos::Cuda > & thread - , const iType & arg_begin - , const iType & arg_end ) -{ - return Impl::ThreadVectorRangeBoundariesStruct >(thread,arg_begin,arg_end); -} - -KOKKOS_INLINE_FUNCTION -Impl::ThreadSingleStruct > -PerTeam(const Impl::TaskExec< Kokkos::Cuda >& thread) -{ - return Impl::ThreadSingleStruct >(thread); -} - -KOKKOS_INLINE_FUNCTION -Impl::VectorSingleStruct > -PerThread(const Impl::TaskExec< Kokkos::Cuda >& thread) -{ - return Impl::VectorSingleStruct 
>(thread); -} +// KOKKOS_INLINE_FUNCTION +// Impl::VectorSingleStruct > +// PerThread(const Impl::TaskExec< Kokkos::Cuda >& thread) +// { +// return Impl::VectorSingleStruct >(thread); +// } /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. * * The range i=0..N-1 is mapped to all threads of the the calling thread team. * This functionality requires C++11 support. */ -template +template KOKKOS_INLINE_FUNCTION void parallel_for - ( const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries + ( const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries , const Lambda& lambda ) { @@ -370,10 +846,10 @@ void parallel_for } } -template< typename iType, class Lambda > +template< typename iType, class Lambda, class Scheduler > KOKKOS_INLINE_FUNCTION void parallel_for - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, + (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, const Lambda & lambda) { for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { lambda(i); @@ -459,14 +935,14 @@ void parallel_reduce // blockDim.y == team_size // threadIdx.x == position in vec // threadIdx.y == member number -template< typename iType, class Lambda, typename ValueType > +template< typename iType, class Lambda, typename ValueType, class Scheduler > KOKKOS_INLINE_FUNCTION void parallel_reduce - (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, + (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, const Lambda & lambda, ValueType& initialized_result) { - //TODO what is the point of creating this temporary? + //TODO @internal_documentation what is the point of creating this temporary? 
ValueType result = initialized_result; for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { lambda(i,result); @@ -487,15 +963,15 @@ void parallel_reduce } } -template< typename iType, class Lambda, typename ReducerType > +template< typename iType, class Lambda, typename ReducerType, class Scheduler > KOKKOS_INLINE_FUNCTION void parallel_reduce - (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, + (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, const Lambda & lambda, const ReducerType& reducer) { typedef typename ReducerType::value_type ValueType; - //TODO what is the point of creating this temporary? + //TODO @internal_documentation what is the point of creating this temporary? ValueType result = ValueType(); reducer.init(result); @@ -549,10 +1025,10 @@ void parallel_reduce // blockDim.y == team_size // threadIdx.x == position in vec // threadIdx.y == member number -template< typename iType, class Lambda, typename ValueType > +template< typename iType, class Lambda, typename ValueType, class Scheduler > KOKKOS_INLINE_FUNCTION void parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, + (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, const Lambda & lambda, ValueType& initialized_result) { @@ -576,10 +1052,10 @@ void parallel_reduce } } -template< typename iType, class Lambda, typename ReducerType > +template< typename iType, class Lambda, typename ReducerType, class Scheduler > KOKKOS_INLINE_FUNCTION void parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, + (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, const Lambda & lambda, const ReducerType& reducer) { @@ -611,10 +1087,10 @@ void parallel_reduce // blockDim.y == team_size // threadIdx.x == position in vec // threadIdx.y == member number -template< typename iType, class Closure > +template< typename iType, class Closure, class Scheduler > 
KOKKOS_INLINE_FUNCTION void parallel_scan - (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, + (const Impl::TeamThreadRangeBoundariesStruct >& loop_boundaries, const Closure & closure ) { // Extract value_type from closure @@ -676,10 +1152,10 @@ void parallel_scan // blockDim.y == team_size // threadIdx.x == position in vec // threadIdx.y == member number -template< typename iType, class Closure > +template< typename iType, class Closure, class Scheduler > KOKKOS_INLINE_FUNCTION void parallel_scan - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, + (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, const Closure & closure ) { // Extract value_type from closure @@ -735,25 +1211,25 @@ void parallel_scan namespace Kokkos { - template + template KOKKOS_INLINE_FUNCTION - void single(const Impl::VectorSingleStruct >& , const FunctorType& lambda) { + void single(const Impl::VectorSingleStruct >& , const FunctorType& lambda) { #ifdef __CUDA_ARCH__ if(threadIdx.x == 0) lambda(); #endif } - template + template KOKKOS_INLINE_FUNCTION - void single(const Impl::ThreadSingleStruct >& , const FunctorType& lambda) { + void single(const Impl::ThreadSingleStruct >& , const FunctorType& lambda) { #ifdef __CUDA_ARCH__ if(threadIdx.x == 0 && threadIdx.y == 0) lambda(); #endif } - template + template KOKKOS_INLINE_FUNCTION - void single(const Impl::VectorSingleStruct >& s , const FunctorType& lambda, ValueType& val) { + void single(const Impl::VectorSingleStruct >& s , const FunctorType& lambda, ValueType& val) { #ifdef __CUDA_ARCH__ if(threadIdx.x == 0) lambda(val); if ( 1 < s.team_member.team_size() ) { @@ -762,9 +1238,9 @@ namespace Kokkos { #endif } - template + template KOKKOS_INLINE_FUNCTION - void single(const Impl::ThreadSingleStruct >& single_struct, const FunctorType& lambda, ValueType& val) { + void single(const Impl::ThreadSingleStruct >& single_struct, const FunctorType& lambda, ValueType& val) { #ifdef __CUDA_ARCH__ 
if(threadIdx.x == 0 && threadIdx.y == 0) { lambda(val); diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp index 18271a5146..587ad6001d 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp @@ -56,9 +56,9 @@ #include #include -#include +#include #include -#include +#include #include #if defined(KOKKOS_ENABLE_PROFILING) @@ -101,11 +101,13 @@ struct CudaJoinFunctor { * total available shared memory must be partitioned among teams. */ class CudaTeamMember { -private: +public: typedef Kokkos::Cuda execution_space ; typedef execution_space::scratch_memory_space scratch_memory_space ; +private: + mutable void * m_team_reduce ; scratch_memory_space m_team_shared ; int m_team_reduce_size ; @@ -221,12 +223,21 @@ public: KOKKOS_INLINE_FUNCTION typename std::enable_if< is_reducer< ReducerType >::value >::type team_reduce( ReducerType const & reducer ) const noexcept + { + team_reduce(reducer,reducer.reference()); + } + + template< typename ReducerType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< is_reducer< ReducerType >::value >::type + team_reduce( ReducerType const & reducer, typename ReducerType::value_type& value ) const noexcept { #ifdef __CUDA_ARCH__ - cuda_intra_block_reduction(reducer,blockDim.y); + cuda_intra_block_reduction(reducer,value,blockDim.y); #endif /* #ifdef __CUDA_ARCH__ */ } + //-------------------------------------------------------------------------- /** \brief Intra-team exclusive prefix sum with team_rank() ordering * with intra-team non-deterministic ordering accumulation. 
@@ -281,20 +292,28 @@ public: template< typename ReducerType > KOKKOS_INLINE_FUNCTION static typename std::enable_if< is_reducer< ReducerType >::value >::type - vector_reduce( ReducerType const & reducer ) + vector_reduce( ReducerType const & reducer ) { + vector_reduce(reducer,reducer.reference()); + } + + template< typename ReducerType > + KOKKOS_INLINE_FUNCTION static + typename std::enable_if< is_reducer< ReducerType >::value >::type + vector_reduce( ReducerType const & reducer, typename ReducerType::value_type& value ) { #ifdef __CUDA_ARCH__ if(blockDim.x == 1) return; // Intra vector lane shuffle reduction: - typename ReducerType::value_type tmp ( reducer.reference() ); + typename ReducerType::value_type tmp ( value ); + typename ReducerType::value_type tmp2 = tmp; unsigned mask = blockDim.x==32?0xffffffff:((1<>= 1 ) ; ) { - cuda_shfl_down( reducer.reference() , tmp , i , blockDim.x , mask ); - if ( (int)threadIdx.x < i ) { reducer.join( tmp , reducer.reference() ); } + cuda_shfl_down( tmp2 , tmp , i , blockDim.x , mask ); + if ( (int)threadIdx.x < i ) { reducer.join( tmp , tmp2 ); } } // Broadcast from root lane to all other lanes. @@ -302,7 +321,9 @@ public: // because floating point summation is not associative // and thus different threads could have different results. 
- cuda_shfl( reducer.reference() , tmp , 0 , blockDim.x , mask ); + cuda_shfl( tmp2 , tmp , 0 , blockDim.x , mask ); + value = tmp2; + reducer.reference() = tmp2; #endif } @@ -543,19 +564,37 @@ struct TeamThreadRangeBoundariesStruct { const iType end; KOKKOS_INLINE_FUNCTION - TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count) + TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, iType count) : member(thread_) , start( 0 ) , end( count ) {} KOKKOS_INLINE_FUNCTION - TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& begin_, const iType& end_) + TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, iType begin_, iType end_) : member(thread_) , start( begin_ ) , end( end_ ) {} }; +template +struct TeamVectorRangeBoundariesStruct { + typedef iType index_type; + const CudaTeamMember& member; + const iType start; + const iType end; + KOKKOS_INLINE_FUNCTION + TeamVectorRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count) + : member(thread_) + , start( 0 ) + , end( count ) {} + + KOKKOS_INLINE_FUNCTION + TeamVectorRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& begin_, const iType& end_) + : member(thread_) + , start( begin_ ) + , end( end_ ) {} +}; template struct ThreadVectorRangeBoundariesStruct { @@ -564,19 +603,19 @@ struct ThreadVectorRangeBoundariesStruct { const index_type end; KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct (const CudaTeamMember, const index_type& count) + ThreadVectorRangeBoundariesStruct (const CudaTeamMember, index_type count) : start( static_cast(0) ), end( count ) {} KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct (const index_type& count) + ThreadVectorRangeBoundariesStruct (index_type count) : start( static_cast(0) ), end( count ) {} KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct (const CudaTeamMember, const index_type& arg_begin, const index_type& arg_end) + 
ThreadVectorRangeBoundariesStruct (const CudaTeamMember, index_type arg_begin, index_type arg_end) : start( arg_begin ), end( arg_end ) {} KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct (const index_type& arg_begin, const index_type& arg_end) + ThreadVectorRangeBoundariesStruct (index_type arg_begin, index_type arg_end) : start( arg_begin ), end( arg_end ) {} }; @@ -585,7 +624,7 @@ struct ThreadVectorRangeBoundariesStruct { template KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember > -TeamThreadRange( const Impl::CudaTeamMember & thread, const iType & count ) { +TeamThreadRange( const Impl::CudaTeamMember & thread, iType count ) { return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, count ); } @@ -593,22 +632,38 @@ template< typename iType1, typename iType2 > KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, Impl::CudaTeamMember > -TeamThreadRange( const Impl::CudaTeamMember & thread, const iType1 & begin, const iType2 & end ) { +TeamThreadRange( const Impl::CudaTeamMember & thread, iType1 begin, iType2 end ) { typedef typename std::common_type< iType1, iType2 >::type iType; return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, iType(begin), iType(end) ); } +template +KOKKOS_INLINE_FUNCTION +Impl::TeamVectorRangeBoundariesStruct< iType, Impl::CudaTeamMember > +TeamVectorRange( const Impl::CudaTeamMember & thread, const iType & count ) { + return Impl::TeamVectorRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, count ); +} + +template< typename iType1, typename iType2 > +KOKKOS_INLINE_FUNCTION +Impl::TeamVectorRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, + Impl::CudaTeamMember > +TeamVectorRange( const Impl::CudaTeamMember & thread, const iType1 & begin, const iType2 & end ) { + typedef typename std::common_type< iType1, iType2 >::type iType; + return 
Impl::TeamVectorRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, iType(begin), iType(end) ); +} + template KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct -ThreadVectorRange(const Impl::CudaTeamMember& thread, const iType& count) { +ThreadVectorRange(const Impl::CudaTeamMember& thread, iType count) { return Impl::ThreadVectorRangeBoundariesStruct(thread,count); } template KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct -ThreadVectorRange(const Impl::CudaTeamMember& thread, const iType& arg_begin, const iType& arg_end) { +ThreadVectorRange(const Impl::CudaTeamMember& thread, iType arg_begin, iType arg_end) { return Impl::ThreadVectorRangeBoundariesStruct(thread,arg_begin,arg_end); } @@ -667,16 +722,16 @@ parallel_reduce ) { #ifdef __CUDA_ARCH__ - - reducer.init( reducer.reference() ); + typename ReducerType::value_type value; + reducer.init( value ); for( iType i = loop_boundaries.start + threadIdx.y ; i < loop_boundaries.end ; i += blockDim.y ) { - closure(i,reducer.reference()); + closure(i,value); } - loop_boundaries.member.team_reduce( reducer ); + loop_boundaries.member.team_reduce( reducer, value ); #endif } @@ -701,19 +756,88 @@ parallel_reduce ) { #ifdef __CUDA_ARCH__ - - Kokkos::Sum reducer(result); + ValueType val; + Kokkos::Sum reducer(val); reducer.init( reducer.reference() ); for( iType i = loop_boundaries.start + threadIdx.y ; i < loop_boundaries.end ; i += blockDim.y ) { - closure(i,result); + closure(i,val); } - loop_boundaries.member.team_reduce( reducer ); + loop_boundaries.member.team_reduce( reducer , val); + result = reducer.reference(); +#endif +} +template +KOKKOS_INLINE_FUNCTION +void parallel_for + ( const Impl::TeamVectorRangeBoundariesStruct& + loop_boundaries + , const Closure & closure + ) +{ + #ifdef __CUDA_ARCH__ + for( iType i = loop_boundaries.start + threadIdx.y * blockDim.x + threadIdx.x + ; i < loop_boundaries.end + ; i += blockDim.y*blockDim.x ) + closure(i); + #endif +} + +template< 
typename iType, class Closure, class ReducerType > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type +parallel_reduce + ( const Impl::TeamVectorRangeBoundariesStruct & + loop_boundaries + , const Closure & closure + , const ReducerType & reducer + ) +{ +#ifdef __CUDA_ARCH__ + typename ReducerType::value_type value; + reducer.init( value ); + + for( iType i = loop_boundaries.start + threadIdx.y * blockDim.x + threadIdx.x + ; i < loop_boundaries.end + ; i += blockDim.y * blockDim.x ) { + closure(i,value); + } + + loop_boundaries.member.vector_reduce( reducer, value ); + loop_boundaries.member.team_reduce( reducer, value ); +#endif +} + +template< typename iType, class Closure, typename ValueType > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< ! Kokkos::is_reducer< ValueType >::value >::type +parallel_reduce + ( const Impl::TeamVectorRangeBoundariesStruct & + loop_boundaries + , const Closure & closure + , ValueType & result + ) +{ +#ifdef __CUDA_ARCH__ + ValueType val; + Kokkos::Sum reducer(val); + + reducer.init( reducer.reference() ); + + for( iType i = loop_boundaries.start + threadIdx.y * blockDim.x + threadIdx.x + ; i < loop_boundaries.end + ; i += blockDim.y * blockDim.x ) { + closure(i,val); + } + + loop_boundaries.member.vector_reduce( reducer ); + loop_boundaries.member.team_reduce( reducer ); + result = reducer.reference(); #endif } diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp index af2aff8b35..2fe9d8ccf7 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp @@ -241,7 +241,7 @@ class ViewDataHandle< Traits , sizeof(typename Traits::const_value_type) == 16 ) && // Random access trait - ( Traits::memory_traits::RandomAccess != 0 ) + ( Traits::memory_traits::is_random_access != 0 ) )>::type > { public: diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp 
b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp index 94e293d7c7..9c0ac470c8 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp @@ -102,9 +102,8 @@ public: const dim3 grid( Kokkos::Impl::cuda_internal_multiprocessor_count() , 1 , 1 ); const dim3 block( 1 , Kokkos::Impl::CudaTraits::WarpSize , warps_per_block ); const int shared = 0 ; - const cudaStream_t stream = 0 ; - Kokkos::Impl::CudaParallelLaunch(*this, grid, block, shared, stream); + Kokkos::Impl::CudaParallelLaunch(*this, grid, block, shared, Cuda().impl_internal_space_instance() , false ); } inline diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX.cpp b/lib/kokkos/core/src/HPX/Kokkos_HPX.cpp new file mode 100644 index 0000000000..da9783467c --- /dev/null +++ b/lib/kokkos/core/src/HPX/Kokkos_HPX.cpp @@ -0,0 +1,152 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#include + +#ifdef KOKKOS_ENABLE_HPX +#include + +#include + +namespace Kokkos { +namespace Experimental { + +bool HPX::m_hpx_initialized = false; +Kokkos::Impl::thread_buffer HPX::m_buffer; +#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH) +hpx::future HPX::m_future = hpx::make_ready_future(); +#endif + +int HPX::concurrency() { + hpx::runtime *rt = hpx::get_runtime_ptr(); + if (rt == nullptr) { + return hpx::threads::hardware_concurrency(); + } else { + if (hpx::threads::get_self_ptr() == nullptr) { + return hpx::resource::get_thread_pool(0).get_os_thread_count(); + } else { + return hpx::this_thread::get_pool()->get_os_thread_count(); + } + } +} + +void HPX::impl_initialize(int thread_count) { + hpx::runtime *rt = hpx::get_runtime_ptr(); + if (rt == nullptr) { + std::vector config = { + "hpx.os_threads=" + std::to_string(thread_count), +#ifdef KOKKOS_DEBUG + "--hpx:attach-debugger=exception", +#endif + }; + int argc_hpx = 1; + char name[] = "kokkos_hpx"; + char *argv_hpx[] = {name, nullptr}; + hpx::start(nullptr, argc_hpx, 
argv_hpx, config); + + // NOTE: Wait for runtime to start. hpx::start returns as soon as + // possible, meaning some operations are not allowed immediately + // after hpx::start. Notably, hpx::stop needs state_running. This + // needs to be fixed in HPX itself. + + // Get runtime pointer again after it has been started. + rt = hpx::get_runtime_ptr(); + hpx::util::yield_while( + [rt]() { return rt->get_state() < hpx::state_running; }); + + m_hpx_initialized = true; + } +} + +void HPX::impl_initialize() { + hpx::runtime *rt = hpx::get_runtime_ptr(); + if (rt == nullptr) { + std::vector config = { +#ifdef KOKKOS_DEBUG + "--hpx:attach-debugger=exception", +#endif + }; + int argc_hpx = 1; + char name[] = "kokkos_hpx"; + char *argv_hpx[] = {name, nullptr}; + hpx::start(nullptr, argc_hpx, argv_hpx, config); + + // NOTE: Wait for runtime to start. hpx::start returns as soon as + // possible, meaning some operations are not allowed immediately + // after hpx::start. Notably, hpx::stop needs state_running. This + // needs to be fixed in HPX itself. + + // Get runtime pointer again after it has been started. 
+ rt = hpx::get_runtime_ptr(); + hpx::util::yield_while( + [rt]() { return rt->get_state() < hpx::state_running; }); + + m_hpx_initialized = true; + } +} + +bool HPX::impl_is_initialized() noexcept { + hpx::runtime *rt = hpx::get_runtime_ptr(); + return rt != nullptr; +} + +void HPX::impl_finalize() { + if (m_hpx_initialized) { + hpx::runtime *rt = hpx::get_runtime_ptr(); + if (rt != nullptr) { + hpx::apply([]() { hpx::finalize(); }); + hpx::stop(); + } else { + Kokkos::abort("Kokkos::Experimental::HPX::impl_finalize: Kokkos started " + "HPX but something else already stopped HPX\n"); + } + } +} + +} // namespace Experimental +} // namespace Kokkos + +#else +void KOKKOS_CORE_SRC_IMPL_HPX_PREVENT_LINK_ERROR() {} +#endif //#ifdef KOKKOS_ENABLE_HPX diff --git a/lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp similarity index 76% rename from lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp rename to lib/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp index d001e0a88c..df7c403685 100644 --- a/lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp +++ b/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.cpp @@ -41,38 +41,25 @@ //@HEADER */ -#ifndef KOKKOS_STATICASSERT_HPP -#define KOKKOS_STATICASSERT_HPP +#include +#if defined(KOKKOS_ENABLE_HPX) && defined(KOKKOS_ENABLE_TASKDAG) + +#include + +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- namespace Kokkos { namespace Impl { -template < bool , class T = void > -struct StaticAssert ; - -template< class T > -struct StaticAssert< true , T > { - typedef T type ; - static const bool value = true ; -}; - -template < class A , class B > -struct StaticAssertSame ; - -template < class A > -struct StaticAssertSame { typedef A type ; }; - -template < class A , class B > -struct StaticAssertAssignable ; - -template < class A > -struct StaticAssertAssignable { typedef A type ; }; - 
-template < class A > -struct StaticAssertAssignable< const A , A > { typedef const A type ; }; +template class TaskQueue; } // namespace Impl } // namespace Kokkos -#endif /* KOKKOS_STATICASSERT_HPP */ - +#else +void KOKKOS_CORE_SRC_IMPL_HPX_TASK_PREVENT_LINK_ERROR() {} +#endif // #if defined( KOKKOS_ENABLE_HPX ) && defined( KOKKOS_ENABLE_TASKDAG ) diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp new file mode 100644 index 0000000000..c3a14efee6 --- /dev/null +++ b/lib/kokkos/core/src/HPX/Kokkos_HPX_Task.hpp @@ -0,0 +1,298 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HPX_TASK_HPP +#define KOKKOS_HPX_TASK_HPP + +#include +#if defined(KOKKOS_ENABLE_HPX) && defined(KOKKOS_ENABLE_TASKDAG) + +#include + +#include + +#include +#include + +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template +class TaskQueueSpecialization< + SimpleTaskScheduler> { +public: + using execution_space = Kokkos::Experimental::HPX; + using scheduler_type = + SimpleTaskScheduler; + using member_type = + TaskTeamMemberAdapter; + using memory_space = Kokkos::HostSpace; + + static void execute(scheduler_type const &scheduler) { + // NOTE: We create an instance so that we can use dispatch_execute_task. + // This is not necessarily the most efficient, but can be improved later. 
+ TaskQueueSpecialization task_queue; + task_queue.scheduler = &scheduler; + Kokkos::Impl::dispatch_execute_task(&task_queue); + Kokkos::Experimental::HPX().fence(); + } + + // Must provide task queue execution function + void execute_task() const { + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + using task_base_type = typename scheduler_type::task_base_type; + + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, 512); + + auto &queue = scheduler->queue(); + + counting_semaphore sem(0); + + for (int thread = 0; thread < num_worker_threads; ++thread) { + apply([this, &sem, &queue, &buffer, num_worker_threads, thread]() { + // NOTE: This implementation has been simplified based on the + // assumption that team_size = 1. The HPX backend currently only + // supports a team size of 1. + std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id(); + + buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id()); + HPXTeamMember member(TeamPolicyInternal( + Kokkos::Experimental::HPX(), num_worker_threads, 1), + 0, t, buffer.get(t), 512); + + member_type single_exec(*scheduler, member); + member_type &team_exec = single_exec; + + auto &team_scheduler = team_exec.scheduler(); + auto current_task = OptionalRef(nullptr); + + while (!queue.is_done()) { + current_task = + queue.pop_ready_task(team_scheduler.team_scheduler_info()); + + if (current_task) { + KOKKOS_ASSERT(current_task->is_single_runnable() || + current_task->is_team_runnable()); + current_task->as_runnable_task().run(single_exec); + queue.complete((*std::move(current_task)).as_runnable_task(), + team_scheduler.team_scheduler_info()); + } + } + + sem.signal(1); + }); + } + + sem.wait(num_worker_threads); + } + + static uint32_t get_max_team_count(execution_space const &espace) { + return static_cast(espace.concurrency()); + } + + template + static void 
get_function_pointer(typename TaskType::function_type &ptr, + typename TaskType::destroy_type &dtor) { + ptr = TaskType::apply; + dtor = TaskType::destroy; + } + +private: + const scheduler_type *scheduler; +}; + +template +class TaskQueueSpecializationConstrained< + Scheduler, typename std::enable_if< + std::is_same::value>::type> { +public: + using execution_space = Kokkos::Experimental::HPX; + using scheduler_type = Scheduler; + using member_type = + TaskTeamMemberAdapter; + using memory_space = Kokkos::HostSpace; + + static void + iff_single_thread_recursive_execute(scheduler_type const &scheduler) { + using task_base_type = typename scheduler_type::task_base; + using queue_type = typename scheduler_type::queue_type; + + if (1 == Kokkos::Experimental::HPX::concurrency()) { + task_base_type *const end = (task_base_type *)task_base_type::EndTag; + task_base_type *task = end; + + HPXTeamMember member(TeamPolicyInternal( + Kokkos::Experimental::HPX(), 1, 1), + 0, 0, nullptr, 0); + member_type single_exec(scheduler, member); + + do { + task = end; + + // Loop by priority and then type + for (int i = 0; i < queue_type::NumQueue && end == task; ++i) { + for (int j = 0; j < 2 && end == task; ++j) { + task = + queue_type::pop_ready_task(&scheduler.m_queue->m_ready[i][j]); + } + } + + if (end == task) + break; + + (*task->m_apply)(task, &single_exec); + + scheduler.m_queue->complete(task); + + } while (true); + } + } + + static void execute(scheduler_type const &scheduler) { + // NOTE: We create an instance so that we can use dispatch_execute_task. + // This is not necessarily the most efficient, but can be improved later. 
+ TaskQueueSpecializationConstrained task_queue; + task_queue.scheduler = &scheduler; + Kokkos::Impl::dispatch_execute_task(&task_queue); + Kokkos::Experimental::HPX().fence(); + } + + // Must provide task queue execution function + void execute_task() const { + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + using task_base_type = typename scheduler_type::task_base; + using queue_type = typename scheduler_type::queue_type; + + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + static task_base_type *const end = (task_base_type *)task_base_type::EndTag; + constexpr task_base_type *no_more_tasks_sentinel = nullptr; + + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, 512); + + auto &queue = scheduler->queue(); + queue.initialize_team_queues(num_worker_threads); + + counting_semaphore sem(0); + + for (int thread = 0; thread < num_worker_threads; ++thread) { + apply([this, &sem, &buffer, num_worker_threads, thread]() { + // NOTE: This implementation has been simplified based on the assumption + // that team_size = 1. The HPX backend currently only supports a team + // size of 1. 
+ std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id(); + + buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id()); + HPXTeamMember member( + TeamPolicyInternal( + Kokkos::Experimental::HPX(), num_worker_threads, 1), + 0, t, buffer.get(t), 512); + + member_type single_exec(*scheduler, member); + member_type &team_exec = single_exec; + + auto &team_queue = team_exec.scheduler().queue(); + task_base_type *task = no_more_tasks_sentinel; + + do { + if (task != no_more_tasks_sentinel && task != end) { + team_queue.complete(task); + } + + if (*((volatile int *)&team_queue.m_ready_count) > 0) { + task = end; + for (int i = 0; i < queue_type::NumQueue && end == task; ++i) { + for (int j = 0; j < 2 && end == task; ++j) { + task = queue_type::pop_ready_task(&team_queue.m_ready[i][j]); + } + } + } else { + task = team_queue.attempt_to_steal_task(); + } + + if (task != no_more_tasks_sentinel && task != end) { + (*task->m_apply)(task, &single_exec); + } + } while (task != no_more_tasks_sentinel); + + sem.signal(1); + }); + } + + sem.wait(num_worker_threads); + } + + template + static void get_function_pointer(typename TaskType::function_type &ptr, + typename TaskType::destroy_type &dtor) { + ptr = TaskType::apply; + dtor = TaskType::destroy; + } + +private: + const scheduler_type *scheduler; +}; + +extern template class TaskQueue< + Kokkos::Experimental::HPX, + typename Kokkos::Experimental::HPX::memory_space>; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_HPX_TASK_HPP */ diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX_ViewCopyETIAvail.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX_ViewCopyETIAvail.hpp new file mode 100644 index 0000000000..bbc1b33bf9 --- /dev/null +++ 
b/lib/kokkos/core/src/HPX/Kokkos_HPX_ViewCopyETIAvail.hpp @@ -0,0 +1,57 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HPX_VIEWETIAVAIL_HPP +#define KOKKOS_HPX_VIEWETIAVAIL_HPP + +namespace Kokkos { +namespace Impl { +#define KOKKOS_IMPL_VIEWCOPY_ETI_AVAIL_EXECSPACE Kokkos::Experimental::HPX + +#include + +#undef KOKKOS_IMPL_VIEWCOPY_ETI_AVAIL_EXECSPACE +} +} +#endif + diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX_ViewCopyETIDecl.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX_ViewCopyETIDecl.hpp new file mode 100644 index 0000000000..aa1c2f1518 --- /dev/null +++ b/lib/kokkos/core/src/HPX/Kokkos_HPX_ViewCopyETIDecl.hpp @@ -0,0 +1,57 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HPX_VIEWETIDECL_HPP +#define KOKKOS_HPX_VIEWETIDECL_HPP + +namespace Kokkos { +namespace Impl { +#define KOKKOS_IMPL_VIEWCOPY_ETI_AVAIL_EXECSPACE Kokkos::Experimental::HPX + +#include + +#undef KOKKOS_IMPL_VIEWCOPY_ETI_AVAIL_EXECSPACE +} +} +#endif + diff --git a/lib/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp b/lib/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp new file mode 100644 index 0000000000..4dd28dd994 --- /dev/null +++ b/lib/kokkos/core/src/HPX/Kokkos_HPX_WorkGraphPolicy.hpp @@ -0,0 +1,116 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HPX_WORKGRAPHPOLICY_HPP +#define KOKKOS_HPX_WORKGRAPHPOLICY_HPP + +#include +#include + +namespace Kokkos { +namespace Impl { + +template +class ParallelFor, + Kokkos::Experimental::HPX> { +private: + using Policy = Kokkos::WorkGraphPolicy; + using WorkTag = typename Policy::work_tag; + + Policy m_policy; + FunctorType m_functor; + + template + typename std::enable_if::value>::type + execute_functor(const std::int32_t w) const noexcept { + m_functor(w); + } + + template + typename std::enable_if::value>::type + execute_functor(const std::int32_t w) const noexcept { + const TagType t{}; + m_functor(t, w); + } + +public: + void execute() const { + dispatch_execute_task(this); + Kokkos::Experimental::HPX().fence(); + } + + void execute_task() const { + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + + counting_semaphore sem(0); + + for (int thread = 0; thread < num_worker_threads; ++thread) { + apply([this, &sem]() { + std::int32_t w = m_policy.pop_work(); + while (w != Policy::COMPLETED_TOKEN) { + if (w != Policy::END_TOKEN) { + execute_functor(w); + m_policy.completed_work(w); + } + + w = m_policy.pop_work(); + } + + sem.signal(1); + }); + } + + sem.wait(num_worker_threads); + } + + inline ParallelFor(const FunctorType &arg_functor, const Policy &arg_policy) + : m_policy(arg_policy), m_functor(arg_functor) {} +}; + +} // namespace Impl +} // namespace Kokkos + +#endif /* #define KOKKOS_HPX_WORKGRAPHPOLICY_HPP */ diff --git a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp index fb0d6cde84..1972aa485b 100644 --- a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp +++ b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp @@ -125,6 +125,8 @@ struct MDRangePolicy using traits = 
Kokkos::Impl::PolicyTraits; using range_policy = RangePolicy; + typename traits::execution_space m_space; + using impl_range_policy = RangePolicy< typename traits::execution_space , typename traits::schedule_type , typename traits::index_type @@ -132,6 +134,9 @@ struct MDRangePolicy typedef MDRangePolicy execution_policy; // needed for is_execution_space interrogation + template + friend struct MDRangePolicy; + static_assert( !std::is_same::value , "Kokkos Error: MD iteration pattern not defined" ); @@ -192,13 +197,54 @@ struct MDRangePolicy static constexpr int Right = static_cast( Iterate::Right ); static constexpr int Left = static_cast( Iterate::Left ); + KOKKOS_INLINE_FUNCTION const typename traits::execution_space & space() const { return m_space ; } + template < typename LT , typename UT , typename TT = array_index_type > + MDRangePolicy(std::initializer_list const& lower, std::initializer_list const& upper, std::initializer_list const& tile = {} ) + : m_space() { + init(lower, upper, tile); + } + + template < typename LT , typename UT , typename TT = array_index_type > + MDRangePolicy(const typename traits::execution_space & work_space, + std::initializer_list const& lower, std::initializer_list const& upper, std::initializer_list const& tile = {} ) + : m_space( work_space ) { + init(lower, upper, tile); + } + MDRangePolicy( point_type const& lower, point_type const& upper, tile_type const& tile = tile_type{} ) - : m_lower(lower) + : m_space() + , m_lower(lower) , m_upper(upper) , m_tile(tile) , m_num_tiles(1) - , m_prod_tile_dims(1) - { + , m_prod_tile_dims(1) { + init(); + } + + MDRangePolicy( const typename traits::execution_space & work_space, + point_type const& lower, point_type const& upper, tile_type const& tile = tile_type{} ) + : m_space( work_space ) + , m_lower(lower) + , m_upper(upper) + , m_tile(tile) + , m_num_tiles(1) + , m_prod_tile_dims(1) { + init(); + } + + template + MDRangePolicy( const MDRangePolicy p ): + m_space(p.m_space), + 
m_lower(p.m_lower), + m_upper(p.m_upper), + m_tile(p.m_tile), + m_tile_end(p.m_tile_end), + m_num_tiles(p.m_num_tiles), + m_prod_tile_dims(p.m_prod_tile_dims) {} + +private: + + void init() { // Host if ( true #if defined(KOKKOS_ENABLE_CUDA) @@ -211,7 +257,7 @@ struct MDRangePolicy { index_type span; for (int i=0; i 0)) ) @@ -311,11 +357,9 @@ struct MDRangePolicy #endif } - template < typename LT , typename UT , typename TT = array_index_type > - MDRangePolicy( std::initializer_list const& lower, std::initializer_list const& upper, std::initializer_list const& tile = {} ) + void init( std::initializer_list const& lower, std::initializer_list const& upper, std::initializer_list const& tile = {} ) { - if(static_cast(m_lower.size()) != rank || static_cast(m_upper.size()) != rank) Kokkos::abort("MDRangePolicy: Constructor initializer lists have wrong size"); @@ -589,5 +633,26 @@ void md_parallel_reduce( const std::string& str } } // namespace Kokkos::Experimental #endif +namespace Kokkos { +namespace Experimental { +namespace Impl { + +template +struct PolicyPropertyAdaptor,MDRangePolicy> { + typedef MDRangePolicy policy_in_t; + typedef MDRangePolicy> policy_out_t; +}; + +} +} +} + + #endif //KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP diff --git a/lib/kokkos/core/src/Kokkos_Atomic.hpp b/lib/kokkos/core/src/Kokkos_Atomic.hpp index cf0f25969d..c2268bd35f 100644 --- a/lib/kokkos/core/src/Kokkos_Atomic.hpp +++ b/lib/kokkos/core/src/Kokkos_Atomic.hpp @@ -90,6 +90,7 @@ #if ! defined( KOKKOS_ENABLE_GNU_ATOMICS ) && \ ! defined( KOKKOS_ENABLE_INTEL_ATOMICS ) && \ ! defined( KOKKOS_ENABLE_OPENMP_ATOMICS ) && \ + ! defined( KOKKOS_ENABLE_STD_ATOMICS ) && \ ! defined( KOKKOS_ENABLE_SERIAL_ATOMICS ) // Compiling for non-Cuda atomic implementation has not been pre-selected. 
@@ -168,6 +169,12 @@ const char * atomic_query_version() } // namespace Kokkos +//---------------------------------------------------------------------------- +// Atomic Memory Orders +// +// Implements Strongly-typed analogs of C++ standard memory orders +#include "impl/Kokkos_Atomic_Memory_Order.hpp" + #if defined( KOKKOS_ENABLE_ROCM ) namespace Kokkos { namespace Impl { @@ -287,6 +294,14 @@ void unlock_address_rocm_space(void* ptr); #ifndef _WIN32 #include "impl/Kokkos_Atomic_Generic.hpp" #endif + +//---------------------------------------------------------------------------- +// Provide atomic loads and stores with memory order semantics + +#include "impl/Kokkos_Atomic_Load.hpp" +#include "impl/Kokkos_Atomic_Store.hpp" + + //---------------------------------------------------------------------------- // This atomic-style macro should be an inlined function, not a macro diff --git a/lib/kokkos/core/src/Kokkos_Complex.hpp b/lib/kokkos/core/src/Kokkos_Complex.hpp index 08cbba3b31..a3ada5d55e 100644 --- a/lib/kokkos/core/src/Kokkos_Complex.hpp +++ b/lib/kokkos/core/src/Kokkos_Complex.hpp @@ -631,8 +631,10 @@ RealType real (const complex& x) { template KOKKOS_INLINE_FUNCTION RealType abs (const complex& x) { - // FIXME (mfh 31 Oct 2014) Scale to avoid unwarranted overflow. - return std::sqrt (real (x) * real (x) + imag (x) * imag (x)); +#ifndef __CUDA_ARCH__ + using std::hypot; +#endif + return hypot(x.real(),x.imag()); } //! 
Power of a complex number diff --git a/lib/kokkos/core/src/Kokkos_Concepts.hpp b/lib/kokkos/core/src/Kokkos_Concepts.hpp index 117469b0a2..98ae141de4 100644 --- a/lib/kokkos/core/src/Kokkos_Concepts.hpp +++ b/lib/kokkos/core/src/Kokkos_Concepts.hpp @@ -79,6 +79,45 @@ struct IndexType using type = T; }; +namespace Experimental { + struct WorkItemProperty { + template + struct ImplWorkItemProperty { + static const unsigned value = Property; + using work_item_property = ImplWorkItemProperty; + }; + + constexpr static const ImplWorkItemProperty<0> None = ImplWorkItemProperty<0>(); + constexpr static const ImplWorkItemProperty<1> HintLightWeight = ImplWorkItemProperty<1>(); + constexpr static const ImplWorkItemProperty<2> HintHeavyWeight = ImplWorkItemProperty<2>(); + constexpr static const ImplWorkItemProperty<4> HintRegular = ImplWorkItemProperty<4>(); + constexpr static const ImplWorkItemProperty<8> HintIrregular = ImplWorkItemProperty<8>(); + typedef ImplWorkItemProperty<0> None_t; + typedef ImplWorkItemProperty<1> HintLightWeight_t; + typedef ImplWorkItemProperty<2> HintHeavyWeight_t; + typedef ImplWorkItemProperty<4> HintRegular_t; + typedef ImplWorkItemProperty<8> HintIrregular_t; + }; + +template +inline constexpr WorkItemProperty::ImplWorkItemProperty operator | + (WorkItemProperty::ImplWorkItemProperty, WorkItemProperty::ImplWorkItemProperty) { + return WorkItemProperty::ImplWorkItemProperty(); +} + +template +inline constexpr WorkItemProperty::ImplWorkItemProperty operator & + (WorkItemProperty::ImplWorkItemProperty, WorkItemProperty::ImplWorkItemProperty) { + return WorkItemProperty::ImplWorkItemProperty(); +} + +template +inline constexpr bool operator == (WorkItemProperty::ImplWorkItemProperty, WorkItemProperty::ImplWorkItemProperty) { + return pv1 == pv2; +} + +} + /**\brief Specify Launch Bounds for CUDA execution. * * If no launch bounds specified then do not set launch bounds. 
@@ -105,9 +144,13 @@ namespace Kokkos { template< typename T > struct is_ ## CONCEPT { \ private: \ template< typename , typename = std::true_type > struct have : std::false_type {}; \ - template< typename U > struct have::type, \ - typename std::remove_cv::type \ + template< typename U > struct have::type, \ + typename std::remove_cv::type \ + >::type> : std::true_type {}; \ + template< typename U > struct have::type, \ + typename std::remove_cv::type \ >::type> : std::true_type {}; \ public: \ enum { value = is_ ## CONCEPT::template have::value }; \ @@ -121,6 +164,9 @@ KOKKOS_IMPL_IS_CONCEPT( execution_space ) KOKKOS_IMPL_IS_CONCEPT( execution_policy ) KOKKOS_IMPL_IS_CONCEPT( array_layout ) KOKKOS_IMPL_IS_CONCEPT( reducer ) +namespace Experimental { +KOKKOS_IMPL_IS_CONCEPT( work_item_property ) +} namespace Impl { @@ -138,6 +184,8 @@ KOKKOS_IMPL_IS_CONCEPT( iteration_pattern ) KOKKOS_IMPL_IS_CONCEPT( schedule_type ) KOKKOS_IMPL_IS_CONCEPT( index_type ) KOKKOS_IMPL_IS_CONCEPT( launch_bounds ) +KOKKOS_IMPL_IS_CONCEPT( thread_team_member ) +KOKKOS_IMPL_IS_CONCEPT( host_thread_team_member ) } diff --git a/lib/kokkos/core/src/Kokkos_CopyViews.hpp b/lib/kokkos/core/src/Kokkos_CopyViews.hpp index 31605c9d39..f919fdb755 100644 --- a/lib/kokkos/core/src/Kokkos_CopyViews.hpp +++ b/lib/kokkos/core/src/Kokkos_CopyViews.hpp @@ -186,9 +186,9 @@ struct ViewFill typedef Kokkos::RangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-1D",policy_type(0,a.extent(0)),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -206,10 +206,10 @@ struct ViewFill typedef Kokkos::MDRangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-2D", 
policy_type({0,0},{a.extent(0),a.extent(1)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -227,10 +227,10 @@ struct ViewFill typedef Kokkos::MDRangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-3D", policy_type({0,0,0},{a.extent(0),a.extent(1),a.extent(2)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -248,10 +248,10 @@ struct ViewFill typedef Kokkos::MDRangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-4D", policy_type({0,0,0,0},{a.extent(0),a.extent(1),a.extent(2),a.extent(3)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -269,10 +269,10 @@ struct ViewFill typedef Kokkos::MDRangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-5D", policy_type({0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(2),a.extent(3),a.extent(4)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -290,10 +290,10 @@ struct ViewFill typedef Kokkos::MDRangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-6D", policy_type({0,0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(2),a.extent(3),a.extent(4),a.extent(5)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -311,11 +311,11 @@ struct ViewFill typedef Kokkos::MDRangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + 
ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-7D", policy_type({0,0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(2),a.extent(3), a.extent(5),a.extent(6)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -335,11 +335,11 @@ struct ViewFill typedef Kokkos::MDRangePolicy> policy_type; ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_):a(a_),val(val_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-8D", policy_type({0,0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(3), a.extent(5),a.extent(6),a.extent(7)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -437,10 +437,10 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-1D", policy_type(0,a.extent(0)),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -459,10 +459,10 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-2D", policy_type({0,0},{a.extent(0),a.extent(1)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -482,10 +482,10 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-3D", policy_type({0,0,0},{a.extent(0),a.extent(1),a.extent(2)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -505,11 +505,11 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-4D", policy_type({0,0,0,0},{a.extent(0),a.extent(1),a.extent(2), a.extent(3)}),*this); - ExecSpace::fence(); + 
ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -530,11 +530,11 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-5D", policy_type({0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(2), a.extent(3),a.extent(4)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -555,11 +555,11 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-6D", policy_type({0,0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(2), a.extent(3),a.extent(4),a.extent(5)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -581,11 +581,11 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-7D", policy_type({0,0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(3), a.extent(4),a.extent(5),a.extent(6)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -607,11 +607,11 @@ struct ViewCopy> policy_type; ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_):a(a_),b(b_) { - ExecSpace::fence(); + ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewCopy-8D", policy_type({0,0,0,0,0,0},{a.extent(0),a.extent(1),a.extent(3), a.extent(5),a.extent(6),a.extent(7)}),*this); - ExecSpace::fence(); + ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -1538,6 +1538,779 @@ void deep_copy } } +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +namespace Experimental { +/** \brief A local deep copy between views of the default specialization, compatible type, + * same non-zero rank. + */ +template< class TeamType, class DT , class ... DP , class ST , class ... 
SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy_contiguous(const TeamType& team, const View & dst, const View & src) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, src.span()), [&] (const int& i) { + dst.data()[i] = src.data()[i]; + }); +} +//---------------------------------------------------------------------------- +template< class DT , class ... DP , class ST , class ... SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy_contiguous(const View & dst, const View & src) { + + for(size_t i=0;i +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 1 && + unsigned(ViewTraits::rank) == 1 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0); + + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + dst(i) = src(i); + }); + team.team_barrier(); +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP , class ST , class ... SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 2 && + unsigned(ViewTraits::rank) == 2 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1); + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,src); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int i1 = i/dst.extent(0); + dst(i0,i1) = src(i0,i1); + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP , class ST , class ... 
SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 3 && + unsigned(ViewTraits::rank) == 3 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2); + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,src); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + int i2 = itmp/dst.extent(1); + dst(i0,i1,i2) = src(i0,i1,i2); + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP , class ST , class ... SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 4 && + unsigned(ViewTraits::rank) == 4 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3); + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,src); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + int i3 = itmp/dst.extent(2); + dst(i0,i1,i2,i3) = src(i0,i1,i2,i3); + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP , class ST , class ... 
SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 5 && + unsigned(ViewTraits::rank) == 5 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3)*dst.extent(4); + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,src); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + itmp = itmp/dst.extent(2); + int i3 = itmp%dst.extent(3); + int i4 = itmp/dst.extent(3); + dst(i0,i1,i2,i3,i4) = src(i0,i1,i2,i3,i4); + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP , class ST , class ... 
SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 6 && + unsigned(ViewTraits::rank) == 6 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3)*dst.extent(4)*dst.extent(5); + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,src); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + itmp = itmp/dst.extent(2); + int i3 = itmp%dst.extent(3); + itmp = itmp/dst.extent(3); + int i4 = itmp%dst.extent(4); + int i5 = itmp/dst.extent(4); + dst(i0,i1,i2,i3,i4,i5) = src(i0,i1,i2,i3,i4,i5); + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP , class ST , class ... 
SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 7 && + unsigned(ViewTraits::rank) == 7 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3)*dst.extent(4)*dst.extent(5)*dst.extent(6); + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,src); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + itmp = itmp/dst.extent(2); + int i3 = itmp%dst.extent(3); + itmp = itmp/dst.extent(3); + int i4 = itmp%dst.extent(4); + itmp = itmp/dst.extent(4); + int i5 = itmp%dst.extent(5); + int i6 = itmp/dst.extent(5); + dst(i0,i1,i2,i3,i4,i5,i6) = src(i0,i1,i2,i3,i4,i5,i6); + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class DT , class ... DP , class ST , class ... 
SP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 1 && + unsigned(ViewTraits::rank) == 1 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0); + + + for(size_t i=0;i +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 2 && + unsigned(ViewTraits::rank) == 2 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,src); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 3 && + unsigned(ViewTraits::rank) == 3 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,src); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 4 && + unsigned(ViewTraits::rank) == 4 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,src); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 5 && + unsigned(ViewTraits::rank) == 5 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,src); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + const View & src, + typename std::enable_if<( 
unsigned(ViewTraits::rank) == 6 && + unsigned(ViewTraits::rank) == 6 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,src); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + const View & src, + typename std::enable_if<( unsigned(ViewTraits::rank) == 7 && + unsigned(ViewTraits::rank) == 7 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() && src.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,src); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy_contiguous(const TeamType& team, const View & dst, typename ViewTraits::const_value_type & value) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, dst.span()), [&] (const int& i) { + dst.data()[i] = value; + }); +} +//---------------------------------------------------------------------------- +template< class DT , class ... DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy_contiguous(const View & dst, typename ViewTraits::const_value_type & value) { + + for(size_t i=0;i +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 1 )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0); + + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + dst(i) = value; + }); + team.team_barrier(); +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... 
DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 2 )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1); + + if ( dst.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,value); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int i1 = i/dst.extent(0); + dst(i0,i1) = value; + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 3 )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2); + + if ( dst.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,value); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + int i2 = itmp/dst.extent(1); + dst(i0,i1,i2) = value; + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... 
DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 4 )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3); + + if ( dst.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,value); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + int i3 = itmp/dst.extent(2); + dst(i0,i1,i2,i3) = value; + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 5 )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3)*dst.extent(4); + + if ( dst.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,value); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + itmp = itmp/dst.extent(2); + int i3 = itmp%dst.extent(3); + int i4 = itmp/dst.extent(3); + dst(i0,i1,i2,i3,i4) = value; + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... 
DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 6 )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3)*dst.extent(4)*dst.extent(5); + + if ( dst.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,value); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + itmp = itmp/dst.extent(2); + int i3 = itmp%dst.extent(3); + itmp = itmp/dst.extent(3); + int i4 = itmp%dst.extent(4); + int i5 = itmp/dst.extent(4); + dst(i0,i1,i2,i3,i4,i5) = value; + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class TeamType, class DT , class ... 
DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const TeamType& team, const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 7 )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0)*dst.extent(1)*dst.extent(2)*dst.extent(3)*dst.extent(4)*dst.extent(5)*dst.extent(6); + + if ( dst.span_is_contiguous() ) { + team.team_barrier(); + local_deep_copy_contiguous(team,dst,value); + team.team_barrier(); + } else { + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [&] (const int& i) { + int i0 = i%dst.extent(0); + int itmp = i/dst.extent(0); + int i1 = itmp%dst.extent(1); + itmp = itmp/dst.extent(1); + int i2 = itmp%dst.extent(2); + itmp = itmp/dst.extent(2); + int i3 = itmp%dst.extent(3); + itmp = itmp/dst.extent(3); + int i4 = itmp%dst.extent(4); + itmp = itmp/dst.extent(4); + int i5 = itmp%dst.extent(5); + int i6 = itmp/dst.extent(5); + dst(i0,i1,i2,i3,i4,i5,i6) = value; + }); + team.team_barrier(); + } +} +//---------------------------------------------------------------------------- +template< class DT , class ... 
DP > +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 1 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + const size_t N = dst.extent(0); + + + for(size_t i=0;i +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 2 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,value); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 3 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,value); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 4 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,value); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 5 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,value); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 6 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() ) { 
+ local_deep_copy_contiguous(dst,value); + } else { + + for(size_t i0=0;i0 +void KOKKOS_INLINE_FUNCTION local_deep_copy (const View & dst, + typename ViewTraits::const_value_type & value, + typename std::enable_if<( unsigned(ViewTraits::rank) == 7 + )>::type * = 0 ) +{ + if( dst.data() == nullptr ) { + return; + } + + if ( dst.span_is_contiguous() ) { + local_deep_copy_contiguous(dst,value); + } else { + + for(size_t i0=0;i0::value_type >::value , "deep_copy requires non-const type" ); - ExecSpace::fence(); + ExecSpace().fence(); typedef typename View::uniform_runtime_nomemspace_type ViewTypeUniform; Kokkos::Impl::ViewFill< ViewTypeUniform >( dst , value ); - ExecSpace::fence(); + ExecSpace().fence(); } /** \brief Deep copy into a value in Host memory from a view. */ @@ -2184,6 +2957,25 @@ create_mirror_view_and_copy(const Space& , const Kokkos::View & src deep_copy(mirror, src); return mirror; } + +// Create a mirror view in a new space without initializing (specialization for same space) +template +typename Impl::MirrorViewType::view_type +create_mirror_view(const Space& , const Kokkos::View & src + , Kokkos::Impl::WithoutInitializing_t + , typename std::enable_if::is_same_memspace>::type* = 0 ) { + return src; +} + +// Create a mirror view in a new space without initializing (specialization for different space) +template +typename Impl::MirrorViewType::view_type +create_mirror_view(const Space& , const Kokkos::View & src + , Kokkos::Impl::WithoutInitializing_t + , typename std::enable_if::is_same_memspace>::type* = 0 ) { + using Mirror = typename Impl::MirrorViewType::view_type; + return Mirror(Kokkos::ViewAllocateWithoutInitializing(src.label()), src.layout()); +} } /* namespace Kokkos */ diff --git a/lib/kokkos/core/src/Kokkos_Core.hpp b/lib/kokkos/core/src/Kokkos_Core.hpp index 4d0625ee1b..9fbba0abfa 100644 --- a/lib/kokkos/core/src/Kokkos_Core.hpp +++ b/lib/kokkos/core/src/Kokkos_Core.hpp @@ -66,6 +66,10 @@ #include #endif +#if defined( KOKKOS_ENABLE_HPX ) 
+#include +#endif + #if defined( KOKKOS_ENABLE_THREADS ) #include #endif @@ -87,6 +91,7 @@ #include #include #include +#include #include diff --git a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp index 150865d0f5..55c6a5494a 100644 --- a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp +++ b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp @@ -100,6 +100,12 @@ class Serial; ///< Execution space main process on CPU. class Qthreads; ///< Execution space with Qthreads back-end. #endif +#if defined( KOKKOS_ENABLE_HPX ) +namespace Experimental { +class HPX; ///< Execution space with HPX back-end. +} +#endif + #if defined( KOKKOS_ENABLE_THREADS ) class Threads; ///< Execution space with pthreads back-end. #endif @@ -156,6 +162,8 @@ namespace Kokkos { typedef Threads DefaultExecutionSpace; //#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) // typedef Qthreads DefaultExecutionSpace; +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX ) + typedef Kokkos::Experimental::HPX DefaultExecutionSpace; #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) typedef Serial DefaultExecutionSpace; #else @@ -176,6 +184,8 @@ namespace Kokkos { typedef Threads DefaultHostExecutionSpace; //#elif defined( KOKKOS_ENABLE_QTHREADS ) // typedef Qthreads DefaultHostExecutionSpace; +#elif defined( KOKKOS_ENABLE_HPX ) + typedef Kokkos::Experimental::HPX DefaultHostExecutionSpace; #elif defined( KOKKOS_ENABLE_SERIAL ) typedef Serial DefaultHostExecutionSpace; #else diff --git a/lib/kokkos/core/src/Kokkos_Crs.hpp b/lib/kokkos/core/src/Kokkos_Crs.hpp index ccc3944d86..8412ced921 100644 --- a/lib/kokkos/core/src/Kokkos_Crs.hpp +++ b/lib/kokkos/core/src/Kokkos_Crs.hpp @@ -187,7 +187,7 @@ class GetCrsTransposeCounts { using closure_type = Kokkos::Impl::ParallelFor; const closure_type closure(*this, policy_type(0, index_type(in.entries.size()))); closure.execute(); - execution_space::fence(); + execution_space().fence(); } }; @@ -266,7 +266,7 @@ class 
FillCrsTransposeEntries { using closure_type = Kokkos::Impl::ParallelFor; const closure_type closure(*this, policy_type(0, index_type(in.numRows()))); closure.execute(); - execution_space::fence(); + execution_space().fence(); } }; diff --git a/lib/kokkos/core/src/Kokkos_Cuda.hpp b/lib/kokkos/core/src/Kokkos_Cuda.hpp index 726a574961..4eb8ab4d4b 100644 --- a/lib/kokkos/core/src/Kokkos_Cuda.hpp +++ b/lib/kokkos/core/src/Kokkos_Cuda.hpp @@ -52,6 +52,7 @@ #include #include +#include #include #include @@ -67,6 +68,7 @@ namespace Kokkos { namespace Impl { class CudaExec ; +class CudaInternal ; } // namespace Impl } // namespace Kokkos @@ -74,6 +76,23 @@ class CudaExec ; namespace Kokkos { +namespace Impl { + namespace Experimental { + enum class CudaLaunchMechanism:unsigned{Default=0,ConstantMemory=1,GlobalMemory=2,LocalMemory=4}; + + constexpr inline CudaLaunchMechanism operator | (CudaLaunchMechanism p1, CudaLaunchMechanism p2) { + return static_cast(static_cast(p1) | static_cast(p2)); + } + constexpr inline CudaLaunchMechanism operator & (CudaLaunchMechanism p1, CudaLaunchMechanism p2) { + return static_cast(static_cast(p1) & static_cast(p2)); + } + + template + struct CudaDispatchProperties { + CudaLaunchMechanism launch_mechanism = l; + }; + } +} /// \class Cuda /// \brief Kokkos Execution Space that uses CUDA to run on GPUs. /// @@ -153,7 +172,13 @@ public: /// return asynchronously, before the functor completes. This /// method does not return until all dispatched functors on this /// device have completed. + static void impl_static_fence(); + + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE static void fence(); + #else + void fence() const; + #endif /** \brief Return the maximum amount of concurrency. */ static int concurrency(); @@ -165,15 +190,18 @@ public: //-------------------------------------------------- //! 
\name Cuda space instances + KOKKOS_INLINE_FUNCTION ~Cuda() {} + Cuda(); - explicit Cuda( const int instance_id ); Cuda( Cuda && ) = default ; Cuda( const Cuda & ) = default ; Cuda & operator = ( Cuda && ) = default ; Cuda & operator = ( const Cuda & ) = default ; + Cuda(cudaStream_t stream); + //-------------------------------------------------------------------------- //! \name Device-specific functions //@{ @@ -219,18 +247,18 @@ public: */ static std::vector detect_device_arch(); - cudaStream_t cuda_stream() const { return m_stream ; } - int cuda_device() const { return m_device ; } + cudaStream_t cuda_stream() const; + int cuda_device() const; //@} //-------------------------------------------------------------------------- static const char* name(); + inline Impl::CudaInternal* impl_internal_space_instance() const { return m_space_instance; } private: - int m_device ; - cudaStream_t m_stream ; + Impl::CudaInternal* m_space_instance; }; } // namespace Kokkos @@ -302,7 +330,8 @@ struct VerifyExecutionCanAccessMemorySpace /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ -#include +#include +#include #include #include #include diff --git a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp index d4693b43c1..5c85850fda 100644 --- a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp +++ b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp @@ -46,12 +46,14 @@ #include #include -#include #include #include #include #include #include +#if defined(KOKKOS_ENABLE_PROFILING) +#include +#endif // KOKKOS_ENABLE_PROFILING //---------------------------------------------------------------------------- @@ -91,8 +93,9 @@ template class RangePolicy : public Impl::PolicyTraits { -private: +public: typedef Impl::PolicyTraits traits; +private: typename traits::execution_space m_space ; typename traits::index_type m_begin ; @@ -100,6 +103,9 @@ private: typename 
traits::index_type m_granularity ; typename traits::index_type m_granularity_mask ; + template + friend class RangePolicy; + public: //! Tag this class as an execution policy typedef RangePolicy execution_policy; @@ -118,6 +124,15 @@ public: RangePolicy(const RangePolicy&) = default; RangePolicy(RangePolicy&&) = default; + template + RangePolicy(const RangePolicy p) { + m_space = p.m_space; + m_begin = p.m_begin; + m_end = p.m_end; + m_granularity = p.m_granularity; + m_granularity_mask = p.m_granularity_mask; + } + inline RangePolicy() : m_space(), m_begin(0), m_end(0) {} /** \brief Total range */ @@ -523,19 +538,22 @@ class TeamPolicy: public typename Impl::PolicyTraits::execution_space, Properties ...> internal_policy; - typedef Impl::PolicyTraits traits; + template + friend class TeamPolicy; public: + typedef Impl::PolicyTraits traits; + typedef TeamPolicy execution_policy; TeamPolicy& operator = (const TeamPolicy&) = default; /** \brief Construct policy with the given instance of the execution space */ - TeamPolicy( const typename traits::execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1 ) - : internal_policy(typename traits::execution_space(),league_size_request,team_size_request, vector_length_request) {first_arg = false;} + TeamPolicy( const typename traits::execution_space & space_ , int league_size_request , int team_size_request , int vector_length_request = 1 ) + : internal_policy(space_,league_size_request,team_size_request, vector_length_request) {first_arg = false;} - TeamPolicy( const typename traits::execution_space & , int league_size_request , const Kokkos::AUTO_t & , int vector_length_request = 1 ) - : internal_policy(typename traits::execution_space(),league_size_request,Kokkos::AUTO(), vector_length_request) {first_arg = false;} + TeamPolicy( const typename traits::execution_space & space_, int league_size_request , const Kokkos::AUTO_t & , int vector_length_request = 1 ) + : 
internal_policy(space_,league_size_request,Kokkos::AUTO(), vector_length_request) {first_arg = false;} /** \brief Construct policy with the default instance of the execution space */ TeamPolicy( int league_size_request , int team_size_request , int vector_length_request = 1 ) @@ -618,6 +636,11 @@ public: } #endif + template + TeamPolicy(const TeamPolicy p):internal_policy(p) { + first_arg = p.first_arg; + } + private: bool first_arg; TeamPolicy(const internal_policy& p):internal_policy(p) {first_arg = false;} @@ -754,6 +777,59 @@ public: {} }; +template +struct TeamVectorRangeBoundariesStruct { +private: + + KOKKOS_INLINE_FUNCTION static + iType ibegin( const iType & arg_begin + , const iType & arg_end + , const iType & arg_rank + , const iType & arg_size + ) + { + return arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * arg_rank ; + } + + KOKKOS_INLINE_FUNCTION static + iType iend( const iType & arg_begin + , const iType & arg_end + , const iType & arg_rank + , const iType & arg_size + ) + { + const iType end_ = arg_begin + ( ( arg_end - arg_begin + arg_size - 1 ) / arg_size ) * ( arg_rank + 1 ); + return end_ < arg_end ? 
end_ : arg_end ; + } + +public: + + typedef iType index_type; + const iType start; + const iType end; + enum {increment = 1}; + const TeamMemberType& thread; + + KOKKOS_INLINE_FUNCTION + TeamVectorRangeBoundariesStruct( const TeamMemberType& arg_thread + , const iType& arg_end + ) + : start( ibegin( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) ) + , end( iend( 0 , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) ) + , thread( arg_thread ) + {} + + KOKKOS_INLINE_FUNCTION + TeamVectorRangeBoundariesStruct( const TeamMemberType& arg_thread + , const iType& arg_begin + , const iType& arg_end + ) + : start( ibegin( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) ) + , end( iend( arg_begin , arg_end , arg_thread.team_rank() , arg_thread.team_size() ) ) + , thread( arg_thread ) + {} +}; + template struct ThreadVectorRangeBoundariesStruct { typedef iType index_type; @@ -804,10 +880,10 @@ struct VectorSingleStruct { * This policy is used together with a parallel pattern as a nested layer within a kernel launched * with the TeamPolicy. This variant expects a single count. So the range is (0,count]. */ -template -KOKKOS_INLINE_FUNCTION +template +KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct -TeamThreadRange( const TeamMemberType&, const iType& count ); +TeamThreadRange( const TeamMemberType&, const iType& count ) = delete; /** \brief Execution policy for parallel work over a threads within a team. * @@ -815,10 +891,32 @@ TeamThreadRange( const TeamMemberType&, const iType& count ); * This policy is used together with a parallel pattern as a nested layer within a kernel launched * with the TeamPolicy. This variant expects a begin and end. So the range is (begin,end]. 
*/ -template -KOKKOS_INLINE_FUNCTION +template +KOKKOS_INLINE_FUNCTION_DELETED Impl::TeamThreadRangeBoundariesStruct::type, TeamMemberType> -TeamThreadRange( const TeamMemberType&, const iType1& begin, const iType2& end ); +TeamThreadRange( const TeamMemberType&, const iType1& begin, const iType2& end ) = delete; + +/** \brief Execution policy for parallel work over a threads within a team. + * + * The range is split over all threads in a team. The Mapping scheme depends on the architecture. + * This policy is used together with a parallel pattern as a nested layer within a kernel launched + * with the TeamPolicy. This variant expects a single count. So the range is (0,count]. + */ +template +KOKKOS_INLINE_FUNCTION_DELETED +Impl::TeamThreadRangeBoundariesStruct +TeamVectorRange( const TeamMemberType&, const iType& count ) = delete; + +/** \brief Execution policy for parallel work over a threads within a team. + * + * The range is split over all threads in a team. The Mapping scheme depends on the architecture. + * This policy is used together with a parallel pattern as a nested layer within a kernel launched + * with the TeamPolicy. This variant expects a begin and end. So the range is (begin,end]. + */ +template +KOKKOS_INLINE_FUNCTION_DELETED +Impl::TeamThreadRangeBoundariesStruct::type, TeamMemberType> +TeamVectorRange( const TeamMemberType&, const iType1& begin, const iType2& end ) = delete; /** \brief Execution policy for a vector parallel loop. * @@ -826,15 +924,15 @@ TeamThreadRange( const TeamMemberType&, const iType1& begin, const iType2& end ) * This policy is used together with a parallel pattern as a nested layer within a kernel launched * with the TeamPolicy. This variant expects a single count. So the range is (0,count]. 
*/ -template -KOKKOS_INLINE_FUNCTION +template +KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct -ThreadVectorRange( const TeamMemberType&, const iType& count ); +ThreadVectorRange( const TeamMemberType&, const iType& count ) = delete; -template -KOKKOS_INLINE_FUNCTION +template +KOKKOS_INLINE_FUNCTION_DELETED Impl::ThreadVectorRangeBoundariesStruct -ThreadVectorRange( const TeamMemberType&, const iType& arg_begin, const iType& arg_end ); +ThreadVectorRange( const TeamMemberType&, const iType& arg_begin, const iType& arg_end ) = delete; #if defined(KOKKOS_ENABLE_PROFILING) namespace Impl { @@ -877,5 +975,44 @@ struct ParallelConstructName { } // namespace Kokkos +namespace Kokkos { +namespace Experimental { + +namespace Impl { + template + struct PolicyPropertyAdaptor; + + template + struct PolicyPropertyAdaptor,RangePolicy> { + typedef RangePolicy policy_in_t; + typedef RangePolicy> policy_out_t; + }; + + template + struct PolicyPropertyAdaptor,TeamPolicy> { + typedef TeamPolicy policy_in_t; + typedef TeamPolicy> policy_out_t; + }; +} + +template +constexpr typename Impl::PolicyPropertyAdaptor,PolicyType>::policy_out_t + require(const PolicyType p, WorkItemProperty::ImplWorkItemProperty

){ + return typename Impl::PolicyPropertyAdaptor,PolicyType>::policy_out_t(p); +} +} //Experimental +} //Kokkos #endif /* #define KOKKOS_EXECPOLICY_HPP */ diff --git a/lib/kokkos/core/src/Kokkos_Extents.hpp b/lib/kokkos/core/src/Kokkos_Extents.hpp new file mode 100644 index 0000000000..c8b9110485 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Extents.hpp @@ -0,0 +1,186 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_KOKKOS_EXTENTS_HPP +#define KOKKOS_KOKKOS_EXTENTS_HPP + +#include + +namespace Kokkos { +namespace Experimental { + +constexpr ptrdiff_t dynamic_extent = -1; + +template +struct Extents { + /* TODO @enhancement flesh this out more */ +}; + +template +struct PrependExtent; + +template +struct PrependExtent< + Extents, NewExtent +> { + using type = Extents; +}; + +template +struct AppendExtent; + +template +struct AppendExtent< + Extents, NewExtent +> { + using type = Extents; +}; + +} // end namespace Experimental + +namespace Impl { + +namespace _parse_view_extents_impl { + +template +struct _all_remaining_extents_dynamic : std::true_type { }; + +template +struct _all_remaining_extents_dynamic + : _all_remaining_extents_dynamic +{ }; + +template +struct _all_remaining_extents_dynamic + : std::false_type +{ }; + +template +struct _parse_impl { + using type = Result; +}; + +// We have to treat the case of int**[x] specially, since it *doesn't* go backwards +template +struct _parse_impl< + T*, Experimental::Extents, + typename std::enable_if<_all_remaining_extents_dynamic::value>::type +> + : _parse_impl< + T, Experimental::Extents + > +{ }; + +// int*(*[x])[y] should still work also (meaning int[][x][][y]) +template +struct _parse_impl< + T*, 
Experimental::Extents, + typename std::enable_if::value>::type +> +{ + using _next = Kokkos::Experimental::AppendExtent< + typename _parse_impl, void>::type, + Experimental::dynamic_extent + >; + using type = typename _next::type; +}; + +template +struct _parse_impl< + T[N], Experimental::Extents, void +> + : _parse_impl< + T, Experimental::Extents // TODO @pedantic this could be a narrowing cast + > +{ }; + +} // end namespace _parse_view_extents_impl + +template +struct ParseViewExtents { + using type = + typename _parse_view_extents_impl + ::_parse_impl>::type; +}; + +template +struct ApplyExtent +{ + using type = ValueType[Ext]; +}; + +template +struct ApplyExtent +{ + using type = ValueType*; +}; + +template +struct ApplyExtent +{ + using type = typename ApplyExtent::type[N]; +}; + +template +struct ApplyExtent +{ + using type = ValueType*[Ext]; +}; + +template +struct ApplyExtent +{ + using type = typename ApplyExtent::type*; +}; + +template +struct ApplyExtent +{ + using type = typename ApplyExtent::type[N]; +}; + +} // end namespace Impl + +} // end namespace Kokkos + +#endif //KOKKOS_KOKKOS_EXTENTS_HPP diff --git a/lib/kokkos/core/src/Kokkos_Future.hpp b/lib/kokkos/core/src/Kokkos_Future.hpp new file mode 100644 index 0000000000..665ce71cf5 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_Future.hpp @@ -0,0 +1,567 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_FUTURE_HPP +#define KOKKOS_FUTURE_HPP + +//---------------------------------------------------------------------------- + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include +//---------------------------------------------------------------------------- + +#include +#include +#include +#include + +#include // is_space + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +// For now, hack this in as a partial specialization +// TODO @tasking @cleanup Make this the "normal" class template and make the old code the specialization +template +class BasicFuture> +{ +public: + + using value_type = ValueType; + using execution_space = ExecutionSpace; + using scheduler_type = SimpleTaskScheduler; + using queue_type = typename scheduler_type::task_queue_type; + + +private: + + template + friend class SimpleTaskScheduler; + template + friend class BasicFuture; + + using task_base_type = typename scheduler_type::task_base_type; + using task_queue_type = typename scheduler_type::task_queue_type; + + using task_queue_traits = typename scheduler_type::task_queue_traits; + using task_scheduling_info_type = typename scheduler_type::task_scheduling_info_type; + + using result_storage_type = + Impl::TaskResultStorage< + ValueType, + Impl::SchedulingInfoStorage< + Impl::RunnableTaskBase, + task_scheduling_info_type + > + >; + + + + OwningRawPtr m_task = nullptr; + + KOKKOS_INLINE_FUNCTION + explicit + BasicFuture(task_base_type* task) + : m_task(task) + { + // Note: reference count starts at 2 to account for initial increment + // TODO @tasking @minor DSH verify reference count here and/or encapsulate starting reference count closer to here + } + +public: + + KOKKOS_INLINE_FUNCTION + BasicFuture() 
noexcept : m_task(nullptr) { } + + KOKKOS_INLINE_FUNCTION + BasicFuture(BasicFuture&& rhs) noexcept + : m_task(std::move(rhs.m_task)) + { + rhs.m_task = nullptr; + } + + KOKKOS_INLINE_FUNCTION + BasicFuture(BasicFuture const& rhs) + // : m_task(rhs.m_task) + : m_task(nullptr) + { + *static_cast(&m_task) = rhs.m_task; + if(m_task) m_task->increment_reference_count(); + } + + KOKKOS_INLINE_FUNCTION + BasicFuture& operator=(BasicFuture&& rhs) noexcept + { + if(m_task != rhs.m_task) { + clear(); + //m_task = std::move(rhs.m_task); + *static_cast(&m_task) = rhs.m_task; + // rhs.m_task reference count is unchanged, since this is a move + } + else { + // They're the same, but this is a move, so 1 fewer references now + rhs.clear(); + } + rhs.m_task = nullptr; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + BasicFuture& operator=(BasicFuture const& rhs) + { + if(m_task != rhs.m_task) { + clear(); + //m_task = rhs.m_task; + *static_cast(&m_task) = rhs.m_task; + } + if(m_task != nullptr) { m_task->increment_reference_count(); } + return *this; + } + + //---------------------------------------- + + template + KOKKOS_INLINE_FUNCTION + BasicFuture(BasicFuture&& rhs) noexcept // NOLINT(google-explicit-constructor) + : m_task(std::move(rhs.m_task)) + { + static_assert( + std::is_same::value || + std::is_same::value, + "Moved Futures must have the same scheduler" + ); + + static_assert( + std::is_same::value || + std::is_same::value, + "Moved Futures must have the same value_type" + ); + + // reference counts are unchanged, since this is a move + rhs.m_task = nullptr; + } + + template + KOKKOS_INLINE_FUNCTION + BasicFuture(BasicFuture const& rhs) // NOLINT(google-explicit-constructor) + //: m_task(rhs.m_task) + : m_task(nullptr) + { + static_assert( + std::is_same::value || + std::is_same::value, + "Copied Futures must have the same scheduler" + ); + + static_assert( + std::is_same::value || + std::is_same::value, + "Copied Futures must have the same value_type" + ); + + 
*static_cast(&m_task) = rhs.m_task; + if(m_task) m_task->increment_reference_count(); + } + + template + KOKKOS_INLINE_FUNCTION + BasicFuture& + operator=(BasicFuture const& rhs) + { + static_assert( + std::is_same::value || + std::is_same::value, + "Assigned Futures must have the same scheduler" + ); + + static_assert( + std::is_same::value || + std::is_same::value, + "Assigned Futures must have the same value_type" + ); + + if(m_task != rhs.m_task) { + clear(); + //m_task = rhs.m_task; + *static_cast(&m_task) = rhs.m_task; + if(m_task != nullptr) { m_task->increment_reference_count(); } + } + return *this; + } + + template + KOKKOS_INLINE_FUNCTION + BasicFuture& operator=(BasicFuture&& rhs) + { + static_assert( + std::is_same::value || + std::is_same::value, + "Assigned Futures must have the same scheduler" + ); + + static_assert( + std::is_same::value || + std::is_same::value, + "Assigned Futures must have the same value_type" + ); + + if(m_task != rhs.m_task) { + clear(); + //m_task = std::move(rhs.m_task); + *static_cast(&m_task) = rhs.m_task; + // rhs.m_task reference count is unchanged, since this is a move + } + else { + // They're the same, but this is a move, so 1 fewer references now + rhs.clear(); + } + rhs.m_task = nullptr; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + ~BasicFuture() noexcept { clear(); } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + void clear() noexcept { + if(m_task) { + bool should_delete = m_task->decrement_and_check_reference_count(); + if(should_delete) { + static_cast(m_task->ready_queue_base_ptr()) + ->deallocate(std::move(*m_task)); + } + } + //m_task = nullptr; + *static_cast(&m_task) = nullptr; + } + + KOKKOS_INLINE_FUNCTION + bool is_null() const noexcept { + return m_task == nullptr; + } + + + KOKKOS_INLINE_FUNCTION + bool is_ready() const noexcept { + return (m_task == nullptr) || m_task->wait_queue_is_consumed(); + } + + KOKKOS_INLINE_FUNCTION + const typename Impl::TaskResult< 
ValueType >::reference_type + get() const + { + KOKKOS_EXPECTS(is_ready()); + return static_cast(m_task)->value_reference(); + //return Impl::TaskResult::get(m_task); + } + +}; + +//////////////////////////////////////////////////////////////////////////////// +// OLD CODE +//////////////////////////////////////////////////////////////////////////////// + +template +class BasicFuture { +private: + + template< typename , typename > friend class BasicTaskScheduler ; + template< typename , typename > friend class BasicFuture ; + friend class Impl::TaskBase ; + template< typename , typename , typename > friend class Impl::Task ; + + + //---------------------------------------- + +public: + + //---------------------------------------- + + using scheduler_type = Scheduler; + using queue_type = typename scheduler_type::queue_type; + using execution_space = typename scheduler_type::execution_space; + using value_type = ValueType; + + //---------------------------------------- + +private: + + //---------------------------------------- + + using task_base = Impl::TaskBase; + + task_base * m_task ; + + KOKKOS_INLINE_FUNCTION explicit + BasicFuture( task_base * task ) : m_task(0) + { if ( task ) queue_type::assign( & m_task , task ); } + + //---------------------------------------- + +public: + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + bool is_null() const { return 0 == m_task ; } + + KOKKOS_INLINE_FUNCTION + int reference_count() const + { return 0 != m_task ? 
m_task->reference_count() : 0 ; } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + void clear() + { if ( m_task ) queue_type::assign( & m_task , (task_base*)0 ); } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + ~BasicFuture() { clear(); } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + BasicFuture() noexcept : m_task(nullptr) { } + + KOKKOS_INLINE_FUNCTION + BasicFuture( BasicFuture && rhs ) noexcept + : m_task( rhs.m_task ) + { + rhs.m_task = 0; + } + + KOKKOS_INLINE_FUNCTION + BasicFuture( const BasicFuture & rhs ) + : m_task(0) + { if ( rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); } + + KOKKOS_INLINE_FUNCTION + BasicFuture& operator=(BasicFuture&& rhs) noexcept + { + clear(); + m_task = rhs.m_task ; + rhs.m_task = 0 ; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + BasicFuture& operator=(BasicFuture const& rhs) + { + if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); + return *this ; + } + + //---------------------------------------- + + template + KOKKOS_INLINE_FUNCTION + BasicFuture(BasicFuture&& rhs) noexcept // NOLINT(google-explicit-constructor) + : m_task( rhs.m_task ) + { + static_assert + ( std::is_same::value || + std::is_same::value + , "Assigned Futures must have the same scheduler" ); + + static_assert + ( std::is_same< value_type , void >::value || + std::is_same::value + , "Assigned Futures must have the same value_type" ); + + rhs.m_task = 0 ; + } + + template + KOKKOS_INLINE_FUNCTION + BasicFuture(BasicFuture const& rhs) // NOLINT(google-explicit-constructor) + : m_task(nullptr) + { + static_assert + ( std::is_same::value || + std::is_same::value + , "Assigned Futures must have the same scheduler" ); + + static_assert + ( std::is_same< value_type , void >::value || + std::is_same::value + , "Assigned Futures must have the same value_type" ); + + if ( rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); + } + + template + 
KOKKOS_INLINE_FUNCTION + BasicFuture& + operator=(BasicFuture const& rhs) + { + static_assert + ( std::is_same::value || + std::is_same::value + , "Assigned Futures must have the same scheduler" ); + + static_assert + ( std::is_same< value_type , void >::value || + std::is_same::value + , "Assigned Futures must have the same value_type" ); + + if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); + return *this ; + } + + template + KOKKOS_INLINE_FUNCTION + BasicFuture& operator=(BasicFuture&& rhs) + { + static_assert + ( std::is_same::value || + std::is_same::value + , "Assigned Futures must have the same scheduler" ); + + static_assert + ( std::is_same< value_type , void >::value || + std::is_same::value + , "Assigned Futures must have the same value_type" ); + + clear(); + m_task = rhs.m_task ; + rhs.m_task = 0 ; + return *this ; + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + int is_ready() const noexcept + { return ( 0 == m_task ) || ( ((task_base*) task_base::LockTag) == m_task->m_wait ); } + + KOKKOS_INLINE_FUNCTION + const typename Impl::TaskResult< ValueType >::reference_type + get() const + { + if ( 0 == m_task ) { + Kokkos::abort( "Kokkos:::Future::get ERROR: is_null()"); + } + return Impl::TaskResult< ValueType >::get( m_task ); + } +}; + +// Is a Future with the given execution space +template< typename , typename ExecSpace = void > +struct is_future : public std::false_type {}; + +template +struct is_future, ExecSpace> + : std::integral_constant::value + || std::is_void::value + > +{}; + +//////////////////////////////////////////////////////////////////////////////// +// END OLD CODE +//////////////////////////////////////////////////////////////////////////////// + +namespace Impl { + +template +class ResolveFutureArgOrder { +private: + enum { Arg1_is_space = Kokkos::is_space::value }; + enum { Arg2_is_space = Kokkos::is_space::value }; + enum { Arg1_is_value = !Arg1_is_space && !std::is_same::value 
}; + enum { Arg2_is_value = !Arg2_is_space && !std::is_same::value }; + + static_assert( + ! ( Arg1_is_space && Arg2_is_space ), + "Future cannot be given two spaces" + ); + + static_assert( + ! ( Arg1_is_value && Arg2_is_value ), + "Future cannot be given two value types" + ); + + using value_type = + typename std::conditional::type + >::type; + + using execution_space = + typename std::conditional::type + >::type::execution_space; + +public: + + using type = BasicFuture>; + +}; + +} // end namespace Impl + +/** + * + * Future< space > // value_type == void + * Future< value > // space == Default + * Future< value , space > + * + */ +template +using Future = typename Impl::ResolveFutureArgOrder::type; + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_FUTURE */ diff --git a/lib/kokkos/core/src/Kokkos_HPX.hpp b/lib/kokkos/core/src/Kokkos_HPX.hpp new file mode 100644 index 0000000000..79a2b74da4 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_HPX.hpp @@ -0,0 +1,1999 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HPX_HPP +#define KOKKOS_HPX_HPP + +#include +#if defined(KOKKOS_ENABLE_HPX) + +#include + +#include +#include +#include + +#ifdef KOKKOS_ENABLE_HBWSPACE +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +// There are currently two different implementations for the parallel dispatch +// functions: +// +// - 0: The HPX way. 
Unfortunately, this comes with unnecessary +// overheads at the moment, so there is +// - 1: The manual way. This way is more verbose and does not take advantage of +// e.g. parallel::for_loop in HPX but it is significantly faster in many +// benchmarks. +// +// In the long run 0 should be the preferred implementation, but until HPX is +// improved 1 will be the default. +#ifndef KOKKOS_HPX_IMPLEMENTATION +#define KOKKOS_HPX_IMPLEMENTATION 1 +#endif + +#if (KOKKOS_HPX_IMPLEMENTATION < 0) || (KOKKOS_HPX_IMPLEMENTATION > 1) +#error "You have chosen an invalid value for KOKKOS_HPX_IMPLEMENTATION" +#endif + +namespace Kokkos { +namespace Impl { +class thread_buffer { + static constexpr std::size_t m_cache_line_size = 64; + + std::size_t m_num_threads; + std::size_t m_size_per_thread; + std::size_t m_size_total; + char *m_data; + + void pad_to_cache_line(std::size_t &size) { + size = ((size + m_cache_line_size - 1) / m_cache_line_size) * + m_cache_line_size; + } + +public: + thread_buffer() + : m_num_threads(0), m_size_per_thread(0), m_size_total(0), + m_data(nullptr) {} + thread_buffer(const std::size_t num_threads, + const std::size_t size_per_thread) { + resize(num_threads, size_per_thread); + } + ~thread_buffer() { delete[] m_data; } + + thread_buffer(const thread_buffer &) = delete; + thread_buffer(thread_buffer &&) = delete; + thread_buffer &operator=(const thread_buffer &) = delete; + thread_buffer &operator=(thread_buffer) = delete; + + void resize(const std::size_t num_threads, + const std::size_t size_per_thread) { + m_num_threads = num_threads; + m_size_per_thread = size_per_thread; + + pad_to_cache_line(m_size_per_thread); + + std::size_t size_total_new = m_num_threads * m_size_per_thread; + + if (m_size_total < size_total_new) { + delete[] m_data; + m_data = new char[size_total_new]; + m_size_total = size_total_new; + } + } + + char *get(std::size_t thread_num) { + assert(thread_num < m_num_threads); + if (m_data == nullptr) { + return nullptr; + } + return 
&m_data[thread_num * m_size_per_thread]; + } + + std::size_t size_per_thread() const noexcept { return m_size_per_thread; } + std::size_t size_total() const noexcept { return m_size_total; } +}; +} // namespace Impl + +namespace Experimental { +class HPX { +private: + static bool m_hpx_initialized; + static Kokkos::Impl::thread_buffer m_buffer; +#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH) + static hpx::future m_future; +#endif + +public: + using execution_space = HPX; + using memory_space = HostSpace; + using device_type = Kokkos::Device; + using array_layout = LayoutRight; + using size_type = memory_space::size_type; + using scratch_memory_space = ScratchMemorySpace; + + HPX() noexcept {} + static void print_configuration(std::ostream &, + const bool /* verbose */ = false) { + std::cout << "HPX backend" << std::endl; + } + + static bool in_parallel(HPX const & = HPX()) noexcept { return false; } + static void impl_static_fence(HPX const & = HPX()) + #if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH) + { + if (hpx::threads::get_self_ptr() == nullptr) { + hpx::threads::run_as_hpx_thread([]() { impl_get_future().wait(); }); + } else { + impl_get_future().wait(); + } + } + #else + noexcept { + } + #endif + + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE + static void fence(HPX const & = HPX()) { + #else + void fence() const { + #endif + impl_static_fence(); + } + + static bool is_asynchronous(HPX const & = HPX()) noexcept { +#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH) + return true; +#else + return false; +#endif + } + + static std::vector partition(...) 
{ + Kokkos::abort("Kokkos::Experimental::HPX::partition_master: can't partition an HPX " + "instance\n"); + return std::vector(); + } + + template + static void partition_master(F const &f, int requested_num_partitions = 0, + int requested_partition_size = 0) { + if (requested_num_partitions > 1) { + Kokkos::abort("Kokkos::Experimental::HPX::partition_master: can't partition an " + "HPX instance\n"); + } + } + + static int concurrency(); + static void impl_initialize(int thread_count); + static void impl_initialize(); + static bool impl_is_initialized() noexcept; + static void impl_finalize(); + + static int impl_thread_pool_size() noexcept { + hpx::runtime *rt = hpx::get_runtime_ptr(); + if (rt == nullptr) { + return 0; + } else { + if (hpx::threads::get_self_ptr() == nullptr) { + return hpx::resource::get_thread_pool(0).get_os_thread_count(); + } else { + return hpx::this_thread::get_pool()->get_os_thread_count(); + } + } + } + + static int impl_thread_pool_rank() noexcept { + hpx::runtime *rt = hpx::get_runtime_ptr(); + if (rt == nullptr) { + return 0; + } else { + if (hpx::threads::get_self_ptr() == nullptr) { + return 0; + } else { + return hpx::this_thread::get_pool()->get_pool_index(); + } + } + } + + static int impl_thread_pool_size(int depth) { + if (depth == 0) { + return impl_thread_pool_size(); + } else { + return 1; + } + } + + static int impl_max_hardware_threads() noexcept { + return hpx::threads::hardware_concurrency(); + } + + static int impl_hardware_thread_id() noexcept { + return hpx::get_worker_thread_num(); + } + + static Kokkos::Impl::thread_buffer &impl_get_buffer() noexcept { + return m_buffer; + } +#if defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH) + static hpx::future &impl_get_future() noexcept { return m_future; } +#endif + + static constexpr const char *name() noexcept { return "HPX"; } +}; +} // namespace Experimental + +namespace Impl { +template +inline void dispatch_execute_task(Closure *closure) { +#if 
defined(KOKKOS_ENABLE_HPX_ASYNC_DISPATCH) + if (hpx::threads::get_self_ptr() == nullptr) { + hpx::threads::run_as_hpx_thread([closure]() { + hpx::future &fut = Kokkos::Experimental::HPX::impl_get_future(); + Closure closure_copy = *closure; + fut = fut.then([closure_copy](hpx::future &&) { + closure_copy.execute_task(); + }); + }); + } else { + hpx::future &fut = Kokkos::Experimental::HPX::impl_get_future(); + Closure closure_copy = *closure; + fut = fut.then( + [closure_copy](hpx::future &&) { closure_copy.execute_task(); }); + } +#else + if (hpx::threads::get_self_ptr() == nullptr) { + hpx::threads::run_as_hpx_thread([closure]() { closure->execute_task(); }); + } else { + closure->execute_task(); + } +#endif +} +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { +namespace Impl { +template <> +struct MemorySpaceAccess { + enum { assignable = false }; + enum { accessible = true }; + enum { deepcopy = false }; +}; + +template <> +struct VerifyExecutionCanAccessMemorySpace< + Kokkos::Experimental::HPX::memory_space, + Kokkos::Experimental::HPX::scratch_memory_space> { + enum { value = true }; + inline static void verify(void) {} + inline static void verify(const void *) {} +}; +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { +namespace Experimental { +template <> class UniqueToken { +public: + using execution_space = HPX; + using size_type = int; + UniqueToken(execution_space const & = execution_space()) noexcept {} + + // NOTE: Currently this assumes that there is no oversubscription. + // hpx::get_num_worker_threads can't be used directly because it may yield + // it's task (problematic if called after hpx::get_worker_thread_num). 
+ int size() const noexcept { return HPX::impl_max_hardware_threads(); } + int acquire() const noexcept { return HPX::impl_hardware_thread_id(); } + void release(int) const noexcept {} +}; + +template <> class UniqueToken { +public: + using execution_space = HPX; + using size_type = int; + UniqueToken(execution_space const & = execution_space()) noexcept {} + + // NOTE: Currently this assumes that there is no oversubscription. + // hpx::get_num_worker_threads can't be used directly because it may yield + // it's task (problematic if called after hpx::get_worker_thread_num). + int size() const noexcept { return HPX::impl_max_hardware_threads(); } + int acquire() const noexcept { return HPX::impl_hardware_thread_id(); } + void release(int) const noexcept {} +}; +} // namespace Experimental +} // namespace Kokkos + +namespace Kokkos { +namespace Impl { + +struct HPXTeamMember { +public: + using execution_space = Kokkos::Experimental::HPX; + using scratch_memory_space = + Kokkos::ScratchMemorySpace; + +private: + scratch_memory_space m_team_shared; + std::size_t m_team_shared_size; + + int m_league_size; + int m_league_rank; + int m_team_size; + int m_team_rank; + +public: + KOKKOS_INLINE_FUNCTION + const scratch_memory_space &team_shmem() const { + return m_team_shared.set_team_thread_mode(0, 1, 0); + } + + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space &team_scratch(const int) const { + return m_team_shared.set_team_thread_mode(0, 1, 0); + } + + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space &thread_scratch(const int) const { + return m_team_shared.set_team_thread_mode(0, team_size(), team_rank()); + } + + KOKKOS_INLINE_FUNCTION int league_rank() const noexcept { + return m_league_rank; + } + + KOKKOS_INLINE_FUNCTION int league_size() const noexcept { + return m_league_size; + } + + KOKKOS_INLINE_FUNCTION int team_rank() const noexcept { return m_team_rank; } + KOKKOS_INLINE_FUNCTION int team_size() const noexcept { return 
m_team_size; } + + template + constexpr KOKKOS_INLINE_FUNCTION + HPXTeamMember(const TeamPolicyInternal &policy, + const int team_rank, const int league_rank, void *scratch, + int scratch_size) noexcept + : m_team_shared(scratch, scratch_size, scratch, scratch_size), + m_team_shared_size(scratch_size), m_league_size(policy.league_size()), + m_league_rank(league_rank), m_team_size(policy.team_size()), + m_team_rank(team_rank) {} + + KOKKOS_INLINE_FUNCTION + void team_barrier() const {} + + template + KOKKOS_INLINE_FUNCTION void team_broadcast(ValueType &, const int &) const { + static_assert(std::is_trivially_default_constructible(), + "Only trivial constructible types can be broadcasted"); + } + + template + KOKKOS_INLINE_FUNCTION void team_broadcast(const Closure &, ValueType &, + const int &) const { + static_assert(std::is_trivially_default_constructible(), + "Only trivial constructible types can be broadcasted"); + } + + template + KOKKOS_INLINE_FUNCTION ValueType team_reduce(const ValueType &value, + const JoinOp &) const { + return value; + } + + template + KOKKOS_INLINE_FUNCTION + typename std::enable_if::value>::type + team_reduce(const ReducerType &reducer) const {} + + template + KOKKOS_INLINE_FUNCTION Type + team_scan(const Type &value, Type *const global_accum = nullptr) const { + if (global_accum) { + Kokkos::atomic_fetch_add(global_accum, value); + } + + return 0; + } +}; + +template +class TeamPolicyInternal + : public PolicyTraits { + using traits = PolicyTraits; + + int m_league_size; + int m_team_size; + std::size_t m_team_scratch_size[2]; + std::size_t m_thread_scratch_size[2]; + int m_chunk_size; + +public: + using member_type = HPXTeamMember; + + // NOTE: Max size is 1 for simplicity. In most cases more than 1 is not + // necessary on CPU. Implement later if there is a need. 
+ template + inline static int team_size_max(const FunctorType &) { + return 1; + } + + template + inline static int team_size_recommended(const FunctorType &) { + return 1; + } + + template + inline static int team_size_recommended(const FunctorType &, const int &) { + return 1; + } + + template + int team_size_max(const FunctorType &, const ParallelForTag &) const { + return 1; + } + + template + int team_size_max(const FunctorType &, const ParallelReduceTag &) const { + return 1; + } + template + int team_size_recommended(const FunctorType &, const ParallelForTag &) const { + return 1; + } + template + int team_size_recommended(const FunctorType &, + const ParallelReduceTag &) const { + return 1; + } + +private: + inline void init(const int league_size_request, const int team_size_request) { + m_league_size = league_size_request; + const int max_team_size = 1; // TODO: Can't use team_size_max(...) because + // it requires a functor as argument. + m_team_size = + team_size_request > max_team_size ? 
max_team_size : team_size_request; + + if (m_chunk_size > 0) { + if (!Impl::is_integral_power_of_two(m_chunk_size)) + Kokkos::abort("TeamPolicy blocking granularity must be power of two"); + } else { + int new_chunk_size = 1; + while (new_chunk_size * 4 * Kokkos::Experimental::HPX::concurrency() < + m_league_size) { + new_chunk_size *= 2; + } + + if (new_chunk_size < 128) { + new_chunk_size = 1; + while ((new_chunk_size * Kokkos::Experimental::HPX::concurrency() < + m_league_size) && + (new_chunk_size < 128)) + new_chunk_size *= 2; + } + + m_chunk_size = new_chunk_size; + } + } + +public: + inline int team_size() const { return m_team_size; } + inline int league_size() const { return m_league_size; } + + inline size_t scratch_size(const int &level, int team_size_ = -1) const { + if (team_size_ < 0) { + team_size_ = m_team_size; + } + return m_team_scratch_size[level] + + team_size_ * m_thread_scratch_size[level]; + } + +public: + template + friend class TeamPolicyInternal; + + template + TeamPolicyInternal( + const TeamPolicyInternal &p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + } + + TeamPolicyInternal(const typename traits::execution_space &, + int league_size_request, int team_size_request, + int /* vector_length_request */ = 1) + : m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, + m_chunk_size(0) { + init(league_size_request, team_size_request); + } + + TeamPolicyInternal(const typename traits::execution_space &, + int league_size_request, + const Kokkos::AUTO_t &team_size_request, + int /* vector_length_request */ = 1) + : m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, + m_chunk_size(0) { + init(league_size_request, 1); + } + + TeamPolicyInternal(int 
league_size_request, int team_size_request, + int /* vector_length_request */ = 1) + : m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, + m_chunk_size(0) { + init(league_size_request, team_size_request); + } + + TeamPolicyInternal(int league_size_request, + const Kokkos::AUTO_t &team_size_request, + int /* vector_length_request */ = 1) + : m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, + m_chunk_size(0) { + init(league_size_request, 1); + } + + inline int chunk_size() const { return m_chunk_size; } + + inline TeamPolicyInternal & + set_chunk_size(typename traits::index_type chunk_size_) { + m_chunk_size = chunk_size_; + return *this; + } + + inline TeamPolicyInternal &set_scratch_size(const int &level, + const PerTeamValue &per_team) { + m_team_scratch_size[level] = per_team.value; + return *this; + } + + inline TeamPolicyInternal & + set_scratch_size(const int &level, const PerThreadValue &per_thread) { + m_thread_scratch_size[level] = per_thread.value; + return *this; + } + + inline TeamPolicyInternal & + set_scratch_size(const int &level, const PerTeamValue &per_team, + const PerThreadValue &per_thread) { + m_team_scratch_size[level] = per_team.value; + m_thread_scratch_size[level] = per_thread.value; + return *this; + } +}; +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { +namespace Impl { + +template +class ParallelFor, + Kokkos::Experimental::HPX> { +private: + using Policy = Kokkos::RangePolicy; + using WorkTag = typename Policy::work_tag; + using WorkRange = typename Policy::WorkRange; + using Member = typename Policy::member_type; + + const FunctorType m_functor; + const Policy m_policy; + + template + static typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Member i) { + functor(i); + } + + template + static typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Member i) { + const TagType t{}; + functor(t, i); + } + + template + static typename 
std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end) { + for (Member i = i_begin; i < i_end; ++i) { + functor(i); + } + } + + template + static typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end) { + const TagType t{}; + for (Member i = i_begin; i < i_end; ++i) { + functor(t, i); + } + } + +public: + void execute() const { Kokkos::Impl::dispatch_execute_task(this); } + + void execute_task() const { +#if KOKKOS_HPX_IMPLEMENTATION == 0 + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + using hpx::parallel::execution::static_chunk_size; + + for_loop(par.with(static_chunk_size(m_policy.chunk_size())), + m_policy.begin(), m_policy.end(), [this](const Member i) { + execute_functor(m_functor, i); + }); + +#elif KOKKOS_HPX_IMPLEMENTATION == 1 + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + + counting_semaphore sem(0); + std::size_t num_tasks = 0; + + for (Member i_begin = m_policy.begin(); i_begin < m_policy.end(); + i_begin += m_policy.chunk_size()) { + apply([this, &sem, i_begin]() { + const Member i_end = + (std::min)(i_begin + m_policy.chunk_size(), m_policy.end()); + execute_functor_range(m_functor, i_begin, i_end); + + sem.signal(1); + }); + + ++num_tasks; + } + + sem.wait(num_tasks); +#endif + } + + inline ParallelFor(const FunctorType &arg_functor, Policy arg_policy) + : m_functor(arg_functor), m_policy(arg_policy) {} +}; + +template +class ParallelFor, + Kokkos::Experimental::HPX> { +private: + using MDRangePolicy = Kokkos::MDRangePolicy; + using Policy = typename MDRangePolicy::impl_range_policy; + using WorkTag = typename MDRangePolicy::work_tag; + using WorkRange = typename Policy::WorkRange; + using Member = typename Policy::member_type; + using iterate_type = + typename Kokkos::Impl::HostIterateTile; + + const FunctorType m_functor; + const MDRangePolicy 
m_mdr_policy; + const Policy m_policy; + +public: + void execute() const { dispatch_execute_task(this); } + + inline void execute_task() const { +#if KOKKOS_HPX_IMPLEMENTATION == 0 + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + using hpx::parallel::execution::static_chunk_size; + + for_loop(par.with(static_chunk_size(m_policy.chunk_size())), + m_policy.begin(), m_policy.end(), [this](const Member i) { + iterate_type(m_mdr_policy, m_functor)(i); + }); + +#elif KOKKOS_HPX_IMPLEMENTATION == 1 + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + + counting_semaphore sem(0); + std::size_t num_tasks = 0; + + for (Member i_begin = m_policy.begin(); i_begin < m_policy.end(); + i_begin += m_policy.chunk_size()) { + apply([this, &sem, i_begin]() { + const Member i_end = + (std::min)(i_begin + m_policy.chunk_size(), m_policy.end()); + for (Member i = i_begin; i < i_end; ++i) { + iterate_type(m_mdr_policy, m_functor)(i); + } + + sem.signal(1); + }); + + ++num_tasks; + } + + sem.wait(num_tasks); +#endif + } + + inline ParallelFor(const FunctorType &arg_functor, MDRangePolicy arg_policy) + : m_functor(arg_functor), m_mdr_policy(arg_policy), + m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)) {} +}; +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { +namespace Impl { +template +class ParallelReduce, ReducerType, + Kokkos::Experimental::HPX> { +private: + using Policy = Kokkos::RangePolicy; + using WorkTag = typename Policy::work_tag; + using WorkRange = typename Policy::WorkRange; + using Member = typename Policy::member_type; + using Analysis = + FunctorAnalysis; + using ReducerConditional = + Kokkos::Impl::if_c::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + typename Kokkos::Impl::if_c::value, + WorkTag, void>::type; + using ValueInit = Kokkos::Impl::FunctorValueInit; + using ValueJoin = Kokkos::Impl::FunctorValueJoin; + using ValueOps = 
Kokkos::Impl::FunctorValueOps; + using value_type = typename Analysis::value_type; + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + + const FunctorType m_functor; + const Policy m_policy; + const ReducerType m_reducer; + const pointer_type m_result_ptr; + + bool m_force_synchronous; + + template + inline static + typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Member i, + reference_type update) { + functor(i, update); + } + + template + inline static + typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Member i, + reference_type update) { + const TagType t{}; + functor(t, i, update); + } + + template + inline typename std::enable_if::value>::type + execute_functor_range(reference_type update, const Member i_begin, + const Member i_end) const { + for (Member i = i_begin; i < i_end; ++i) { + m_functor(i, update); + } + } + + template + inline typename std::enable_if::value>::type + execute_functor_range(reference_type update, const Member i_begin, + const Member i_end) const { + const TagType t{}; + + for (Member i = i_begin; i < i_end; ++i) { + m_functor(t, i, update); + } + } + + class value_type_wrapper { + private: + std::size_t m_value_size; + char *m_value_buffer; + + public: + value_type_wrapper() : m_value_size(0), m_value_buffer(nullptr) {} + + value_type_wrapper(const std::size_t value_size) + : m_value_size(value_size), m_value_buffer(new char[m_value_size]) {} + + value_type_wrapper(const value_type_wrapper &other) + : m_value_size(0), m_value_buffer(nullptr) { + if (this != &other) { + m_value_buffer = new char[other.m_value_size]; + m_value_size = other.m_value_size; + + std::copy(other.m_value_buffer, other.m_value_buffer + m_value_size, + m_value_buffer); + } + } + + ~value_type_wrapper() { delete[] m_value_buffer; } + + value_type_wrapper(value_type_wrapper &&other) + : m_value_size(0), 
m_value_buffer(nullptr) { + if (this != &other) { + m_value_buffer = other.m_value_buffer; + m_value_size = other.m_value_size; + + other.m_value_buffer = nullptr; + other.m_value_size = 0; + } + } + + value_type_wrapper &operator=(const value_type_wrapper &other) { + if (this != &other) { + delete[] m_value_buffer; + m_value_buffer = new char[other.m_value_size]; + m_value_size = other.m_value_size; + + std::copy(other.m_value_buffer, other.m_value_buffer + m_value_size, + m_value_buffer); + } + + return *this; + } + + value_type_wrapper &operator=(value_type_wrapper &&other) { + if (this != &other) { + delete[] m_value_buffer; + m_value_buffer = other.m_value_buffer; + m_value_size = other.m_value_size; + + other.m_value_buffer = nullptr; + other.m_value_size = 0; + } + + return *this; + } + + pointer_type pointer() const { + return reinterpret_cast(m_value_buffer); + } + + reference_type reference() const { + return ValueOps::reference( + reinterpret_cast(m_value_buffer)); + } + }; + +public: + void execute() const { + dispatch_execute_task(this); + } + + inline void execute_task() const { + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + + std::size_t value_size = + Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); + + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + +#if KOKKOS_HPX_IMPLEMENTATION == 0 + // NOTE: This version makes the most use of HPX functionality, but + // requires the struct value_type_wrapper to handle different + // reference_types. It is also significantly slower than the version + // below due to not reusing the buffer used by other functions. 
+ using hpx::parallel::reduction; + using hpx::parallel::execution::static_chunk_size; + + value_type_wrapper final_value(value_size); + value_type_wrapper identity(value_size); + + ValueInit::init(ReducerConditional::select(m_functor, m_reducer), + final_value.pointer()); + ValueInit::init(ReducerConditional::select(m_functor, m_reducer), + identity.pointer()); + + for_loop(par.with(static_chunk_size(m_policy.chunk_size())), + m_policy.begin(), m_policy.end(), + reduction(final_value, identity, + [this](value_type_wrapper &a, + value_type_wrapper &b) -> value_type_wrapper & { + ValueJoin::join( + ReducerConditional::select(m_functor, m_reducer), + a.pointer(), b.pointer()); + return a; + }), + [this](Member i, value_type_wrapper &update) { + execute_functor(m_functor, i, update.reference()); + }); + + pointer_type final_value_ptr = final_value.pointer(); + +#elif KOKKOS_HPX_IMPLEMENTATION == 1 + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, value_size); + + for_loop(par, 0, num_worker_threads, [this, &buffer](std::size_t t) { + ValueInit::init(ReducerConditional::select(m_functor, m_reducer), + reinterpret_cast(buffer.get(t))); + }); + + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + + counting_semaphore sem(0); + std::size_t num_tasks = 0; + + for (Member i_begin = m_policy.begin(); i_begin < m_policy.end(); + i_begin += m_policy.chunk_size()) { + apply([this, &buffer, &sem, i_begin]() { + reference_type update = + ValueOps::reference(reinterpret_cast( + buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id()))); + const Member i_end = + (std::min)(i_begin + m_policy.chunk_size(), m_policy.end()); + execute_functor_range(update, i_begin, i_end); + + sem.signal(1); + }); + + ++num_tasks; + } + + sem.wait(num_tasks); + + for (int i = 1; i < num_worker_threads; ++i) { + ValueJoin::join(ReducerConditional::select(m_functor, m_reducer), + reinterpret_cast(buffer.get(0)), + 
reinterpret_cast(buffer.get(i))); + } + + pointer_type final_value_ptr = + reinterpret_cast(buffer.get(0)); +#endif + + Kokkos::Impl::FunctorFinal::final( + ReducerConditional::select(m_functor, m_reducer), final_value_ptr); + + if (m_result_ptr != nullptr) { + const int n = Analysis::value_count( + ReducerConditional::select(m_functor, m_reducer)); + + for (int j = 0; j < n; ++j) { + m_result_ptr[j] = final_value_ptr[j]; + } + } + } + + template + inline ParallelReduce( + const FunctorType &arg_functor, Policy arg_policy, + const ViewType &arg_view, + typename std::enable_if::value && + !Kokkos::is_reducer_type::value, + void *>::type = NULL) + : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), + m_result_ptr(arg_view.data()), + m_force_synchronous(!arg_view.impl_track().has_record()) {} + + inline ParallelReduce(const FunctorType &arg_functor, Policy arg_policy, + const ReducerType &reducer) + : m_functor(arg_functor), m_policy(arg_policy), m_reducer(reducer), + m_result_ptr(reducer.view().data()), + m_force_synchronous(!reducer.view().impl_track().has_record()) {} +}; + +template +class ParallelReduce, ReducerType, + Kokkos::Experimental::HPX> { +private: + using MDRangePolicy = Kokkos::MDRangePolicy; + using Policy = typename MDRangePolicy::impl_range_policy; + using WorkTag = typename MDRangePolicy::work_tag; + using WorkRange = typename Policy::WorkRange; + using Member = typename Policy::member_type; + using Analysis = FunctorAnalysis; + using ReducerConditional = + Kokkos::Impl::if_c::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + typename Kokkos::Impl::if_c::value, + WorkTag, void>::type; + using ValueInit = Kokkos::Impl::FunctorValueInit; + using ValueJoin = Kokkos::Impl::FunctorValueJoin; + using ValueOps = Kokkos::Impl::FunctorValueOps; + using pointer_type = typename Analysis::pointer_type; + using value_type = typename Analysis::value_type; + using 
reference_type = typename Analysis::reference_type; + using iterate_type = + typename Kokkos::Impl::HostIterateTile; + + const FunctorType m_functor; + const MDRangePolicy m_mdr_policy; + const Policy m_policy; + const ReducerType m_reducer; + const pointer_type m_result_ptr; + + bool m_force_synchronous; + +public: + void execute() const { + dispatch_execute_task(this); + } + + inline void execute_task() const { + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + const std::size_t value_size = + Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); + + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, value_size); + + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + + for_loop(par, 0, num_worker_threads, [this, &buffer](std::size_t t) { + ValueInit::init(ReducerConditional::select(m_functor, m_reducer), + reinterpret_cast(buffer.get(t))); + }); + +#if KOKKOS_HPX_IMPLEMENTATION == 0 + using hpx::parallel::execution::static_chunk_size; + + for_loop(par.with(static_chunk_size(m_policy.chunk_size())), + m_policy.begin(), m_policy.end(), [this, &buffer](const Member i) { + reference_type update = ValueOps::reference( + reinterpret_cast(buffer.get( + Kokkos::Experimental::HPX::impl_hardware_thread_id()))); + iterate_type(m_mdr_policy, m_functor, update)(i); + }); + +#elif KOKKOS_HPX_IMPLEMENTATION == 1 + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + + counting_semaphore sem(0); + std::size_t num_tasks = 0; + + for (Member i_begin = m_policy.begin(); i_begin < m_policy.end(); + i_begin += m_policy.chunk_size()) { + apply([this, &buffer, &sem, i_begin]() { + reference_type update = + ValueOps::reference(reinterpret_cast( + buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id()))); + const Member i_end = + (std::min)(i_begin + m_policy.chunk_size(), m_policy.end()); + + for (Member i = i_begin; i < i_end; ++i) { + 
iterate_type(m_mdr_policy, m_functor, update)(i); + } + + sem.signal(1); + }); + + ++num_tasks; + } + + sem.wait(num_tasks); +#endif + + for (int i = 1; i < num_worker_threads; ++i) { + ValueJoin::join(ReducerConditional::select(m_functor, m_reducer), + reinterpret_cast(buffer.get(0)), + reinterpret_cast(buffer.get(i))); + } + + Kokkos::Impl::FunctorFinal::final( + ReducerConditional::select(m_functor, m_reducer), + reinterpret_cast(buffer.get(0))); + + if (m_result_ptr != nullptr) { + const int n = Analysis::value_count( + ReducerConditional::select(m_functor, m_reducer)); + + for (int j = 0; j < n; ++j) { + m_result_ptr[j] = reinterpret_cast(buffer.get(0))[j]; + } + } + } + + template + inline ParallelReduce( + const FunctorType &arg_functor, MDRangePolicy arg_policy, + const ViewType &arg_view, + typename std::enable_if::value && + !Kokkos::is_reducer_type::value, + void *>::type = NULL) + : m_functor(arg_functor), m_mdr_policy(arg_policy), + m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), + m_reducer(InvalidType()), m_result_ptr(arg_view.data()), + m_force_synchronous(!arg_view.impl_track().has_record()) {} + + inline ParallelReduce(const FunctorType &arg_functor, + MDRangePolicy arg_policy, const ReducerType &reducer) + : m_functor(arg_functor), m_mdr_policy(arg_policy), + m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), + m_reducer(reducer), m_result_ptr(reducer.view().data()), + m_force_synchronous(!reducer.view().impl_track().has_record()) {} +}; +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { +namespace Impl { + +template +class ParallelScan, + Kokkos::Experimental::HPX> { +private: + using Policy = Kokkos::RangePolicy; + using WorkTag = typename Policy::work_tag; + using WorkRange = typename Policy::WorkRange; + using Member = typename Policy::member_type; + using Analysis = + FunctorAnalysis; + using ValueInit = Kokkos::Impl::FunctorValueInit; + using ValueJoin = Kokkos::Impl::FunctorValueJoin; + using 
ValueOps = Kokkos::Impl::FunctorValueOps; + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + using value_type = typename Analysis::value_type; + + const FunctorType m_functor; + const Policy m_policy; + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end, reference_type update, + const bool final) { + for (Member i = i_begin; i < i_end; ++i) { + functor(i, update, final); + } + } + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end, reference_type update, + const bool final) { + const TagType t{}; + for (Member i = i_begin; i < i_end; ++i) { + functor(t, i, update, final); + } + } + +public: + void execute() const { dispatch_execute_task(this); } + + inline void execute_task() const { + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + const int value_count = Analysis::value_count(m_functor); + const std::size_t value_size = Analysis::value_size(m_functor); + + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, 2 * value_size); + + using hpx::lcos::local::barrier; + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + using hpx::parallel::execution::static_chunk_size; + + barrier bar(num_worker_threads); + + for_loop(par.with(static_chunk_size(1)), 0, num_worker_threads, + [this, &buffer, &bar, num_worker_threads, value_count, + value_size](std::size_t const t) { + reference_type update_sum = ValueInit::init( + m_functor, reinterpret_cast(buffer.get(t))); + + const WorkRange range(m_policy, t, num_worker_threads); + execute_functor_range(m_functor, range.begin(), + range.end(), update_sum, false); + + bar.wait(); + + if (t == 0) { + ValueInit::init(m_functor, reinterpret_cast( + 
buffer.get(0) + value_size)); + + for (int i = 1; i < num_worker_threads; ++i) { + pointer_type ptr_1_prev = + reinterpret_cast(buffer.get(i - 1)); + pointer_type ptr_2_prev = reinterpret_cast( + buffer.get(i - 1) + value_size); + pointer_type ptr_2 = reinterpret_cast( + buffer.get(i) + value_size); + + for (int j = 0; j < value_count; ++j) { + ptr_2[j] = ptr_2_prev[j]; + } + + ValueJoin::join(m_functor, ptr_2, ptr_1_prev); + } + } + + bar.wait(); + + reference_type update_base = ValueOps::reference( + reinterpret_cast(buffer.get(t) + value_size)); + + execute_functor_range(m_functor, range.begin(), + range.end(), update_base, true); + }); + } + + inline ParallelScan(const FunctorType &arg_functor, const Policy &arg_policy) + : m_functor(arg_functor), m_policy(arg_policy) {} +}; + +template +class ParallelScanWithTotal, + ReturnType, Kokkos::Experimental::HPX> { +private: + using Policy = Kokkos::RangePolicy; + using WorkTag = typename Policy::work_tag; + using WorkRange = typename Policy::WorkRange; + using Member = typename Policy::member_type; + using Analysis = + FunctorAnalysis; + using ValueInit = Kokkos::Impl::FunctorValueInit; + using ValueJoin = Kokkos::Impl::FunctorValueJoin; + using ValueOps = Kokkos::Impl::FunctorValueOps; + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + using value_type = typename Analysis::value_type; + + const FunctorType m_functor; + const Policy m_policy; + ReturnType &m_returnvalue; + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end, reference_type update, + const bool final) { + for (Member i = i_begin; i < i_end; ++i) { + functor(i, update, final); + } + } + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Member i_begin, + const Member i_end, reference_type update, + const 
bool final) { + const TagType t{}; + for (Member i = i_begin; i < i_end; ++i) { + functor(t, i, update, final); + } + } + +public: + void execute() const { dispatch_execute_task(this); } + + inline void execute_task() const { + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + const int value_count = Analysis::value_count(m_functor); + const std::size_t value_size = Analysis::value_size(m_functor); + + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, 2 * value_size); + + using hpx::lcos::local::barrier; + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + using hpx::parallel::execution::static_chunk_size; + + barrier bar(num_worker_threads); + + for_loop(par.with(static_chunk_size(1)), 0, num_worker_threads, + [this, &buffer, &bar, num_worker_threads, value_count, + value_size](std::size_t const t) { + reference_type update_sum = ValueInit::init( + m_functor, reinterpret_cast(buffer.get(t))); + + const WorkRange range(m_policy, t, num_worker_threads); + execute_functor_range(m_functor, range.begin(), + range.end(), update_sum, false); + + bar.wait(); + + if (t == 0) { + ValueInit::init(m_functor, reinterpret_cast( + buffer.get(0) + value_size)); + + for (int i = 1; i < num_worker_threads; ++i) { + pointer_type ptr_1_prev = + reinterpret_cast(buffer.get(i - 1)); + pointer_type ptr_2_prev = reinterpret_cast( + buffer.get(i - 1) + value_size); + pointer_type ptr_2 = reinterpret_cast( + buffer.get(i) + value_size); + + for (int j = 0; j < value_count; ++j) { + ptr_2[j] = ptr_2_prev[j]; + } + + ValueJoin::join(m_functor, ptr_2, ptr_1_prev); + } + } + + bar.wait(); + + reference_type update_base = ValueOps::reference( + reinterpret_cast(buffer.get(t) + value_size)); + + execute_functor_range(m_functor, range.begin(), + range.end(), update_base, true); + + if (t == std::size_t(num_worker_threads - 1)) { + m_returnvalue = update_base; + } + }); + } + + inline 
ParallelScanWithTotal(const FunctorType &arg_functor, + const Policy &arg_policy, + ReturnType &arg_returnvalue) + : m_functor(arg_functor), m_policy(arg_policy), + m_returnvalue(arg_returnvalue) {} +}; +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { +namespace Impl { +template +class ParallelFor, + Kokkos::Experimental::HPX> { +private: + using Policy = TeamPolicyInternal; + using WorkTag = typename Policy::work_tag; + using Member = typename Policy::member_type; + using memory_space = Kokkos::HostSpace; + + const FunctorType m_functor; + const Policy m_policy; + const int m_league; + const std::size_t m_shared; + + template + inline static + typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Policy &policy, + const int league_rank, char *local_buffer, + const std::size_t local_buffer_size) { + functor(Member(policy, 0, league_rank, local_buffer, local_buffer_size)); + } + + template + inline static + typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Policy &policy, + const int league_rank, char *local_buffer, + const std::size_t local_buffer_size) { + const TagType t{}; + functor(t, Member(policy, 0, league_rank, local_buffer, local_buffer_size)); + } + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Policy &policy, + const int league_rank_begin, + const int league_rank_end, char *local_buffer, + const std::size_t local_buffer_size) { + for (int league_rank = league_rank_begin; league_rank < league_rank_end; + ++league_rank) { + functor(Member(policy, 0, league_rank, local_buffer, local_buffer_size)); + } + } + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Policy &policy, + const int league_rank_begin, + const int league_rank_end, char *local_buffer, + const std::size_t local_buffer_size) { + const TagType t{}; + 
for (int league_rank = league_rank_begin; league_rank < league_rank_end; + ++league_rank) { + functor(t, + Member(policy, 0, league_rank, local_buffer, local_buffer_size)); + } + } + +public: + void execute() const { dispatch_execute_task(this); } + + inline void execute_task() const { + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, m_shared); + +#if KOKKOS_HPX_IMPLEMENTATION == 0 + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + using hpx::parallel::execution::static_chunk_size; + + for_loop(par.with(static_chunk_size(m_policy.chunk_size())), 0, + m_policy.league_size(), [this, &buffer](const int league_rank) { + execute_functor( + m_functor, m_policy, league_rank, + buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id()), + m_shared); + }); + +#elif KOKKOS_HPX_IMPLEMENTATION == 1 + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + + counting_semaphore sem(0); + std::size_t num_tasks = 0; + + for (int league_rank_begin = 0; league_rank_begin < m_policy.league_size(); + league_rank_begin += m_policy.chunk_size()) { + apply([this, &buffer, &sem, league_rank_begin]() { + const int league_rank_end = (std::min)( + league_rank_begin + m_policy.chunk_size(), m_policy.league_size()); + execute_functor_range( + m_functor, m_policy, league_rank_begin, league_rank_end, + buffer.get(Kokkos::Experimental::HPX::impl_hardware_thread_id()), m_shared); + + sem.signal(1); + }); + + ++num_tasks; + } + + sem.wait(num_tasks); +#endif + } + + ParallelFor(const FunctorType &arg_functor, const Policy &arg_policy) + : m_functor(arg_functor), m_policy(arg_policy), + m_league(arg_policy.league_size()), + m_shared(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + + FunctorTeamShmemSize::value( + arg_functor, arg_policy.team_size())) {} +}; + +template +class ParallelReduce, + ReducerType, 
Kokkos::Experimental::HPX> { +private: + using Policy = TeamPolicyInternal; + using Analysis = + FunctorAnalysis; + using Member = typename Policy::member_type; + using WorkTag = typename Policy::work_tag; + using ReducerConditional = + Kokkos::Impl::if_c::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + typename Kokkos::Impl::if_c::value, + WorkTag, void>::type; + using ValueInit = Kokkos::Impl::FunctorValueInit; + using ValueJoin = Kokkos::Impl::FunctorValueJoin; + using ValueOps = Kokkos::Impl::FunctorValueOps; + using pointer_type = typename Analysis::pointer_type; + using reference_type = typename Analysis::reference_type; + using value_type = typename Analysis::value_type; + + const FunctorType m_functor; + const int m_league; + const Policy m_policy; + const ReducerType m_reducer; + pointer_type m_result_ptr; + const std::size_t m_shared; + + bool m_force_synchronous; + + template + inline static + typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Policy &policy, + const int league_rank, char *local_buffer, + const std::size_t local_buffer_size, + reference_type update) { + functor(Member(policy, 0, league_rank, local_buffer, local_buffer_size), + update); + } + + template + inline static + typename std::enable_if::value>::type + execute_functor(const FunctorType &functor, const Policy &policy, + const int league_rank, char *local_buffer, + const std::size_t local_buffer_size, + reference_type update) { + const TagType t{}; + functor(t, Member(policy, 0, league_rank, local_buffer, local_buffer_size), + update); + } + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Policy &policy, + const int league_rank_begin, + const int league_rank_end, char *local_buffer, + const std::size_t local_buffer_size, + reference_type update) { + for (int league_rank = league_rank_begin; 
league_rank < league_rank_end; + ++league_rank) { + functor(Member(policy, 0, league_rank, local_buffer, local_buffer_size), + update); + } + } + + template + inline static + typename std::enable_if::value>::type + execute_functor_range(const FunctorType &functor, const Policy &policy, + const int league_rank_begin, + const int league_rank_end, char *local_buffer, + const std::size_t local_buffer_size, + reference_type update) { + const TagType t{}; + for (int league_rank = league_rank_begin; league_rank < league_rank_end; + ++league_rank) { + functor(t, + Member(policy, 0, league_rank, local_buffer, local_buffer_size), + update); + } + } + +public: + void execute() const { + dispatch_execute_task(this); + } + + inline void execute_task() const { + const int num_worker_threads = Kokkos::Experimental::HPX::concurrency(); + const std::size_t value_size = + Analysis::value_size(ReducerConditional::select(m_functor, m_reducer)); + + thread_buffer &buffer = Kokkos::Experimental::HPX::impl_get_buffer(); + buffer.resize(num_worker_threads, value_size + m_shared); + + using hpx::parallel::for_loop; + using hpx::parallel::execution::par; + + for_loop(par, 0, num_worker_threads, [this, &buffer](std::size_t t) { + ValueInit::init(ReducerConditional::select(m_functor, m_reducer), + reinterpret_cast(buffer.get(t))); + }); + +#if KOKKOS_HPX_IMPLEMENTATION == 0 + using hpx::parallel::execution::static_chunk_size; + + hpx::parallel::for_loop( + par.with(static_chunk_size(m_policy.chunk_size())), 0, + m_policy.league_size(), + [this, &buffer, value_size](const int league_rank) { + std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id(); + reference_type update = ValueOps::reference( + reinterpret_cast(buffer.get(t))); + + execute_functor(m_functor, m_policy, league_rank, + buffer.get(t) + value_size, m_shared, + update); + }); + +#elif KOKKOS_HPX_IMPLEMENTATION == 1 + using hpx::apply; + using hpx::lcos::local::counting_semaphore; + + counting_semaphore sem(0); + 
std::size_t num_tasks = 0; + + for (int league_rank_begin = 0; league_rank_begin < m_policy.league_size(); + league_rank_begin += m_policy.chunk_size()) { + apply([this, &buffer, &sem, league_rank_begin, value_size]() { + std::size_t t = Kokkos::Experimental::HPX::impl_hardware_thread_id(); + reference_type update = + ValueOps::reference(reinterpret_cast(buffer.get(t))); + const int league_rank_end = (std::min)( + league_rank_begin + m_policy.chunk_size(), m_policy.league_size()); + execute_functor_range( + m_functor, m_policy, league_rank_begin, league_rank_end, + buffer.get(t) + value_size, m_shared, update); + + sem.signal(1); + }); + + ++num_tasks; + } + + sem.wait(num_tasks); +#endif + + const pointer_type ptr = reinterpret_cast(buffer.get(0)); + for (int t = 1; t < num_worker_threads; ++t) { + ValueJoin::join(ReducerConditional::select(m_functor, m_reducer), ptr, + reinterpret_cast(buffer.get(t))); + } + + Kokkos::Impl::FunctorFinal::final( + ReducerConditional::select(m_functor, m_reducer), ptr); + + if (m_result_ptr) { + const int n = Analysis::value_count( + ReducerConditional::select(m_functor, m_reducer)); + + for (int j = 0; j < n; ++j) { + m_result_ptr[j] = ptr[j]; + } + } + } + + template + ParallelReduce( + const FunctorType &arg_functor, const Policy &arg_policy, + const ViewType &arg_result, + typename std::enable_if::value && + !Kokkos::is_reducer_type::value, + void *>::type = NULL) + : m_functor(arg_functor), m_league(arg_policy.league_size()), + m_policy(arg_policy), m_reducer(InvalidType()), + m_result_ptr(arg_result.data()), + m_shared(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + + FunctorTeamShmemSize::value( + m_functor, arg_policy.team_size())), + m_force_synchronous(!arg_result.impl_track().has_record()) {} + + inline ParallelReduce(const FunctorType &arg_functor, Policy arg_policy, + const ReducerType &reducer) + : m_functor(arg_functor), m_league(arg_policy.league_size()), + m_policy(arg_policy), m_reducer(reducer), + 
m_result_ptr(reducer.view().data()), + m_shared(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + + FunctorTeamShmemSize::value( + arg_functor, arg_policy.team_size())), + m_force_synchronous(!reducer.view().impl_track().has_record()) {} +}; +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { + +template +KOKKOS_INLINE_FUNCTION + Impl::TeamThreadRangeBoundariesStruct + TeamThreadRange(const Impl::HPXTeamMember &thread, const iType &count) { + return Impl::TeamThreadRangeBoundariesStruct( + thread, count); +} + +template +KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< + typename std::common_type::type, Impl::HPXTeamMember> +TeamThreadRange(const Impl::HPXTeamMember &thread, const iType1 &i_begin, + const iType2 &i_end) { + using iType = typename std::common_type::type; + return Impl::TeamThreadRangeBoundariesStruct( + thread, iType(i_begin), iType(i_end)); +} + +template +KOKKOS_INLINE_FUNCTION + Impl::TeamThreadRangeBoundariesStruct + TeamVectorRange(const Impl::HPXTeamMember &thread, const iType &count) { + return Impl::TeamThreadRangeBoundariesStruct( + thread, count); +} + +template +KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< + typename std::common_type::type, Impl::HPXTeamMember> +TeamVectorRange(const Impl::HPXTeamMember &thread, const iType1 &i_begin, + const iType2 &i_end) { + using iType = typename std::common_type::type; + return Impl::TeamThreadRangeBoundariesStruct( + thread, iType(i_begin), iType(i_end)); +} + +template +KOKKOS_INLINE_FUNCTION + Impl::ThreadVectorRangeBoundariesStruct + ThreadVectorRange(const Impl::HPXTeamMember &thread, const iType &count) { + return Impl::ThreadVectorRangeBoundariesStruct( + thread, count); +} + +template +KOKKOS_INLINE_FUNCTION + Impl::ThreadVectorRangeBoundariesStruct + ThreadVectorRange(const Impl::HPXTeamMember &thread, const iType &i_begin, + const iType &i_end) { + return Impl::ThreadVectorRangeBoundariesStruct( + thread, i_begin, i_end); +} + 
+KOKKOS_INLINE_FUNCTION +Impl::ThreadSingleStruct +PerTeam(const Impl::HPXTeamMember &thread) { + return Impl::ThreadSingleStruct(thread); +} + +KOKKOS_INLINE_FUNCTION +Impl::VectorSingleStruct +PerThread(const Impl::HPXTeamMember &thread) { + return Impl::VectorSingleStruct(thread); +} + +/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each + * i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team. + * This functionality requires C++11 support.*/ +template +KOKKOS_INLINE_FUNCTION void parallel_for( + const Impl::TeamThreadRangeBoundariesStruct + &loop_boundaries, + const Lambda &lambda) { + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) + lambda(i); +} + +/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, + * ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the the calling thread team + * and a summation of val is performed and put into result. This functionality + * requires C++11 support.*/ +template +KOKKOS_INLINE_FUNCTION void parallel_reduce( + const Impl::TeamThreadRangeBoundariesStruct + &loop_boundaries, + const Lambda &lambda, ValueType &result) { + result = ValueType(); + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, result); + } +} + +/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each + * i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread. 
+ * This functionality requires C++11 support.*/ +template +KOKKOS_INLINE_FUNCTION void parallel_for( + const Impl::ThreadVectorRangeBoundariesStruct + &loop_boundaries, + const Lambda &lambda) { +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i); + } +} + +/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, + * ValueType & val) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes of the the calling thread + * and a summation of val is performed and put into result. This functionality + * requires C++11 support.*/ +template +KOKKOS_INLINE_FUNCTION void parallel_reduce( + const Impl::ThreadVectorRangeBoundariesStruct + &loop_boundaries, + const Lambda &lambda, ValueType &result) { + result = ValueType(); +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, result); + } +} + +template +KOKKOS_INLINE_FUNCTION void parallel_reduce( + const Impl::TeamThreadRangeBoundariesStruct + &loop_boundaries, + const Lambda &lambda, const ReducerType &reducer) { + reducer.init(reducer.reference()); + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, reducer.reference()); + } +} + +template +KOKKOS_INLINE_FUNCTION void parallel_reduce( + const Impl::ThreadVectorRangeBoundariesStruct + &loop_boundaries, + const Lambda &lambda, const ReducerType &reducer) { + reducer.init(reducer.reference()); +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, reducer.reference()); + } +} + +template +KOKKOS_INLINE_FUNCTION void parallel_scan( + Impl::TeamThreadRangeBoundariesStruct const + &loop_boundaries, + const FunctorType &lambda) { + using 
value_type = typename Kokkos::Impl::FunctorAnalysis< + Kokkos::Impl::FunctorPatternInterface::SCAN, void, + FunctorType>::value_type; + + value_type scan_val = value_type(); + + // Intra-member scan + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, scan_val, false); + } + + // 'scan_val' output is the exclusive prefix sum + scan_val = loop_boundaries.thread.team_scan(scan_val); + + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, scan_val, true); + } +} + +/** \brief Intra-thread vector parallel exclusive prefix sum. Executes + * lambda(iType i, ValueType & val, bool final) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan + * operation is performed. Depending on the target execution space the operator + * might be called twice: once with final=false and once with final=true. When + * final==true val contains the prefix sum value. The contribution of this "i" + * needs to be added to val no matter whether final==true or not. In a serial + * execution (i.e. team_size==1) the operator is only called once with + * final==true. Scan_val will be set to the final sum value over all vector + * lanes. 
This functionality requires C++11 support.*/ +template +KOKKOS_INLINE_FUNCTION void parallel_scan( + const Impl::ThreadVectorRangeBoundariesStruct + &loop_boundaries, + const FunctorType &lambda) { + using ValueTraits = Kokkos::Impl::FunctorValueTraits; + using value_type = typename ValueTraits::value_type; + + value_type scan_val = value_type(); + +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for (iType i = loop_boundaries.start; i < loop_boundaries.end; + i += loop_boundaries.increment) { + lambda(i, scan_val, true); + } +} + +template +KOKKOS_INLINE_FUNCTION void +single(const Impl::VectorSingleStruct &single_struct, + const FunctorType &lambda) { + lambda(); +} + +template +KOKKOS_INLINE_FUNCTION void +single(const Impl::ThreadSingleStruct &single_struct, + const FunctorType &lambda) { + lambda(); +} + +template +KOKKOS_INLINE_FUNCTION void +single(const Impl::VectorSingleStruct &single_struct, + const FunctorType &lambda, ValueType &val) { + lambda(val); +} + +template +KOKKOS_INLINE_FUNCTION void +single(const Impl::ThreadSingleStruct &single_struct, + const FunctorType &lambda, ValueType &val) { + lambda(val); +} + +} // namespace Kokkos + +#include + +#endif /* #if defined( KOKKOS_ENABLE_HPX ) */ +#endif /* #ifndef KOKKOS_HPX_HPP */ diff --git a/lib/kokkos/core/src/Kokkos_HostSpace.hpp b/lib/kokkos/core/src/Kokkos_HostSpace.hpp index 3fd55d9148..921ba0df34 100644 --- a/lib/kokkos/core/src/Kokkos_HostSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_HostSpace.hpp @@ -57,6 +57,8 @@ #include #include +#include "impl/Kokkos_HostSpace_deepcopy.hpp" + /*--------------------------------------------------------------------------*/ namespace Kokkos { @@ -113,6 +115,8 @@ public: typedef Kokkos::OpenMP execution_space; #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) typedef Kokkos::Threads execution_space; +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX ) + typedef Kokkos::Experimental::HPX execution_space; //#elif defined( 
KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) // typedef Kokkos::Qthreads execution_space; #elif defined( KOKKOS_ENABLE_OPENMP ) @@ -121,6 +125,8 @@ public: typedef Kokkos::Threads execution_space; //#elif defined( KOKKOS_ENABLE_QTHREADS ) // typedef Kokkos::Qthreads execution_space; +#elif defined( KOKKOS_ENABLE_HPX ) + typedef Kokkos::Experimental::HPX execution_space; #elif defined( KOKKOS_ENABLE_SERIAL ) typedef Kokkos::Serial execution_space; #else @@ -291,15 +297,18 @@ namespace Kokkos { namespace Impl { +#define PAR_DEEP_COPY_USE_MEMCPY + template< class ExecutionSpace > struct DeepCopy< HostSpace, HostSpace, ExecutionSpace > { DeepCopy( void * dst, const void * src, size_t n ) { - memcpy( dst, src, n ); + hostspace_parallel_deepcopy(dst,src,n); } DeepCopy( const ExecutionSpace& exec, void * dst, const void * src, size_t n ) { exec.fence(); - memcpy( dst, src, n ); + hostspace_parallel_deepcopy(dst,src,n); + exec.fence(); } }; diff --git a/lib/kokkos/core/src/Kokkos_Layout.hpp b/lib/kokkos/core/src/Kokkos_Layout.hpp index 43e117783b..6f423d545f 100644 --- a/lib/kokkos/core/src/Kokkos_Layout.hpp +++ b/lib/kokkos/core/src/Kokkos_Layout.hpp @@ -193,6 +193,9 @@ struct LayoutStride { {} }; +// ========================================================================== +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + //---------------------------------------------------------------------------- /// \struct LayoutTileLeft /// \brief Memory layout tag indicating left-to-right (Fortran scheme) @@ -243,6 +246,8 @@ struct LayoutTileLeft { : dimension { argN0 , argN1 , argN2 , argN3 , argN4 , argN5 , argN6 , argN7 } {} }; +#endif // KOKKOS_ENABLE_DEPRECATED_CODE +// =================================================================================== ////////////////////////////////////////////////////////////////////////////////////// @@ -269,14 +274,14 @@ namespace Experimental { template < Kokkos::Iterate OuterP, Kokkos::Iterate InnerP, unsigned ArgN0 , unsigned ArgN1 , unsigned 
ArgN2 = 0, unsigned ArgN3 = 0, unsigned ArgN4 = 0, unsigned ArgN5 = 0, unsigned ArgN6 = 0, unsigned ArgN7 = 0, bool IsPowerOfTwo = - ( Impl::is_integral_power_of_two(ArgN0) && - Impl::is_integral_power_of_two(ArgN1) && - (Impl::is_integral_power_of_two(ArgN2) || (ArgN2 == 0) ) && - (Impl::is_integral_power_of_two(ArgN3) || (ArgN3 == 0) ) && - (Impl::is_integral_power_of_two(ArgN4) || (ArgN4 == 0) ) && - (Impl::is_integral_power_of_two(ArgN5) || (ArgN5 == 0) ) && - (Impl::is_integral_power_of_two(ArgN6) || (ArgN6 == 0) ) && - (Impl::is_integral_power_of_two(ArgN7) || (ArgN7 == 0) ) + ( Kokkos::Impl::is_integral_power_of_two(ArgN0) && + Kokkos::Impl::is_integral_power_of_two(ArgN1) && + (Kokkos::Impl::is_integral_power_of_two(ArgN2) || (ArgN2 == 0) ) && + (Kokkos::Impl::is_integral_power_of_two(ArgN3) || (ArgN3 == 0) ) && + (Kokkos::Impl::is_integral_power_of_two(ArgN4) || (ArgN4 == 0) ) && + (Kokkos::Impl::is_integral_power_of_two(ArgN5) || (ArgN5 == 0) ) && + (Kokkos::Impl::is_integral_power_of_two(ArgN6) || (ArgN6 == 0) ) && + (Kokkos::Impl::is_integral_power_of_two(ArgN7) || (ArgN7 == 0) ) ) > struct LayoutTiled { diff --git a/lib/kokkos/core/src/Kokkos_Macros.hpp b/lib/kokkos/core/src/Kokkos_Macros.hpp index 10fc09423e..6b8ae02f82 100644 --- a/lib/kokkos/core/src/Kokkos_Macros.hpp +++ b/lib/kokkos/core/src/Kokkos_Macros.hpp @@ -50,6 +50,7 @@ * KOKKOS_ENABLE_CUDA Kokkos::Cuda execution and memory spaces * KOKKOS_ENABLE_THREADS Kokkos::Threads execution space * KOKKOS_ENABLE_QTHREADS Kokkos::Qthreads execution space + * KOKKOS_ENABLE_HPX Kokkos::Experimental::HPX execution space * KOKKOS_ENABLE_OPENMP Kokkos::OpenMP execution space * KOKKOS_ENABLE_OPENMPTARGET Kokkos::Experimental::OpenMPTarget execution space * KOKKOS_ENABLE_HWLOC HWLOC library is available. 
@@ -98,12 +99,14 @@ #if defined(KOKKOS_ENABLE_SERIAL) || defined(KOKKOS_ENABLE_THREADS) || \ defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_QTHREADS) || \ + defined(KOKKOS_ENABLE_HPX) || \ defined(KOKKOS_ENABLE_ROCM) || defined(KOKKOS_ENABLE_OPENMPTARGET) #define KOKKOS_INTERNAL_ENABLE_NON_CUDA_BACKEND #endif #if !defined(KOKKOS_ENABLE_THREADS) && !defined(KOKKOS_ENABLE_CUDA) && \ !defined(KOKKOS_ENABLE_OPENMP) && !defined(KOKKOS_ENABLE_QTHREADS) && \ + !defined(KOKKOS_ENABLE_HPX) && \ !defined(KOKKOS_ENABLE_ROCM) && !defined(KOKKOS_ENABLE_OPENMPTARGET) #define KOKKOS_INTERNAL_NOT_PARALLEL #endif @@ -174,33 +177,22 @@ #if ( 10000 > CUDA_VERSION ) #define KOKKOS_ENABLE_PRE_CUDA_10_DEPRECATION_API #endif + + #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 700) + // PTX atomics with memory order semantics are only available on volta and later + #if !defined(KOKKOS_DISABLE_CUDA_ASM) + #if !defined(KOKKOS_ENABLE_CUDA_ASM) + #define KOKKOS_ENABLE_CUDA_ASM + #if !defined(KOKKOS_DISABLE_CUDA_ASM_ATOMICS) + #define KOKKOS_ENABLE_CUDA_ASM_ATOMICS + #endif + #endif + #endif + #endif + + #endif // #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) -//---------------------------------------------------------------------------- -// Language info: C++, CUDA, OPENMP - -#if defined( KOKKOS_ENABLE_CUDA ) - // Compiling Cuda code to 'ptx' - - #define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__ - #define KOKKOS_INLINE_FUNCTION __device__ __host__ inline - #define KOKKOS_FUNCTION __device__ __host__ -#endif // #if defined( __CUDA_ARCH__ ) - -#if defined( KOKKOS_ENABLE_ROCM ) && defined( __HCC__ ) - - #define KOKKOS_FORCEINLINE_FUNCTION __attribute__((amp,cpu)) inline - #define KOKKOS_INLINE_FUNCTION __attribute__((amp,cpu)) inline - #define KOKKOS_FUNCTION __attribute__((amp,cpu)) - #define KOKKOS_LAMBDA [=] __attribute__((amp,cpu)) -#endif - -#if defined( _OPENMP ) - // Compiling with OpenMP. 
- // The value of _OPENMP is an integer value YYYYMM - // where YYYY and MM are the year and month designation - // of the supported OpenMP API version. -#endif // #if defined( _OPENMP ) //---------------------------------------------------------------------------- // Mapping compiler built-ins to KOKKOS_COMPILER_*** macros @@ -263,7 +255,7 @@ #endif #endif -#if defined( __PGIC__ ) +#if defined( __PGIC__ ) #define KOKKOS_COMPILER_PGI __PGIC__*100+__PGIC_MINOR__*10+__PGIC_PATCHLEVEL__ #if ( 1540 > KOKKOS_COMPILER_PGI ) @@ -272,6 +264,36 @@ #endif //#endif // #if !defined( __CUDA_ARCH__ ) +//---------------------------------------------------------------------------- +// Language info: C++, CUDA, OPENMP + +#if defined( KOKKOS_ENABLE_CUDA ) + // Compiling Cuda code to 'ptx' + + #define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__ + #define KOKKOS_INLINE_FUNCTION __device__ __host__ inline + #define KOKKOS_FUNCTION __device__ __host__ + #if defined( KOKKOS_COMPILER_NVCC ) + #define KOKKOS_INLINE_FUNCTION_DELETED inline + #else + #define KOKKOS_INLINE_FUNCTION_DELETED __device__ __host__ inline + #endif +#endif // #if defined( __CUDA_ARCH__ ) + +#if defined( KOKKOS_ENABLE_ROCM ) && defined( __HCC__ ) + + #define KOKKOS_FORCEINLINE_FUNCTION __attribute__((amp,cpu)) inline + #define KOKKOS_INLINE_FUNCTION __attribute__((amp,cpu)) inline + #define KOKKOS_FUNCTION __attribute__((amp,cpu)) + #define KOKKOS_LAMBDA [=] __attribute__((amp,cpu)) +#endif + +#if defined( _OPENMP ) + // Compiling with OpenMP. + // The value of _OPENMP is an integer value YYYYMM + // where YYYY and MM are the year and month designation + // of the supported OpenMP API version. 
+#endif // #if defined( _OPENMP ) //---------------------------------------------------------------------------- // Intel compiler macros @@ -320,7 +342,10 @@ #if defined( KOKKOS_ARCH_AVX512MIC ) #define KOKKOS_ENABLE_RFO_PREFETCH 1 - #endif + #if (KOKKOS_COMPILER_INTEL < 1800) && !defined(KOKKOS_KNL_USE_ASM_WORKAROUND) + #define KOKKOS_KNL_USE_ASM_WORKAROUND 1 + #endif + #endif #if defined( __MIC__ ) // Compiling for Xeon Phi @@ -386,6 +411,8 @@ #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) #endif + #define KOKKOS_RESTRICT __restrict__ + #if !defined( KOKKOS_ENABLE_ASM ) && !defined( __PGIC__ ) && \ ( defined( __amd64 ) || defined( __amd64__ ) || \ defined( __x86_64 ) || defined( __x86_64__ ) || \ @@ -416,7 +443,7 @@ // Define function marking macros if compiler specific macros are undefined: #if !defined( KOKKOS_FORCEINLINE_FUNCTION ) - #define KOKKOS_FORCEINLINE_FUNCTION inline + #define KOKKOS_FORCEINLINE_FUNCTION inline #endif #if !defined( KOKKOS_INLINE_FUNCTION ) @@ -427,6 +454,9 @@ #define KOKKOS_FUNCTION /**/ #endif +#if !defined( KOKKOS_INLINE_FUNCTION_DELETED ) + #define KOKKOS_INLINE_FUNCTION_DELETED inline +#endif //---------------------------------------------------------------------------- // Define empty macro for restrict if necessary: @@ -459,18 +489,20 @@ ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \ ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \ ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) ? 1 : 0 ) + \ + ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX ) ? 1 : 0 ) + \ ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) ) #error "More than one KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_* specified." #endif // If default is not specified then chose from enabled execution spaces. 
-// Priority: CUDA, OPENMP, THREADS, QTHREADS, SERIAL +// Priority: CUDA, OPENMP, THREADS, QTHREADS, HPX, SERIAL #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM ) #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET ) #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) //#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX ) #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) #elif defined( KOKKOS_ENABLE_CUDA ) #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA @@ -484,6 +516,8 @@ #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS //#elif defined( KOKKOS_ENABLE_QTHREADS ) // #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS +#elif defined( KOKKOS_ENABLE_HPX ) + #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX #else #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL #endif @@ -539,7 +573,27 @@ #define KOKKOS_IMPL_CTOR_DEFAULT_ARG KOKKOS_INVALID_INDEX #endif +#if (defined(KOKKOS_ENABLE_CXX14) || defined(KOKKOS_ENABLE_CXX17) || defined(KOKKOS_ENABLE_CXX20)) + #define KOKKOS_CONSTEXPR_14 constexpr + #define KOKKOS_DEPRECATED [[deprecated]] + #define KOKKOS_DEPRECATED_TRAILING_ATTRIBUTE +#else + #define KOKKOS_CONSTEXPR_14 + #if defined(KOKKOS_COMPILER_GNU) || defined(KOKKOS_COMPILER_CLANG) + #define KOKKOS_DEPRECATED + #define KOKKOS_DEPRECATED_TRAILING_ATTRIBUTE __attribute__ ((deprecated)) + #else + #define KOKKOS_DEPRECATED + #define KOKKOS_DEPRECATED_TRAILING_ATTRIBUTE + #endif +#endif +// DJS 05/28/2019: Bugfix: Issue 2155 +// Use KOKKOS_ENABLE_CUDA_LDG_INTRINSIC to avoid memory leak in RandomAccess View +#if defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC) + #define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC +#endif + #endif // #ifndef KOKKOS_MACROS_HPP diff --git a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp index 
157345c552..365db2baec 100644 --- a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp +++ b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp @@ -132,12 +132,18 @@ private: public: + using memory_space = typename DeviceType::memory_space; + /**\brief The maximum size of a superblock and block */ enum : uint32_t { max_superblock_size = 1LU << 31 /* 2 gigabytes */ }; enum : uint32_t { max_block_per_superblock = max_bit_count }; //-------------------------------------------------------------------------- + KOKKOS_INLINE_FUNCTION + bool operator==(MemoryPool const& other) const + { return m_sb_state_array == other.m_sb_state_array; } + KOKKOS_INLINE_FUNCTION size_t capacity() const noexcept { return size_t(m_sb_count) << m_sb_size_lg2 ; } diff --git a/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp b/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp index eebc83cf3d..509ac6499e 100644 --- a/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp +++ b/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp @@ -71,13 +71,18 @@ template < unsigned T > struct MemoryTraits { //! 
Tag this class as a kokkos memory traits: typedef MemoryTraits memory_traits ; - +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE enum : bool { Unmanaged = (unsigned(0) != (T & unsigned(Kokkos::Unmanaged))) }; enum : bool { RandomAccess = (unsigned(0) != (T & unsigned(Kokkos::RandomAccess))) }; enum : bool { Atomic = (unsigned(0) != (T & unsigned(Kokkos::Atomic))) }; enum : bool { Restrict = (unsigned(0) != (T & unsigned(Kokkos::Restrict))) }; enum : bool { Aligned = (unsigned(0) != (T & unsigned(Kokkos::Aligned))) }; - +#endif + enum : bool { is_unmanaged = (unsigned(0) != (T & unsigned(Kokkos::Unmanaged))) }; + enum : bool { is_random_access = (unsigned(0) != (T & unsigned(Kokkos::RandomAccess))) }; + enum : bool { is_atomic = (unsigned(0) != (T & unsigned(Kokkos::Atomic))) }; + enum : bool { is_restrict = (unsigned(0) != (T & unsigned(Kokkos::Restrict))) }; + enum : bool { is_aligned = (unsigned(0) != (T & unsigned(Kokkos::Aligned))) }; }; } // namespace Kokkos diff --git a/lib/kokkos/core/src/Kokkos_OpenMP.hpp b/lib/kokkos/core/src/Kokkos_OpenMP.hpp index ed4071a6da..6ee8f08dc8 100644 --- a/lib/kokkos/core/src/Kokkos_OpenMP.hpp +++ b/lib/kokkos/core/src/Kokkos_OpenMP.hpp @@ -107,8 +107,14 @@ public: /// \brief Wait until all dispatched functors complete on the given instance /// /// This is a no-op on OpenMP - inline + static void impl_static_fence( OpenMP const& = OpenMP() ) noexcept; + + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE static void fence( OpenMP const& = OpenMP() ) noexcept; + #else + void fence() const; + #endif + /// \brief Does the given instance return immediately after launching /// a parallel algorithm diff --git a/lib/kokkos/core/src/Kokkos_Pair.hpp b/lib/kokkos/core/src/Kokkos_Pair.hpp index 1be763be85..ab0ab8152a 100644 --- a/lib/kokkos/core/src/Kokkos_Pair.hpp +++ b/lib/kokkos/core/src/Kokkos_Pair.hpp @@ -528,6 +528,15 @@ KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>= (const pair& lhs, const pair& rhs) { return !(lhs struct is_pair_like : 
std::false_type { }; +template struct is_pair_like> : std::true_type { }; +template struct is_pair_like> : std::true_type { }; + +} // end namespace Impl + } // namespace Kokkos diff --git a/lib/kokkos/core/src/Kokkos_Parallel.hpp b/lib/kokkos/core/src/Kokkos_Parallel.hpp index b095f5728e..09dcf60b11 100644 --- a/lib/kokkos/core/src/Kokkos_Parallel.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel.hpp @@ -525,7 +525,7 @@ void parallel_scan( const ExecutionPolicy & policy Kokkos::Profiling::endParallelScan(kpID); } #endif - + Kokkos::fence(); } template< class FunctorType, class ReturnType > @@ -560,7 +560,7 @@ void parallel_scan( const size_t work_count Kokkos::Profiling::endParallelScan(kpID); } #endif - + Kokkos::fence(); } template< class ExecutionPolicy, class FunctorType, class ReturnType > diff --git a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp index 06aaa6546e..36bc6e4153 100644 --- a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp @@ -69,18 +69,19 @@ public: typedef Sum reducer; typedef typename std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - Sum(value_type& value_): value(&value_) {} + Sum(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - Sum(const result_view_type& value_): value(value_.data()) {} + Sum(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -100,12 +101,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; 
} }; @@ -116,18 +122,19 @@ public: typedef Prod reducer; typedef typename std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - Prod(value_type& value_): value(&value_) {} + Prod(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - Prod(const result_view_type& value_): value(value_.data()) {} + Prod(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -147,12 +154,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -163,18 +175,19 @@ public: typedef Min reducer; typedef typename std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - Min(value_type& value_): value(&value_) {} + Min(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - Min(const result_view_type& value_): value(value_.data()) {} + Min(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -196,12 +209,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -212,18 +230,19 @@ public: typedef Max reducer; typedef typename 
std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - Max(value_type& value_): value(&value_) {} + Max(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - Max(const result_view_type& value_): value(value_.data()) {} + Max(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -246,12 +265,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -262,18 +286,19 @@ public: typedef LAnd reducer; typedef typename std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - LAnd(value_type& value_): value(&value_) {} + LAnd(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - LAnd(const result_view_type& value_): value(value_.data()) {} + LAnd(const result_view_type& value_): value(value_),references_scalar_v(false) {} KOKKOS_INLINE_FUNCTION void join(value_type& dest, const value_type& src) const { @@ -292,12 +317,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -308,18 +338,19 @@ public: typedef LOr reducer; typedef typename std::remove_cv::type value_type; - 
typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - LOr(value_type& value_): value(&value_) {} + LOr(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - LOr(const result_view_type& value_): value(value_.data()) {} + LOr(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -339,12 +370,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -355,18 +391,19 @@ public: typedef BAnd reducer; typedef typename std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - BAnd(value_type& value_): value(&value_) {} + BAnd(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - BAnd(const result_view_type& value_): value(value_.data()) {} + BAnd(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -386,12 +423,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -402,18 +444,19 @@ public: typedef BOr reducer; typedef typename std::remove_cv::type value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; 
private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - BOr(value_type& value_): value(&value_) {} + BOr(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - BOr(const result_view_type& value_): value(value_.data()) {} + BOr(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -433,12 +476,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -471,18 +519,19 @@ public: typedef MinLoc reducer; typedef ValLocScalar value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - MinLoc(value_type& value_): value(&value_) {} + MinLoc(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - MinLoc(const result_view_type& value_): value(value_.data()) {} + MinLoc(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required @@ -506,12 +555,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -526,18 +580,19 @@ public: typedef MaxLoc reducer; typedef ValLocScalar value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - 
MaxLoc(value_type& value_): value(&value_) {} + MaxLoc(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - MaxLoc(const result_view_type& value_): value(value_.data()) {} + MaxLoc(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -560,12 +615,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -596,18 +656,19 @@ public: typedef MinMax reducer; typedef MinMaxScalar value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - MinMax(value_type& value_): value(&value_) {} + MinMax(value_type& value_): value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - MinMax(const result_view_type& value_): value(value_.data()) {} + MinMax(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -638,12 +699,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; @@ -680,18 +746,19 @@ public: typedef MinMaxLoc reducer; typedef MinMaxLocScalar value_type; - typedef Kokkos::View > result_view_type; + typedef Kokkos::View result_view_type; private: - value_type* value; + result_view_type value; + bool references_scalar_v; public: KOKKOS_INLINE_FUNCTION - MinMaxLoc(value_type& value_): value(&value_) {} + MinMaxLoc(value_type& value_): 
value(&value_),references_scalar_v(true) {} KOKKOS_INLINE_FUNCTION - MinMaxLoc(const result_view_type& value_): value(value_.data()) {} + MinMaxLoc(const result_view_type& value_): value(value_),references_scalar_v(false) {} //Required KOKKOS_INLINE_FUNCTION @@ -728,12 +795,17 @@ public: KOKKOS_INLINE_FUNCTION value_type& reference() const { - return *value; + return *value.data(); } KOKKOS_INLINE_FUNCTION result_view_type view() const { - return result_view_type(value); + return value; + } + + KOKKOS_INLINE_FUNCTION + bool references_scalar() const { + return references_scalar_v; } }; } @@ -813,7 +885,7 @@ struct ParallelReduceReturnValue + struct ReducerHasTestReferenceFunction + { + template static std::true_type test_func( decltype(&E::references_scalar) ) ; + template static std::false_type test_func(...); + + enum { value = std::is_same(0))>::value }; + }; + + template::value> + struct ParallelReduceFence { + static void fence(const T&) { + Kokkos::fence(); + } + }; + template + struct ParallelReduceFence, false> { + static void fence(const View) {}; + }; + template + struct ParallelReduceFence { + static void fence(const T& reducer) { + if(reducer.references_scalar()) + Kokkos::fence(); + } + }; +} + /** \brief Parallel reduction * * parallel_reduce performs parallel reductions with arbitrary functions - i.e. 
@@ -959,6 +1062,7 @@ void parallel_reduce(const std::string& label, Kokkos::Impl::is_execution_policy::value >::type * = 0) { Impl::ParallelReduceAdaptor::execute(label,policy,functor,return_value); + Impl::ParallelReduceFence::fence(return_value); } template< class PolicyType, class FunctorType, class ReturnType > @@ -970,6 +1074,7 @@ void parallel_reduce(const PolicyType& policy, Kokkos::Impl::is_execution_policy::value >::type * = 0) { Impl::ParallelReduceAdaptor::execute("",policy,functor,return_value); + Impl::ParallelReduceFence::fence(return_value); } template< class FunctorType, class ReturnType > @@ -979,6 +1084,7 @@ void parallel_reduce(const size_t& policy, ReturnType& return_value) { typedef typename Impl::ParallelReducePolicyType::policy_type policy_type; Impl::ParallelReduceAdaptor::execute("",policy_type(0,policy),functor,return_value); + Impl::ParallelReduceFence::fence(return_value); } template< class FunctorType, class ReturnType > @@ -989,6 +1095,7 @@ void parallel_reduce(const std::string& label, ReturnType& return_value) { typedef typename Impl::ParallelReducePolicyType::policy_type policy_type; Impl::ParallelReduceAdaptor::execute(label,policy_type(0,policy),functor,return_value); + Impl::ParallelReduceFence::fence(return_value); } // ReturnValue as View or Reducer: take by copy to allow for inline construction @@ -1004,6 +1111,7 @@ void parallel_reduce(const std::string& label, >::type * = 0) { ReturnType return_value_impl = return_value; Impl::ParallelReduceAdaptor::execute(label,policy,functor,return_value_impl); + Impl::ParallelReduceFence::fence(return_value); } template< class PolicyType, class FunctorType, class ReturnType > @@ -1016,6 +1124,7 @@ void parallel_reduce(const PolicyType& policy, >::type * = 0) { ReturnType return_value_impl = return_value; Impl::ParallelReduceAdaptor::execute("",policy,functor,return_value_impl); + Impl::ParallelReduceFence::fence(return_value); } template< class FunctorType, class ReturnType > @@ -1026,6 
+1135,7 @@ void parallel_reduce(const size_t& policy, typedef typename Impl::ParallelReducePolicyType::policy_type policy_type; ReturnType return_value_impl = return_value; Impl::ParallelReduceAdaptor::execute("",policy_type(0,policy),functor,return_value_impl); + Impl::ParallelReduceFence::fence(return_value); } template< class FunctorType, class ReturnType > @@ -1037,6 +1147,7 @@ void parallel_reduce(const std::string& label, typedef typename Impl::ParallelReducePolicyType::policy_type policy_type; ReturnType return_value_impl = return_value; Impl::ParallelReduceAdaptor::execute(label,policy_type(0,policy),functor,return_value_impl); + Impl::ParallelReduceFence::fence(return_value); } // No Return Argument diff --git a/lib/kokkos/core/src/Kokkos_PointerOwnership.hpp b/lib/kokkos/core/src/Kokkos_PointerOwnership.hpp new file mode 100644 index 0000000000..be76ec3def --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_PointerOwnership.hpp @@ -0,0 +1,74 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_POINTEROWNERSHIP_HPP +#define KOKKOS_IMPL_POINTEROWNERSHIP_HPP + +#include + +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +/// Trivial wrapper for raw pointers that express ownership. +template +using OwningRawPtr = T*; + +/// Trivial wrapper for raw pointers that do not express ownership. 
+template +using ObservingRawPtr = T*; + +} // end namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + + + +#endif /* #ifndef KOKKOS_IMPL_POINTEROWNERSHIP_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_ROCm.hpp b/lib/kokkos/core/src/Kokkos_ROCm.hpp index 469d6b2787..96207e73c6 100644 --- a/lib/kokkos/core/src/Kokkos_ROCm.hpp +++ b/lib/kokkos/core/src/Kokkos_ROCm.hpp @@ -140,7 +140,14 @@ public: static bool wake() ; /** \brief Wait until all dispatched functors complete. A noop for OpenMP. */ - static void fence() ; + static void impl_static_fence(); + + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE + static void fence(); + #else + void fence() const; + #endif + /// \brief Print configuration information to the given output stream. static void print_configuration( std::ostream & , const bool detail = false ); diff --git a/lib/kokkos/core/src/Kokkos_Serial.hpp b/lib/kokkos/core/src/Kokkos_Serial.hpp index 01701e53a2..5821b0c0c5 100644 --- a/lib/kokkos/core/src/Kokkos_Serial.hpp +++ b/lib/kokkos/core/src/Kokkos_Serial.hpp @@ -118,10 +118,16 @@ public: /// return asynchronously, before the functor completes. This /// method does not return until all dispatched functors on this /// device have completed. + static void impl_static_fence() {} + + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE static void fence() {} + #else + void fence() const {} + #endif /** \brief Return the maximum amount of concurrency. */ - static int concurrency() {return 1;}; + static int concurrency() {return 1;} //! Print configuration information to the given output stream. static void print_configuration( std::ostream & , const bool /* detail */ = false ) {} @@ -261,6 +267,20 @@ public: return *this; } + template + friend class TeamPolicyInternal; + + template< class ... 
OtherProperties > + TeamPolicyInternal(const TeamPolicyInternal& p) { + m_league_size = p.m_league_size; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + } + + //---------------------------------------- #ifdef KOKKOS_ENABLE_DEPRECATED_CODE template< class FunctorType > @@ -302,7 +322,7 @@ public: 20*1024*1024); } /** \brief Specify league size, request team size */ - TeamPolicyInternal( execution_space & + TeamPolicyInternal( const execution_space & , int league_size_request #ifndef KOKKOS_ENABLE_DEPRECATED_CODE , int team_size_request @@ -320,7 +340,7 @@ public: #endif } - TeamPolicyInternal( execution_space & + TeamPolicyInternal( const execution_space & , int league_size_request , const Kokkos::AUTO_t & /* team_size_request */ , int /* vector_length_request */ = 1 ) diff --git a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp index 5045e9cbbc..1c3d58af08 100644 --- a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp +++ b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp @@ -50,393 +50,203 @@ #if defined( KOKKOS_ENABLE_TASKDAG ) #include +#include //---------------------------------------------------------------------------- #include #include -//---------------------------------------------------------------------------- - -namespace Kokkos { - -// Forward declarations used in Impl::TaskQueue - -template< typename Arg1 = void , typename Arg2 = void > -class Future ; - -template< typename Space > -class TaskScheduler ; - -template< typename Space > -void wait( TaskScheduler< Space > const & ); - -template< typename Space > -struct is_scheduler : public std::false_type {}; - -template< typename Space > -struct is_scheduler< TaskScheduler< Space > > : public std::true_type {}; - -} // namespace Kokkos - +#include #include 
+#include +#include +#include +#include +#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { + namespace Impl { -/*\brief Implementation data for task data management, access, and execution. - * - * CRTP Inheritance structure to allow static_cast from the - * task root type and a task's FunctorType. - * - * TaskBase< Space , ResultType , FunctorType > - * : TaskBase< Space , ResultType , void > - * , FunctorType - * { ... }; - * - * TaskBase< Space , ResultType , void > - * : TaskBase< Space , void , void > - * { ... }; - */ -template< typename Space , typename ResultType , typename FunctorType > -class TaskBase ; +template +class TaskExec; -} // namespace Impl -} // namespace Kokkos +} // end namespace Impl -//---------------------------------------------------------------------------- - -namespace Kokkos { - -/** - * - * Future< space > // value_type == void - * Future< value > // space == Default - * Future< value , space > - * - */ -template< typename Arg1 , typename Arg2 > -class Future { -private: - - template< typename > friend class TaskScheduler ; - template< typename , typename > friend class Future ; - template< typename , typename , typename > friend class Impl::TaskBase ; - - enum { Arg1_is_space = Kokkos::is_space< Arg1 >::value }; - enum { Arg2_is_space = Kokkos::is_space< Arg2 >::value }; - enum { Arg1_is_value = ! Arg1_is_space && - ! std::is_same< Arg1 , void >::value }; - enum { Arg2_is_value = ! Arg2_is_space && - ! std::is_same< Arg2 , void >::value }; - - static_assert( ! ( Arg1_is_space && Arg2_is_space ) - , "Future cannot be given two spaces" ); - - static_assert( ! 
( Arg1_is_value && Arg2_is_value ) - , "Future cannot be given two value types" ); - - using ValueType = - typename std::conditional< Arg1_is_value , Arg1 , - typename std::conditional< Arg2_is_value , Arg2 , void - >::type >::type ; - - using Space = - typename std::conditional< Arg1_is_space , Arg1 , - typename std::conditional< Arg2_is_space , Arg2 , void - >::type >::type ; - - using task_base = Impl::TaskBase< void , void , void > ; - using queue_type = Impl::TaskQueue< Space > ; - - task_base * m_task ; - - KOKKOS_INLINE_FUNCTION explicit - Future( task_base * task ) : m_task(0) - { if ( task ) queue_type::assign( & m_task , task ); } - - //---------------------------------------- +template +class BasicTaskScheduler : public Impl::TaskSchedulerBase +{ public: - using execution_space = typename Space::execution_space ; - using value_type = ValueType ; + using scheduler_type = BasicTaskScheduler; + using execution_space = ExecSpace; + using queue_type = QueueType; + using memory_space = typename queue_type::memory_space; + using memory_pool = typename queue_type::memory_pool; + using specialization = Impl::TaskQueueSpecialization; + using member_type = typename specialization::member_type; + using team_scheduler_type = BasicTaskScheduler; + template + using runnable_task_type = Impl::Task; + template + using future_type = Kokkos::BasicFuture; + template + using future_type_for_functor = future_type; - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - bool is_null() const { return 0 == m_task ; } - - KOKKOS_INLINE_FUNCTION - int reference_count() const - { return 0 != m_task ? 
m_task->reference_count() : 0 ; } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - void clear() - { if ( m_task ) queue_type::assign( & m_task , (task_base*)0 ); } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - ~Future() { clear(); } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - constexpr Future() noexcept : m_task(0) {} - - KOKKOS_INLINE_FUNCTION - Future( Future && rhs ) - : m_task( rhs.m_task ) { rhs.m_task = 0 ; } - - KOKKOS_INLINE_FUNCTION - Future( const Future & rhs ) - : m_task(0) - { if ( rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); } - - KOKKOS_INLINE_FUNCTION - Future & operator = ( Future && rhs ) - { - clear(); - m_task = rhs.m_task ; - rhs.m_task = 0 ; - return *this ; - } - - KOKKOS_INLINE_FUNCTION - Future & operator = ( const Future & rhs ) - { - if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); - return *this ; - } - - //---------------------------------------- - - template< class A1 , class A2 > - KOKKOS_INLINE_FUNCTION - Future( Future && rhs ) - : m_task( rhs.m_task ) - { - static_assert - ( std::is_same< Space , void >::value || - std::is_same< Space , typename Future::Space >::value - , "Assigned Futures must have the same space" ); - - static_assert - ( std::is_same< value_type , void >::value || - std::is_same< value_type , typename Future::value_type >::value - , "Assigned Futures must have the same value_type" ); - - rhs.m_task = 0 ; - } - - template< class A1 , class A2 > - KOKKOS_INLINE_FUNCTION - Future( const Future & rhs ) - : m_task(0) - { - static_assert - ( std::is_same< Space , void >::value || - std::is_same< Space , typename Future::Space >::value - , "Assigned Futures must have the same space" ); - - static_assert - ( std::is_same< value_type , void >::value || - std::is_same< value_type , typename Future::value_type >::value - , "Assigned Futures must have the same value_type" ); - - if ( rhs.m_task ) 
queue_type::assign( & m_task , rhs.m_task ); - } - - template< class A1 , class A2 > - KOKKOS_INLINE_FUNCTION - Future & operator = ( const Future & rhs ) - { - static_assert - ( std::is_same< Space , void >::value || - std::is_same< Space , typename Future::Space >::value - , "Assigned Futures must have the same space" ); - - static_assert - ( std::is_same< value_type , void >::value || - std::is_same< value_type , typename Future::value_type >::value - , "Assigned Futures must have the same value_type" ); - - if ( m_task || rhs.m_task ) queue_type::assign( & m_task , rhs.m_task ); - return *this ; - } - - template< class A1 , class A2 > - KOKKOS_INLINE_FUNCTION - Future & operator = ( Future && rhs ) - { - static_assert - ( std::is_same< Space , void >::value || - std::is_same< Space , typename Future::Space >::value - , "Assigned Futures must have the same space" ); - - static_assert - ( std::is_same< value_type , void >::value || - std::is_same< value_type , typename Future::value_type >::value - , "Assigned Futures must have the same value_type" ); - - clear(); - m_task = rhs.m_task ; - rhs.m_task = 0 ; - return *this ; - } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - int is_ready() const noexcept - { return ( 0 == m_task ) || ( ((task_base*) task_base::LockTag) == m_task->m_wait ); } - - KOKKOS_INLINE_FUNCTION - const typename Impl::TaskResult< ValueType >::reference_type - get() const - { - if ( 0 == m_task ) { - Kokkos::abort( "Kokkos:::Future::get ERROR: is_null()"); - } - return Impl::TaskResult< ValueType >::get( m_task ); - } -}; - -// Is a Future with the given execution space -template< typename , typename ExecSpace = void > -struct is_future : public std::false_type {}; - -template< typename Arg1 , typename Arg2 , typename ExecSpace > -struct is_future< Future , ExecSpace > - : public std::integral_constant - < bool , - ( std::is_same< ExecSpace , void >::value || - std::is_same< ExecSpace - , typename 
Future::execution_space >::value ) - > {}; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -enum class TaskPriority : int { High = 0 - , Regular = 1 - , Low = 2 }; - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- - -template< int TaskEnum , typename DepFutureType > -struct TaskPolicyData -{ - using execution_space = typename DepFutureType::execution_space ; - using scheduler_type = TaskScheduler< execution_space > ; - - enum : int { m_task_type = TaskEnum }; - - scheduler_type const * m_scheduler ; - DepFutureType const m_dependence ; - int m_priority ; - - TaskPolicyData() = delete ; - TaskPolicyData( TaskPolicyData && ) = default ; - TaskPolicyData( TaskPolicyData const & ) = default ; - TaskPolicyData & operator = ( TaskPolicyData && ) = default ; - TaskPolicyData & operator = ( TaskPolicyData const & ) = default ; - - KOKKOS_INLINE_FUNCTION - TaskPolicyData( DepFutureType const & arg_future - , Kokkos::TaskPriority const & arg_priority ) - : m_scheduler( 0 ) - , m_dependence( arg_future ) - , m_priority( static_cast( arg_priority ) ) - {} - - KOKKOS_INLINE_FUNCTION - TaskPolicyData( scheduler_type const & arg_scheduler - , Kokkos::TaskPriority const & arg_priority ) - : m_scheduler( & arg_scheduler ) - , m_dependence() - , m_priority( static_cast( arg_priority ) ) - {} - - KOKKOS_INLINE_FUNCTION - TaskPolicyData( scheduler_type const & arg_scheduler - , DepFutureType const & arg_future - , Kokkos::TaskPriority const & arg_priority ) - : m_scheduler( & arg_scheduler ) - , m_dependence( arg_future ) - , m_priority( static_cast( arg_priority ) ) - 
{} -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -template< typename ExecSpace > -class TaskScheduler -{ private: using track_type = Kokkos::Impl::SharedAllocationTracker ; - using queue_type = Kokkos::Impl::TaskQueue< ExecSpace > ; - using task_base = Impl::TaskBase< void , void , void > ; + using task_base = Impl::TaskBase; - track_type m_track ; - queue_type * m_queue ; + track_type m_track; + queue_type * m_queue; //---------------------------------------- + template + friend class Impl::TaskQueue; + template + friend struct Impl::TaskQueueSpecialization; + template + friend class Impl::TaskQueueSpecializationConstrained; + template + friend class Impl::TaskTeamMemberAdapter; + template + friend class Impl::TaskExec; + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + BasicTaskScheduler( + track_type arg_track, + queue_type* arg_queue + ) + : m_track(std::move(arg_track)), + m_queue(std::move(arg_queue)) + { } + + KOKKOS_INLINE_FUNCTION + team_scheduler_type get_team_scheduler(int team_rank) const { + return { m_track, &m_queue->get_team_queue(team_rank) }; + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + static constexpr task_base* _get_task_ptr(std::nullptr_t) { return nullptr; } + + template + KOKKOS_INLINE_FUNCTION + static constexpr task_base* _get_task_ptr(future_type&& f) + { + return f.m_task; + } + + template< int TaskEnum , typename DepTaskType , typename FunctorType > + KOKKOS_FUNCTION + Kokkos::BasicFuture + _spawn_impl( + DepTaskType* arg_predecessor_task, + TaskPriority arg_priority, + typename task_base::function_type arg_function, + typename task_base::destroy_type arg_destroy, + FunctorType&& arg_functor + ) + { + using functor_future_type = future_type_for_functor::type>; + using task_type = Impl::Task; 
+ + //---------------------------------------- + // Give single-thread back-ends an opportunity to clear + // queue of ready tasks before allocating a new task + + // TODO @tasking @optimization DSH re-enable this, maybe? + // specialization::iff_single_thread_recursive_execute(scheduler); + + //---------------------------------------- + + functor_future_type f ; + + // Allocate task from memory pool + + const size_t alloc_size = + m_queue->template spawn_allocation_size< FunctorType >(); + + void* task_storage = m_queue->allocate(alloc_size); + + if (task_storage) { + + // Placement new construction + // Reference count starts at two: + // +1 for the matching decrement when task is complete + // +1 for the future + f.m_task = new (task_storage) task_type( std::forward(arg_functor) ); + + f.m_task->m_apply = arg_function; + //f.m_task->m_destroy = arg_destroy; + f.m_task->m_queue = m_queue; + f.m_task->m_next = arg_predecessor_task; + f.m_task->m_ref_count = 2; + f.m_task->m_alloc_size = alloc_size; + f.m_task->m_task_type = TaskEnum; + f.m_task->m_priority = (int16_t)arg_priority; + + Kokkos::memory_fence(); + + // The dependence (if any) is processed immediately + // within the schedule function, as such the dependence's + // reference count does not need to be incremented for + // the assignment. + + m_queue->schedule_runnable( f.m_task ); + // This task may be updated or executed at any moment, + // even during the call to 'schedule'. 
+ } + + return f; + + } + public: - using execution_space = ExecSpace ; - using memory_space = typename queue_type::memory_space ; - using memory_pool = typename queue_type::memory_pool ; - using member_type = - typename Kokkos::Impl::TaskQueueSpecialization< ExecSpace >::member_type ; KOKKOS_INLINE_FUNCTION - TaskScheduler() : m_track(), m_queue(0) {} + BasicTaskScheduler() : m_track(), m_queue(0) {} KOKKOS_INLINE_FUNCTION - TaskScheduler( TaskScheduler && rhs ) - : m_track( rhs.m_track ), m_queue( rhs.m_queue ) {} + BasicTaskScheduler( BasicTaskScheduler && rhs ) noexcept + : m_track(rhs.m_track), // probably should be a move, but this is deprecated code anyway + m_queue(std::move(rhs.m_queue)) + { } KOKKOS_INLINE_FUNCTION - TaskScheduler( TaskScheduler const & rhs ) - : m_track( rhs.m_track ), m_queue( rhs.m_queue ) {} + BasicTaskScheduler( BasicTaskScheduler const & rhs ) + : m_track(rhs.m_track), + m_queue(rhs.m_queue) + { } KOKKOS_INLINE_FUNCTION - TaskScheduler & operator = ( TaskScheduler && rhs ) - { m_track = rhs.m_track ; m_queue = rhs.m_queue ; return *this ; } + BasicTaskScheduler& operator=(BasicTaskScheduler&& rhs) noexcept + { + m_track = rhs.m_track; // probably should be a move, but this is deprecated code anyway + m_queue = std::move(rhs.m_queue); + return *this; + } KOKKOS_INLINE_FUNCTION - TaskScheduler & operator = ( TaskScheduler const & rhs ) - { m_track = rhs.m_track ; m_queue = rhs.m_queue ; return *this ; } + BasicTaskScheduler& operator=(BasicTaskScheduler const& rhs) + { + m_track = rhs.m_track; + m_queue = rhs.m_queue; + return *this; + } - TaskScheduler( memory_pool const & arg_memory_pool ) - : m_track() - , m_queue(0) + explicit BasicTaskScheduler(memory_pool const & arg_memory_pool) noexcept + : m_track(), m_queue(0) { typedef Kokkos::Impl::SharedAllocationRecord < memory_space , typename queue_type::Destroy > @@ -455,13 +265,13 @@ public: m_track.assign_allocated_record_to_uninitialized( record ); } - TaskScheduler( memory_space 
const & arg_memory_space + BasicTaskScheduler( memory_space const & arg_memory_space , size_t const mempool_capacity , unsigned const mempool_min_block_size // = 1u << 6 , unsigned const mempool_max_block_size // = 1u << 10 , unsigned const mempool_superblock_size // = 1u << 12 ) - : TaskScheduler( memory_pool( arg_memory_space + : BasicTaskScheduler( memory_pool( arg_memory_space , mempool_capacity , mempool_min_block_size , mempool_max_block_size @@ -470,6 +280,12 @@ public: //---------------------------------------- + KOKKOS_INLINE_FUNCTION + queue_type& queue() const noexcept { + KOKKOS_EXPECTS(m_queue != nullptr); + return *m_queue; + } + KOKKOS_INLINE_FUNCTION memory_pool * memory() const noexcept { return m_queue ? &( m_queue->m_memory ) : (memory_pool*) 0 ; } @@ -486,216 +302,173 @@ public: size_t when_all_allocation_size( int narg ) const { return m_queue->when_all_allocation_size( narg ); } + //---------------------------------------- - template< int TaskEnum , typename DepFutureType , typename FunctorType > + template KOKKOS_FUNCTION static - Kokkos::Future< typename FunctorType::value_type , execution_space > - spawn( Impl::TaskPolicyData const & arg_policy - , typename task_base::function_type arg_function - , FunctorType && arg_functor - ) - { - using value_type = typename FunctorType::value_type ; - using future_type = Future< value_type , execution_space > ; - using task_type = Impl::TaskBase< execution_space - , value_type - , FunctorType > ; + Kokkos::BasicFuture + spawn( + Impl::TaskPolicyWithScheduler&& arg_policy, + typename task_base::function_type arg_function, + typename task_base::destroy_type arg_destroy, + FunctorType&& arg_functor + ) + { + return std::move(arg_policy.scheduler()).template _spawn_impl( + _get_task_ptr(std::move(arg_policy.predecessor())), + arg_policy.priority(), + arg_function, + arg_destroy, + std::forward(arg_functor) + ); + } - queue_type * const queue = - arg_policy.m_scheduler ? 
arg_policy.m_scheduler->m_queue : ( - arg_policy.m_dependence.m_task - ? static_cast(arg_policy.m_dependence.m_task->m_queue) - : (queue_type*) 0 ); + template + KOKKOS_FUNCTION + future_type_for_functor::type> + spawn( + Impl::TaskPolicyWithPredecessor&& arg_policy, + FunctorType&& arg_functor + ) + { + using task_type = runnable_task_type; + typename task_type::function_type const ptr = task_type::apply; + typename task_type::destroy_type const dtor = task_type::destroy; - if ( 0 == queue ) { - Kokkos::abort("Kokkos spawn requires scheduler or non-null Future"); - } + return _spawn_impl( + _get_task_ptr(std::move(arg_policy).predecessor()), + arg_policy.priority(), + ptr, dtor, + std::forward(arg_functor) + ); + } - if ( arg_policy.m_dependence.m_task != 0 && - arg_policy.m_dependence.m_task->m_queue != queue ) { - Kokkos::abort("Kokkos spawn given incompatible scheduler and Future"); - } - - //---------------------------------------- - // Give single-thread back-ends an opportunity to clear - // queue of ready tasks before allocating a new task - - queue->iff_single_thread_recursive_execute(); - - //---------------------------------------- - - future_type f ; - - // Allocate task from memory pool - - const size_t alloc_size = - queue->template spawn_allocation_size< FunctorType >(); - - f.m_task = - reinterpret_cast< task_type * >(queue->allocate(alloc_size) ); - - if ( f.m_task ) { - - // Placement new construction - // Reference count starts at two: - // +1 for the matching decrement when task is complete - // +1 for the future - new ( f.m_task ) task_type( std::move(arg_functor) ); - - f.m_task->m_apply = arg_function ; - f.m_task->m_queue = queue ; - f.m_task->m_next = arg_policy.m_dependence.m_task ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = alloc_size ; - f.m_task->m_task_type = arg_policy.m_task_type ; - f.m_task->m_priority = arg_policy.m_priority ; - - Kokkos::memory_fence(); - - // The dependence (if any) is processed immediately - // 
within the schedule function, as such the dependence's - // reference count does not need to be incremented for - // the assignment. - - queue->schedule_runnable( f.m_task ); - // This task may be updated or executed at any moment, - // even during the call to 'schedule'. - } - - return f ; - } - - template< typename FunctorType , typename A1 , typename A2 > + template KOKKOS_FUNCTION static void - respawn( FunctorType * arg_self - , Future const & arg_dependence - , TaskPriority const & arg_priority - ) - { - // Precondition: task is in Executing state + respawn( + FunctorType* arg_self, + BasicFuture const & arg_dependence, + TaskPriority const & arg_priority + ) { + // Precondition: task is in Executing state - using value_type = typename FunctorType::value_type ; - using task_type = Impl::TaskBase< execution_space - , value_type - , FunctorType > ; + using value_type = typename FunctorType::value_type ; + using task_type = Impl::Task; - task_type * const task = static_cast< task_type * >( arg_self ); + task_type * const task = static_cast< task_type * >( arg_self ); - task->m_priority = static_cast(arg_priority); + task->m_priority = static_cast(arg_priority); - task->add_dependence( arg_dependence.m_task ); + task->add_dependence( arg_dependence.m_task ); - // Postcondition: task is in Executing-Respawn state - } + // Postcondition: task is in Executing-Respawn state + } template< typename FunctorType > KOKKOS_FUNCTION static void - respawn( FunctorType * arg_self - , TaskScheduler const & - , TaskPriority const & arg_priority - ) - { - // Precondition: task is in Executing state + respawn( + FunctorType* arg_self, + BasicTaskScheduler const &, + TaskPriority const & arg_priority + ) + { + // Precondition: task is in Executing state - using value_type = typename FunctorType::value_type ; - using task_type = Impl::TaskBase< execution_space - , value_type - , FunctorType > ; + using value_type = typename FunctorType::value_type; + using task_type = Impl::Task; - 
task_type * const task = static_cast< task_type * >( arg_self ); + task_type * const task = static_cast< task_type * >( arg_self ); - task->m_priority = static_cast(arg_priority); + task->m_priority = static_cast(arg_priority); - task->add_dependence( (task_base*) 0 ); + task->add_dependence( (task_base*) 0 ); - // Postcondition: task is in Executing-Respawn state - } + // Postcondition: task is in Executing-Respawn state + } //---------------------------------------- /**\brief Return a future that is complete * when all input futures are complete. */ - template< typename A1 , typename A2 > - KOKKOS_FUNCTION static - Future< execution_space > - when_all( Future< A1 , A2 > const arg[] , int narg ) - { - using future_type = Future< execution_space > ; + template + KOKKOS_FUNCTION + BasicFuture< void, scheduler_type > + when_all(BasicFuture const arg[], int narg) + { - future_type f ; + future_type f ; - if ( narg ) { + if ( narg ) { - queue_type * queue = 0 ; + queue_type* q = m_queue; - for ( int i = 0 ; i < narg ; ++i ) { - task_base * const t = arg[i].m_task ; - if ( 0 != t ) { - // Increment reference count to track subsequent assignment. 
- Kokkos::atomic_increment( &(t->m_ref_count) ); - if ( queue == 0 ) { - queue = static_cast< queue_type * >( t->m_queue ); - } - else if ( queue != static_cast< queue_type * >( t->m_queue ) ) { - Kokkos::abort("Kokkos when_all Futures must be in the same scheduler" ); - } - } - } + //BasicTaskScheduler const* scheduler_ptr = nullptr; - if ( queue != 0 ) { - - size_t const alloc_size = queue->when_all_allocation_size( narg ); - - f.m_task = - reinterpret_cast< task_base * >( queue->allocate( alloc_size ) ); - - if ( f.m_task ) { - - // Reference count starts at two: - // +1 to match decrement when task completes - // +1 for the future - - new( f.m_task ) task_base(); - - f.m_task->m_queue = queue ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = alloc_size ; - f.m_task->m_dep_count = narg ; - f.m_task->m_task_type = task_base::Aggregate ; - - // Assign dependences, reference counts were already incremented - - task_base * volatile * const dep = - f.m_task->aggregate_dependences(); - - for ( int i = 0 ; i < narg ; ++i ) { dep[i] = arg[i].m_task ; } - - Kokkos::memory_fence(); - - queue->schedule_aggregate( f.m_task ); - // this when_all may be processed at any moment + for ( int i = 0 ; i < narg ; ++i ) { + task_base * const t = arg[i].m_task ; + if ( nullptr != t ) { + // Increment reference count to track subsequent assignment. 
+ Kokkos::atomic_increment( &(t->m_ref_count) ); + if(q != static_cast< queue_type const* >(t->m_queue)) { + Kokkos::abort("Kokkos when_all Futures must be in the same scheduler" ); } } } - return f ; + if ( q != 0 ) { // this should probably handle the queue == 0 case, but this is deprecated code anyway + + size_t const alloc_size = q->when_all_allocation_size( narg ); + + f.m_task = + reinterpret_cast< task_base * >( q->allocate( alloc_size ) ); + //f.m_scheduler = *scheduler_ptr; + + if ( f.m_task ) { + + // Reference count starts at two: + // +1 to match decrement when task completes + // +1 for the future + + new( f.m_task ) task_base(); + + f.m_task->m_queue = q; + f.m_task->m_ref_count = 2 ; + f.m_task->m_alloc_size = static_cast(alloc_size); + f.m_task->m_dep_count = narg ; + f.m_task->m_task_type = task_base::Aggregate ; + + // Assign dependences, reference counts were already incremented + + task_base * volatile * const dep = + f.m_task->aggregate_dependences(); + + for ( int i = 0 ; i < narg ; ++i ) { dep[i] = arg[i].m_task ; } + + Kokkos::memory_fence(); + + q->schedule_aggregate( f.m_task ); + // this when_all may be processed at any moment + } + } } + return f ; + } + template < class F > KOKKOS_FUNCTION - Future< execution_space > + BasicFuture< void, scheduler_type > when_all( int narg , F const func ) { using input_type = decltype( func(0) ); - using future_type = Future< execution_space > ; static_assert( is_future< input_type >::value , "Functor must return a Kokkos::Future" ); - future_type f ; + future_type f ; if ( 0 == narg ) return f ; @@ -711,12 +484,16 @@ public: // +1 for the future new( f.m_task ) task_base(); + //f.m_scheduler = *this; - f.m_task->m_queue = m_queue ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = alloc_size ; - f.m_task->m_dep_count = narg ; - f.m_task->m_task_type = task_base::Aggregate ; + //f.m_task->m_scheduler = &f.m_scheduler; + f.m_task->m_queue = m_queue; + f.m_task->m_ref_count = 2 ; + 
f.m_task->m_alloc_size = static_cast(alloc_size); + f.m_task->m_dep_count = narg ; + f.m_task->m_task_type = task_base::Aggregate ; + //f.m_task->m_apply = nullptr; + //f.m_task->m_destroy = nullptr; // Assign dependences, reference counts were already incremented @@ -727,9 +504,10 @@ public: const input_type arg_f = func(i); if ( 0 != arg_f.m_task ) { - if ( m_queue != static_cast< queue_type * >( arg_f.m_task->m_queue ) ) { - Kokkos::abort("Kokkos when_all Futures must be in the same scheduler" ); - } + // Not scheduled, so task scheduler is not yet set + //if ( m_queue != static_cast< BasicTaskScheduler const * >( arg_f.m_task->m_scheduler )->m_queue ) { + // Kokkos::abort("Kokkos when_all Futures must be in the same scheduler" ); + //} // Increment reference count to track subsequent assignment. Kokkos::atomic_increment( &(arg_f.m_task->m_ref_count) ); dep[i] = arg_f.m_task ; @@ -764,9 +542,9 @@ public: //---------------------------------------- - template< typename S > + template friend - void Kokkos::wait( Kokkos::TaskScheduler< S > const & ); + void wait(Kokkos::BasicTaskScheduler const&); }; @@ -780,84 +558,122 @@ namespace Kokkos { //---------------------------------------------------------------------------- // Construct a TaskTeam execution policy -template< typename T > -Kokkos::Impl::TaskPolicyData - < Kokkos::Impl::TaskBase::TaskTeam - , typename std::conditional< Kokkos::is_future< T >::value , T , - typename Kokkos::Future< typename T::execution_space > >::type - > +template +Impl::TaskPolicyWithPredecessor< + Impl::TaskType::TaskTeam, + Kokkos::BasicFuture +> KOKKOS_INLINE_FUNCTION -TaskTeam( T const & arg - , TaskPriority const & arg_priority = TaskPriority::Regular - ) +TaskTeam( + Kokkos::BasicFuture arg_future, + TaskPriority arg_priority = TaskPriority::Regular +) { - static_assert( Kokkos::is_future::value || - Kokkos::is_scheduler::value - , "Kokkos TaskTeam argument must be Future or TaskScheduler" ); - - return - 
Kokkos::Impl::TaskPolicyData - < Kokkos::Impl::TaskBase::TaskTeam - , typename std::conditional< Kokkos::is_future< T >::value , T , - typename Kokkos::Future< typename T::execution_space > >::type - >( arg , arg_priority ); + return { std::move(arg_future), arg_priority }; } -template< typename E , typename F > -Kokkos::Impl:: - TaskPolicyData< Kokkos::Impl::TaskBase::TaskTeam , F > +template +Impl::TaskPolicyWithScheduler< + Impl::TaskType::TaskTeam, Scheduler +> KOKKOS_INLINE_FUNCTION -TaskTeam( TaskScheduler const & arg_scheduler - , F const & arg_future - , typename std::enable_if< Kokkos::is_future::value , - TaskPriority >::type const & arg_priority = TaskPriority::Regular - ) +TaskTeam( + Scheduler arg_scheduler, + typename std::enable_if< + Kokkos::is_scheduler::value, + TaskPriority + >::type arg_priority = TaskPriority::Regular +) { - return - Kokkos::Impl::TaskPolicyData - < Kokkos::Impl::TaskBase::TaskTeam , F > - ( arg_scheduler , arg_future , arg_priority ); + return { std::move(arg_scheduler), arg_priority }; +} + +template< + class Scheduler, + class PredecessorFuture +> +Impl::TaskPolicyWithScheduler< + Kokkos::Impl::TaskType::TaskTeam, + Scheduler, + PredecessorFuture +> +KOKKOS_INLINE_FUNCTION +TaskTeam( + Scheduler arg_scheduler, + PredecessorFuture arg_future, + typename std::enable_if< + Kokkos::is_scheduler::value + && Kokkos::is_future::value, + TaskPriority + >::type arg_priority = TaskPriority::Regular +) +{ + static_assert( + std::is_same::value, + "Can't create a task policy from a scheduler and a future from a different scheduler" + ); + + return { std::move(arg_scheduler), std::move(arg_future), arg_priority }; } // Construct a TaskSingle execution policy -template< typename T > -Kokkos::Impl::TaskPolicyData - < Kokkos::Impl::TaskBase::TaskSingle - , typename std::conditional< Kokkos::is_future< T >::value , T , - typename Kokkos::Future< typename T::execution_space > >::type - > +template +Impl::TaskPolicyWithPredecessor< + 
Impl::TaskType::TaskSingle, + Kokkos::BasicFuture +> KOKKOS_INLINE_FUNCTION -TaskSingle( T const & arg - , TaskPriority const & arg_priority = TaskPriority::Regular - ) +TaskSingle( + Kokkos::BasicFuture arg_future, + TaskPriority arg_priority = TaskPriority::Regular +) { - static_assert( Kokkos::is_future::value || - Kokkos::is_scheduler::value - , "Kokkos TaskSingle argument must be Future or TaskScheduler" ); - - return - Kokkos::Impl::TaskPolicyData - < Kokkos::Impl::TaskBase::TaskSingle - , typename std::conditional< Kokkos::is_future< T >::value , T , - typename Kokkos::Future< typename T::execution_space > >::type - >( arg , arg_priority ); + return { std::move(arg_future), arg_priority }; } -template< typename E , typename F > -Kokkos::Impl:: - TaskPolicyData< Kokkos::Impl::TaskBase::TaskSingle , F > +template +Impl::TaskPolicyWithScheduler< + Impl::TaskType::TaskSingle, Scheduler +> KOKKOS_INLINE_FUNCTION -TaskSingle( TaskScheduler const & arg_scheduler - , F const & arg_future - , typename std::enable_if< Kokkos::is_future::value , - TaskPriority >::type const & arg_priority = TaskPriority::Regular - ) +TaskSingle( + Scheduler arg_scheduler, + typename std::enable_if< + Kokkos::is_scheduler::value, + TaskPriority + >::type arg_priority = TaskPriority::Regular +) { - return - Kokkos::Impl::TaskPolicyData - < Kokkos::Impl::TaskBase::TaskSingle , F > - ( arg_scheduler , arg_future , arg_priority ); + return { std::move(arg_scheduler), arg_priority }; +} + +template< + class Scheduler, + class PredecessorFuture +> +Impl::TaskPolicyWithScheduler< + Kokkos::Impl::TaskType::TaskSingle, + Scheduler, + PredecessorFuture +> +KOKKOS_INLINE_FUNCTION +TaskSingle( + Scheduler arg_scheduler, + PredecessorFuture arg_future, + typename std::enable_if< + Kokkos::is_scheduler::value + && Kokkos::is_future::value, + TaskPriority + >::type arg_priority = TaskPriority::Regular +) +{ + static_assert( + std::is_same::value, + "Can't create a task policy from a scheduler and a 
future from a different scheduler" + ); + + return { std::move(arg_scheduler), std::move(arg_future), arg_priority }; } //---------------------------------------------------------------------------- @@ -868,34 +684,31 @@ TaskSingle( TaskScheduler const & arg_scheduler * 2) With scheduler or dependence * 3) High, Normal, or Low priority */ -template< int TaskEnum - , typename DepFutureType - , typename FunctorType > -Future< typename FunctorType::value_type - , typename DepFutureType::execution_space > -host_spawn( Impl::TaskPolicyData const & arg_policy - , FunctorType && arg_functor - ) -{ - using exec_space = typename DepFutureType::execution_space ; - using scheduler = TaskScheduler< exec_space > ; +template +typename Scheduler::template future_type_for_functor::type> +host_spawn( + Impl::TaskPolicyWithScheduler arg_policy, + FunctorType&& arg_functor +) { + using scheduler_type = Scheduler; + using task_type = + typename scheduler_type::template runnable_task_type; - typedef Impl::TaskBase< exec_space - , typename FunctorType::value_type - , FunctorType - > task_type ; - - static_assert( TaskEnum == task_type::TaskTeam || - TaskEnum == task_type::TaskSingle - , "Kokkos host_spawn requires TaskTeam or TaskSingle" ); + static_assert( + TaskEnum == Impl::TaskType::TaskTeam || TaskEnum == Impl::TaskType::TaskSingle, + "Kokkos host_spawn requires TaskTeam or TaskSingle" + ); // May be spawning a Cuda task, must use the specialization // to query on-device function pointer. 
- typename task_type::function_type const ptr = - Kokkos::Impl::TaskQueueSpecialization< exec_space >:: - template get_function_pointer< task_type >(); + typename task_type::function_type ptr; + typename task_type::destroy_type dtor; + Kokkos::Impl::TaskQueueSpecialization< scheduler_type >:: + template get_function_pointer< task_type >(ptr, dtor); - return scheduler::spawn( arg_policy , ptr , std::move(arg_functor) ); + return scheduler_type::spawn( + std::move(arg_policy), ptr, dtor, std::forward(arg_functor) + ); } /**\brief A task spawns a task with options @@ -904,39 +717,38 @@ host_spawn( Impl::TaskPolicyData const & arg_policy * 2) With scheduler or dependence * 3) High, Normal, or Low priority */ -template< int TaskEnum - , typename DepFutureType - , typename FunctorType > -Future< typename FunctorType::value_type - , typename DepFutureType::execution_space > +template +typename Scheduler::template future_type_for_functor::type> KOKKOS_INLINE_FUNCTION -task_spawn( Impl::TaskPolicyData const & arg_policy - , FunctorType && arg_functor - ) +task_spawn( + Impl::TaskPolicyWithScheduler arg_policy, + FunctorType&& arg_functor +) { - using exec_space = typename DepFutureType::execution_space ; - using scheduler = TaskScheduler< exec_space > ; + using scheduler_type = Scheduler; - typedef Impl::TaskBase< exec_space - , typename FunctorType::value_type - , FunctorType - > task_type ; + using task_type = + typename scheduler_type::template runnable_task_type; -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) && \ - defined( KOKKOS_ENABLE_CUDA ) + #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) && \ + defined( KOKKOS_ENABLE_CUDA ) - static_assert( ! std::is_same< Kokkos::Cuda , exec_space >::value - , "Error calling Kokkos::task_spawn for Cuda space within Host code" ); + static_assert( ! 
std::is_same< Kokkos::Cuda , typename Scheduler::execution_space >::value + , "Error calling Kokkos::task_spawn for Cuda space within Host code" ); -#endif + #endif - static_assert( TaskEnum == task_type::TaskTeam || - TaskEnum == task_type::TaskSingle - , "Kokkos host_spawn requires TaskTeam or TaskSingle" ); + static_assert( + TaskEnum == Impl::TaskType::TaskTeam || TaskEnum == Impl::TaskType::TaskSingle, + "Kokkos task_spawn requires TaskTeam or TaskSingle" + ); typename task_type::function_type const ptr = task_type::apply ; + typename task_type::destroy_type const dtor = task_type::destroy ; - return scheduler::spawn( arg_policy , ptr , std::move(arg_functor) ); + return scheduler_type::spawn(std::move(arg_policy), ptr, dtor, + std::forward(arg_functor) + ); } /**\brief A task respawns itself with options @@ -956,36 +768,42 @@ respawn( FunctorType * arg_self Kokkos::is_scheduler::value , "Kokkos respawn argument must be Future or TaskScheduler" ); - TaskScheduler< typename T::execution_space >:: - respawn( arg_self , arg , arg_priority ); + T::scheduler_type::respawn( + arg_self , arg , arg_priority + ); } //---------------------------------------------------------------------------- -template< typename A1 , typename A2 > -KOKKOS_INLINE_FUNCTION -Future< typename Future< A1 , A2 >::execution_space > -when_all( Future< A1 , A2 > const arg[] - , int narg - ) -{ - return TaskScheduler< typename Future::execution_space >:: - when_all( arg , narg ); -} +//template +//KOKKOS_INLINE_FUNCTION +//BasicFuture +//when_all(BasicFuture const arg[], int narg) +//{ +// return BasicFuture::scheduler_type::when_all(arg, narg); +//} //---------------------------------------------------------------------------- // Wait for all runnable tasks to complete -template< typename ExecSpace > +template inline -void wait( TaskScheduler< ExecSpace > const & scheduler ) -{ scheduler.m_queue->execute(); } +void wait(BasicTaskScheduler const& scheduler) +{ + using scheduler_type = 
BasicTaskScheduler; + scheduler_type::specialization::execute(scheduler); + //scheduler.m_queue->execute(); +} } // namespace Kokkos //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- +//////////////////////////////////////////////////////////////////////////////// +// END OLD CODE +//////////////////////////////////////////////////////////////////////////////// + #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif /* #ifndef KOKKOS_TASKSCHEDULER_HPP */ diff --git a/lib/kokkos/core/src/Kokkos_TaskScheduler_fwd.hpp b/lib/kokkos/core/src/Kokkos_TaskScheduler_fwd.hpp new file mode 100644 index 0000000000..79d502c729 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_TaskScheduler_fwd.hpp @@ -0,0 +1,249 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TASKSCHEDULER_FWD_HPP +#define KOKKOS_TASKSCHEDULER_FWD_HPP + +//---------------------------------------------------------------------------- + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +//---------------------------------------------------------------------------- + +namespace Kokkos { + +// Forward declarations used in Impl::TaskQueue + +template +class BasicFuture; + +template +class SimpleTaskScheduler; + +template +class BasicTaskScheduler; + +template< typename Space > +struct is_scheduler : public std::false_type {}; + +template +struct is_scheduler> : public std::true_type {}; + +template +struct is_scheduler> : public std::true_type {}; + +enum class TaskPriority : int { + High = 0, + Regular = 1, + Low = 2 +}; + +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template +class MemoryPool; + +namespace Impl { + +template +class TaskNode; + +class TaskBase; + +/*\brief Implementation data for task data 
management, access, and execution. + * (Deprecated) + * CRTP Inheritance structure to allow static_cast from the + * task root type and a task's FunctorType. + * + * TaskBase< Space , ResultType , FunctorType > + * : TaskBase< Space , ResultType , void > + * , FunctorType + * { ... }; + * + * TaskBase< Space , ResultType , void > + * : TaskBase< Space , void , void > + * { ... }; + */ +template< typename Space , typename ResultType , typename FunctorType > +class Task; + +class TaskQueueBase; + +template< typename Space, typename MemorySpace> +class TaskQueue; + +template< typename ExecSpace, typename MemorySpace> +class TaskQueueMultiple; + +template< + typename ExecSpace, typename MemSpace, typename TaskQueueTraits, + class MemoryPool = Kokkos::MemoryPool> +> +class SingleTaskQueue; + +template< typename ExecSpace, typename MemSpace, typename TaskQueueTraits, class MemoryPool> +class MultipleTaskQueue; + +struct TaskQueueTraitsLockBased; + +template +struct TaskQueueTraitsChaseLev; + +template< typename ResultType > +struct TaskResult; + +struct TaskSchedulerBase; + +template +struct default_tasking_memory_space_for_execution_space +{ + using type = typename ExecSpace::memory_space; +}; + +#if defined( KOKKOS_ENABLE_CUDA ) +template <> +struct default_tasking_memory_space_for_execution_space +{ + using type = Kokkos::CudaUVMSpace; +}; +#endif + +template +using default_tasking_memory_space_for_execution_space_t = + typename default_tasking_memory_space_for_execution_space::type; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template< typename Space > +using DeprecatedTaskScheduler = BasicTaskScheduler< + Space, + Impl::TaskQueue> +>; + +template< typename Space > +using DeprecatedTaskSchedulerMultiple = BasicTaskScheduler< + Space, + Impl::TaskQueueMultiple> +>; + +template< typename Space > +using TaskScheduler = SimpleTaskScheduler< + Space, + 
Impl::SingleTaskQueue< + Space, + Impl::default_tasking_memory_space_for_execution_space_t, + Impl::TaskQueueTraitsLockBased + > +>; + +template< typename Space > +using TaskSchedulerMultiple = SimpleTaskScheduler< + Space, + Impl::MultipleTaskQueue< + Space, + Impl::default_tasking_memory_space_for_execution_space_t, + Impl::TaskQueueTraitsLockBased, + Kokkos::MemoryPool< + Kokkos::Device< + Space, + Impl::default_tasking_memory_space_for_execution_space_t + > + > + > +>; + +template< typename Space > +using ChaseLevTaskScheduler = SimpleTaskScheduler< + Space, + Impl::MultipleTaskQueue< + Space, + Impl::default_tasking_memory_space_for_execution_space_t, + Impl::TaskQueueTraitsChaseLev<>, + Kokkos::MemoryPool< + Kokkos::Device< + Space, + Impl::default_tasking_memory_space_for_execution_space_t + > + > + > +>; + +template +void wait(BasicTaskScheduler const&); + +namespace Impl { + +struct TaskSchedulerBase { }; + +class TaskQueueBase { }; + +template +class TaskQueueSpecializationConstrained { }; + +template +struct TaskQueueSpecialization : TaskQueueSpecializationConstrained { }; + +template +struct TaskPolicyData; + + +} // end namespace Impl + +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_TASKSCHEDULER_FWD_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_Threads.hpp b/lib/kokkos/core/src/Kokkos_Threads.hpp index d5e684e4ea..03dab1acaf 100644 --- a/lib/kokkos/core/src/Kokkos_Threads.hpp +++ b/lib/kokkos/core/src/Kokkos_Threads.hpp @@ -105,7 +105,13 @@ public: /// return asynchronously, before the functor completes. This /// method does not return until all dispatched functors on this /// device have completed. + static void impl_static_fence(); + + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE static void fence(); + #else + void fence() const; + #endif /** \brief Return the maximum amount of concurrency. 
*/ static int concurrency(); diff --git a/lib/kokkos/core/src/Kokkos_View.hpp b/lib/kokkos/core/src/Kokkos_View.hpp index 754a0ab8c0..3fe8e6f067 100644 --- a/lib/kokkos/core/src/Kokkos_View.hpp +++ b/lib/kokkos/core/src/Kokkos_View.hpp @@ -74,7 +74,11 @@ template< class DataType , class ArrayLayout struct ViewDataAnalysis ; template< class , class ... > -class ViewMapping { public: enum { is_assignable = false }; }; +class ViewMapping { + public: + enum { is_assignable_data_type = false }; + enum { is_assignable = false }; +}; @@ -97,6 +101,7 @@ std::size_t count_valid_integers(const IntType i0, } +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE KOKKOS_INLINE_FUNCTION void runtime_check_rank_device(const size_t dyn_rank, const bool is_void_spec, @@ -109,8 +114,6 @@ void runtime_check_rank_device(const size_t dyn_rank, const size_t i6, const size_t i7 ){ -#ifndef KOKKOS_ENABLE_DEPRECATED_CODE - if ( is_void_spec ) { const size_t num_passed_args = count_valid_integers(i0, i1, i2, i3, i4, i5, i6, i7); @@ -121,10 +124,25 @@ void runtime_check_rank_device(const size_t dyn_rank, } } -#endif } +#else +KOKKOS_INLINE_FUNCTION +void runtime_check_rank_device(const size_t , + const bool , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t ){ + +} +#endif #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE KOKKOS_INLINE_FUNCTION void runtime_check_rank_host(const size_t dyn_rank, const bool is_void_spec, @@ -137,7 +155,6 @@ void runtime_check_rank_host(const size_t dyn_rank, const size_t i6, const size_t i7, const std::string & label ){ -#ifndef KOKKOS_ENABLE_DEPRECATED_CODE if ( is_void_spec ) { const size_t num_passed_args = count_valid_integers(i0, i1, i2, i3, @@ -150,8 +167,20 @@ void runtime_check_rank_host(const size_t dyn_rank, Kokkos::abort(message.c_str()) ; } } -#endif } +#else +KOKKOS_INLINE_FUNCTION +void runtime_check_rank_host(const size_t , + const bool , 
+ const size_t , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t , + const size_t , const std::string &){} +#endif #endif } /* namespace Impl */ @@ -362,8 +391,8 @@ public: typedef typename MemorySpace::size_type size_type ; enum { is_hostspace = std::is_same< MemorySpace , HostSpace >::value }; - enum { is_managed = MemoryTraits::Unmanaged == 0 }; - enum { is_random_access = MemoryTraits::RandomAccess == 1 }; + enum { is_managed = MemoryTraits::is_unmanaged == 0 }; + enum { is_random_access = MemoryTraits::is_random_access == 1 }; //------------------------------------ }; @@ -1965,7 +1994,10 @@ public: template< class RT , class ... RP > KOKKOS_INLINE_FUNCTION - View( const View & rhs ) + View( const View & rhs, + typename std::enable_if::traits , typename traits::specialize >::is_assignable_data_type>::type* = 0 + ) : m_track( rhs.m_track , traits::is_managed ) , m_map() { @@ -1977,7 +2009,9 @@ public: template< class RT , class ... RP > KOKKOS_INLINE_FUNCTION - View & operator = ( const View & rhs ) + typename std::enable_if::traits , typename traits::specialize >::is_assignable_data_type, + View>::type & operator = ( const View & rhs ) { typedef typename View::traits SrcTraits ; typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , typename traits::specialize > Mapping ; @@ -1994,7 +2028,7 @@ public: template< class RT , class ... RP , class Arg0 , class ... Args > KOKKOS_INLINE_FUNCTION View( const View< RT , RP... > & src_view - , const Arg0 & arg0 , Args ... args ) + , const Arg0 arg0 , Args ... 
args ) : m_track( src_view.m_track , traits::is_managed ) , m_map() { @@ -2077,7 +2111,7 @@ public: } // Copy the input allocation properties with possibly defaulted properties - alloc_prop prop( arg_prop ); + alloc_prop prop_copy( arg_prop ); //------------------------------------------------------------ #if defined( KOKKOS_ENABLE_CUDA ) @@ -2087,18 +2121,18 @@ public: // Fence using the trait's executon space (which will be Kokkos::Cuda) // to avoid incomplete type errors from usng Kokkos::Cuda directly. if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - traits::device_type::memory_space::execution_space::fence(); + typename traits::device_type::memory_space::execution_space().fence(); } #endif //------------------------------------------------------------ Kokkos::Impl::SharedAllocationRecord<> * - record = m_map.allocate_shared( prop , arg_layout ); + record = m_map.allocate_shared( prop_copy , arg_layout ); //------------------------------------------------------------ #if defined( KOKKOS_ENABLE_CUDA ) if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { - traits::device_type::memory_space::execution_space::fence(); + typename traits::device_type::memory_space::execution_space().fence(); } #endif //------------------------------------------------------------ diff --git a/lib/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp b/lib/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp index 33a0579df5..dd5e29a400 100644 --- a/lib/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp +++ b/lib/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp @@ -55,7 +55,7 @@ class WorkGraphExec; namespace Kokkos { template< class ... 
Properties > -class WorkGraphPolicy +class WorkGraphPolicy: public Kokkos::Impl::PolicyTraits { public: @@ -64,7 +64,6 @@ public: using traits = Kokkos::Impl::PolicyTraits; using index_type = typename traits::index_type; using member_type = index_type; - using work_tag = typename traits::work_tag; using execution_space = typename traits::execution_space; using memory_space = typename execution_space::memory_space; using graph_type = Kokkos::Crs; @@ -217,7 +216,7 @@ public: using closure_type = Kokkos::Impl::ParallelFor; const closure_type closure(*this, policy_type(0, m_queue.size())); closure.execute(); - execution_space::fence(); + execution_space().fence(); } { // execute-after counts @@ -225,7 +224,7 @@ public: using closure_type = Kokkos::Impl::ParallelFor; const closure_type closure(*this,policy_type(0,m_graph.entries.size())); closure.execute(); - execution_space::fence(); + execution_space().fence(); } { // Scheduling ready tasks @@ -233,7 +232,7 @@ public: using closure_type = Kokkos::Impl::ParallelFor; const closure_type closure(*this,policy_type(0,m_graph.numRows())); closure.execute(); - execution_space::fence(); + execution_space().fence(); } } }; @@ -256,4 +255,8 @@ public: #include "Threads/Kokkos_Threads_WorkGraphPolicy.hpp" #endif +#ifdef KOKKOS_ENABLE_HPX +#include "HPX/Kokkos_HPX_WorkGraphPolicy.hpp" +#endif + #endif /* #define KOKKOS_WORKGRAPHPOLICY_HPP */ diff --git a/lib/kokkos/core/src/Makefile b/lib/kokkos/core/src/Makefile index c2dbddf45e..ae8dc17510 100644 --- a/lib/kokkos/core/src/Makefile +++ b/lib/kokkos/core/src/Makefile @@ -40,6 +40,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) CONDITIONAL_COPIES += copy-openmp endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + CONDITIONAL_COPIES += copy-hpx +endif + ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) CONDITIONAL_COPIES += copy-rocm endif @@ -91,6 +95,10 @@ copy-openmp: mkdir mkdir -p $(PREFIX)/include/OpenMP $(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP +copy-hpx: mkdir + mkdir 
-p $(PREFIX)/include/HPX + $(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_HPX) $(PREFIX)/include/HPX + copy-rocm: mkdir mkdir -p $(PREFIX)/include/ROCm $(CP) $(COPY_FLAG) $(KOKKOS_HEADERS_ROCM) $(PREFIX)/include/ROCm diff --git a/lib/kokkos/core/src/Makefile.generate_build_files b/lib/kokkos/core/src/Makefile.generate_build_files index cc856ee9a3..651b9d5fe9 100644 --- a/lib/kokkos/core/src/Makefile.generate_build_files +++ b/lib/kokkos/core/src/Makefile.generate_build_files @@ -84,6 +84,7 @@ generate_build_settings: $(KOKKOS_CONFIG_HEADER) $(KOKKOS_PKGCONFIG) @$(call kokkos_append_var,KOKKOS_HEADERS_IMPL,'STRING "Kokkos headers impl list"') @$(call kokkos_append_var,KOKKOS_HEADERS_CUDA,'STRING "Kokkos headers Cuda list"') @$(call kokkos_append_var,KOKKOS_HEADERS_OPENMP,'STRING "Kokkos headers OpenMP list"') + @$(call kokkos_append_var,KOKKOS_HEADERS_HPX,'STRING "Kokkos headers HPX list"') @$(call kokkos_append_var,KOKKOS_HEADERS_ROCM,'STRING "Kokkos headers ROCm list"') @$(call kokkos_append_var,KOKKOS_HEADERS_THREADS,'STRING "Kokkos headers Threads list"') @$(call kokkos_append_var,KOKKOS_HEADERS_QTHREADS,'STRING "Kokkos headers QThreads list"') @@ -103,11 +104,13 @@ generate_build_settings: $(KOKKOS_CONFIG_HEADER) $(KOKKOS_PKGCONFIG) @$(call kokkos_append_string,"#Internal settings which need to propagated for Kokkos examples") @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_CUDA,'STRING ""') @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_OPENMP,'STRING ""') + @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_HPX,'STRING ""') @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_PTHREADS,'STRING ""') @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_SERIAL,'STRING ""') @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_ROCM,'STRING ""') + @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_HPX,'STRING ""') @$(call kokkos_append_var,KOKKOS_INTERNAL_USE_QTHREADS,'STRING ""') # Not in original cmake gen - @$(call kokkos_append_cmakefile "mark_as_advanced(KOKKOS_HEADERS KOKKOS_SRC KOKKOS_INTERNAL_USE_CUDA 
KOKKOS_INTERNAL_USE_OPENMP KOKKOS_INTERNAL_USE_PTHREADS KOKKOS_INTERNAL_USE_SERIAL)") + @$(call kokkos_append_cmakefile "mark_as_advanced(KOKKOS_HEADERS KOKKOS_SRC KOKKOS_INTERNAL_USE_CUDA KOKKOS_INTERNAL_USE_OPENMP KOKKOS_INTERNAL_USE_HPX KOKKOS_INTERNAL_USE_PTHREADS KOKKOS_INTERNAL_USE_SERIAL)") @$(call kokkos_append_makefile,"") @$(call kokkos_append_makefile,"#Fake kokkos-clean target") @$(call kokkos_append_makefile,"kokkos-clean:") diff --git a/lib/kokkos/core/src/Makefile.generate_header_lists b/lib/kokkos/core/src/Makefile.generate_header_lists index cd308bf8f4..afbefb3806 100644 --- a/lib/kokkos/core/src/Makefile.generate_header_lists +++ b/lib/kokkos/core/src/Makefile.generate_header_lists @@ -22,6 +22,10 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + KOKKOS_HEADERS_HPX += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp) +endif + ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) KOKKOS_HEADERS_ROCM += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp) endif diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp index e57b61d7cb..1946c10741 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp @@ -472,6 +472,10 @@ int OpenMP::concurrency() { return Impl::g_openmp_hardware_max_threads; } +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE +void OpenMP::fence() const {} +#endif + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE void OpenMP::initialize( int thread_count , int, int ) diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp index 43fa7888cf..5178199ac2 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp @@ -184,8 +184,13 @@ int OpenMP::impl_thread_pool_rank() noexcept #endif } +inline +void OpenMP::impl_static_fence( OpenMP 
const& instance ) noexcept {} + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE inline void OpenMP::fence( OpenMP const& instance ) noexcept {} +#endif inline bool OpenMP::is_asynchronous( OpenMP const& instance ) noexcept diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp index e0bb572a3b..ae6b49f650 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp @@ -128,11 +128,10 @@ public: OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_for"); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::thread_pool_size()) #else - const int pool_size = OpenMP::impl_thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) #endif - #pragma omp parallel num_threads(pool_size) { HostThreadTeamData & data = *(m_instance->get_thread_data()); @@ -228,11 +227,10 @@ public: OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_for"); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::thread_pool_size()) #else - const int pool_size = OpenMP::impl_thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) #endif - #pragma omp parallel num_threads(pool_size) { HostThreadTeamData & data = *(m_instance->get_thread_data()); @@ -703,11 +701,10 @@ public: ); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::thread_pool_size()) #else - const int pool_size = OpenMP::impl_thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) #endif - #pragma omp parallel num_threads(pool_size) { HostThreadTeamData & data = *(m_instance->get_thread_data()); @@ -840,11 +837,10 @@ public: ); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int pool_size 
= OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::thread_pool_size()) #else - const int pool_size = OpenMP::impl_thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) #endif - #pragma omp parallel num_threads(pool_size) { HostThreadTeamData & data = *(m_instance->get_thread_data()); @@ -1005,11 +1001,10 @@ public: , thread_local_size ); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::thread_pool_size()) #else - const int pool_size = OpenMP::impl_thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) #endif - #pragma omp parallel num_threads(pool_size) { HostThreadTeamData & data = *(m_instance->get_thread_data()); diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp index 2f2c768460..3b1c187c6d 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp @@ -48,6 +48,8 @@ #include #include +#include +#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -55,200 +57,44 @@ namespace Kokkos { namespace Impl { -template class TaskQueue< Kokkos::OpenMP > ; +template class TaskQueue< Kokkos::OpenMP, typename Kokkos::OpenMP::memory_space > ; -class HostThreadTeamDataSingleton : private HostThreadTeamData { -private: - - HostThreadTeamDataSingleton() : HostThreadTeamData() - { - Kokkos::OpenMP::memory_space space ; - const size_t num_pool_reduce_bytes = 32 ; - const size_t num_team_reduce_bytes = 32 ; - const size_t num_team_shared_bytes = 1024 ; - const size_t num_thread_local_bytes = 1024 ; - const size_t alloc_bytes = - HostThreadTeamData::scratch_size( num_pool_reduce_bytes - , num_team_reduce_bytes - , num_team_shared_bytes - , num_thread_local_bytes ); - - 
HostThreadTeamData::scratch_assign - ( space.allocate( alloc_bytes ) - , alloc_bytes - , num_pool_reduce_bytes - , num_team_reduce_bytes - , num_team_shared_bytes - , num_thread_local_bytes ); - } - - ~HostThreadTeamDataSingleton() - { - Kokkos::OpenMP::memory_space space ; - space.deallocate( HostThreadTeamData::scratch_buffer() - , HostThreadTeamData::scratch_bytes() ); - } - -public: - - static HostThreadTeamData & singleton() - { - static HostThreadTeamDataSingleton s ; - return s ; - } -}; - -//---------------------------------------------------------------------------- - -void TaskQueueSpecialization< Kokkos::OpenMP >::execute - ( TaskQueue< Kokkos::OpenMP > * const queue ) +HostThreadTeamData& HostThreadTeamDataSingleton::singleton() { - using task_root_type = TaskBase< void , void , void > ; - using Member = Impl::HostThreadTeamMember< execution_space > ; - - static task_root_type * const end = - (task_root_type *) task_root_type::EndTag ; - - - HostThreadTeamData & team_data_single = - HostThreadTeamDataSingleton::singleton(); - - Impl::OpenMPExec * instance = t_openmp_instance; -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int pool_size = OpenMP::thread_pool_size(); -#else - const int pool_size = OpenMP::impl_thread_pool_size(); -#endif - - const int team_size = 1; // Threads per core - instance->resize_thread_data( 0 /* global reduce buffer */ - , 512 * team_size /* team reduce buffer */ - , 0 /* team shared buffer */ - , 0 /* thread local buffer */ - ); - - #pragma omp parallel num_threads(pool_size) - { - Impl::HostThreadTeamData & self = *(instance->get_thread_data()); - - // Organizing threads into a team performs a barrier across the - // entire pool to insure proper initialization of the team - // rendezvous mechanism before a team rendezvous can be performed. 
- - if ( self.organize_team( team_size ) ) { - - Member single_exec( team_data_single ); - Member team_exec( self ); - - // Loop until all queues are empty and no tasks in flight - - task_root_type * task = 0 ; - - do { - // Each team lead attempts to acquire either a thread team task - // or a single thread task for the team. - - if ( 0 == team_exec.team_rank() ) { - - bool leader_loop = false ; - - do { - - if ( 0 != task && end != task ) { - // team member #0 completes the previously executed task, - // completion may delete the task - queue->complete( task ); - } - - // If 0 == m_ready_count then set task = 0 - - task = 0 < *((volatile int *) & queue->m_ready_count) ? end : 0 ; - - // Attempt to acquire a task - // Loop by priority and then type - for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { - for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); - } - } - - // If still tasks are still executing - // and no task could be acquired - // then continue this leader loop - leader_loop = end == task ; - - if ( ( ! 
leader_loop ) && - ( 0 != task ) && - ( task_root_type::TaskSingle == task->m_task_type ) ) { - - // if a single thread task then execute now - - (*task->m_apply)( task , & single_exec ); - - leader_loop = true ; - } - } while ( leader_loop ); - } - - // Team lead either found 0 == m_ready_count or a team task - // Team lead broadcast acquired task: - - team_exec.team_broadcast( task , 0); - - if ( 0 != task ) { // Thread Team Task - - (*task->m_apply)( task , & team_exec ); - - // The m_apply function performs a barrier - } - } while( 0 != task ); - } - self.disband_team(); - } + static HostThreadTeamDataSingleton s; + return s; } -void TaskQueueSpecialization< Kokkos::OpenMP >:: - iff_single_thread_recursive_execute - ( TaskQueue< Kokkos::OpenMP > * const queue ) +HostThreadTeamDataSingleton::HostThreadTeamDataSingleton() + : HostThreadTeamData() { - using task_root_type = TaskBase< void , void , void > ; - using Member = Impl::HostThreadTeamMember< execution_space > ; + Kokkos::OpenMP::memory_space space ; + const size_t num_pool_reduce_bytes = 32 ; + const size_t num_team_reduce_bytes = 32 ; + const size_t num_team_shared_bytes = 1024 ; + const size_t num_thread_local_bytes = 1024 ; + const size_t alloc_bytes = + HostThreadTeamData::scratch_size( num_pool_reduce_bytes + , num_team_reduce_bytes + , num_team_shared_bytes + , num_thread_local_bytes ); -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE - if ( 1 == OpenMP::thread_pool_size() ) -#else - if ( 1 == OpenMP::impl_thread_pool_size() ) -#endif - { + HostThreadTeamData::scratch_assign + ( space.allocate( alloc_bytes ) + , alloc_bytes + , num_pool_reduce_bytes + , num_team_reduce_bytes + , num_team_shared_bytes + , num_thread_local_bytes ); +} - task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - - HostThreadTeamData & team_data_single = - HostThreadTeamDataSingleton::singleton(); - - Member single_exec( team_data_single ); - - task_root_type * task = end ; - - do { - - task = end ; - - // Loop by 
priority and then type - for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { - for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); - } - } - - if ( end == task ) break ; - - (*task->m_apply)( task , & single_exec ); - - queue->complete( task ); - - } while(1); - } +HostThreadTeamDataSingleton::~HostThreadTeamDataSingleton() +{ + Kokkos::OpenMP::memory_space space ; + space.deallocate( + HostThreadTeamData::scratch_buffer(), + static_cast(HostThreadTeamData::scratch_bytes()) + ); } }} /* namespace Kokkos::Impl */ diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp index b99c149b06..4029c015b3 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp @@ -47,38 +47,388 @@ #include #if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) +#include + +#include +#include + +#include +#include + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { namespace Impl { -template<> -class TaskQueueSpecialization< Kokkos::OpenMP > +class HostThreadTeamDataSingleton : private HostThreadTeamData { +private: + + HostThreadTeamDataSingleton(); + ~HostThreadTeamDataSingleton(); + +public: + + static HostThreadTeamData & singleton(); + +}; + +// Hack this as a partial specialization for now +// TODO @tasking @cleanup DSH Make this the general class template and make the old code the partial specialization +template +class TaskQueueSpecialization< + SimpleTaskScheduler +> { public: - using execution_space = Kokkos::OpenMP ; - using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; - using task_base_type = Kokkos::Impl::TaskBase< void , void , void > ; - using member_type = Kokkos::Impl::HostThreadTeamMember< execution_space > ; + using 
execution_space = Kokkos::OpenMP; + using scheduler_type = SimpleTaskScheduler; + using member_type = TaskTeamMemberAdapter< + Kokkos::Impl::HostThreadTeamMember, + scheduler_type + >; + using memory_space = Kokkos::HostSpace; - // Must specify memory space - using memory_space = Kokkos::HostSpace ; - - static - void iff_single_thread_recursive_execute( queue_type * const ); + enum : int { max_league_size = HostThreadTeamData::max_pool_members }; // Must provide task queue execution function - static void execute( queue_type * const ); + static void execute(scheduler_type const& scheduler) + { + using task_base_type = typename scheduler_type::task_base_type; - template< typename TaskType > - static - typename TaskType::function_type - get_function_pointer() { return TaskType::apply ; } + // Unused; ChaseLev queue still needs worker ID even in single case (so we need to use + // the thread data from inside of the parallel region. Team size is fixed at 1 for now + // anyway + //HostThreadTeamData& team_data_single = HostThreadTeamDataSingleton::singleton(); + + // TODO @tasking @generalization DSH use scheduler.get_execution_space().impl() (or something like that) instead of the thread-local variable + Impl::OpenMPExec* instance = t_openmp_instance; + const int pool_size = get_max_team_count(scheduler.get_execution_space()); + + // TODO @tasking @new_feature DSH allow team sizes other than 1 + const int team_size = 1; // Threads per core + instance->resize_thread_data( + 0, /* global reduce buffer */ + 512 * team_size, /* team reduce buffer */ + 0, /* team shared buffer */ + 0 /* thread local buffer */ + ); + assert(pool_size % team_size == 0); + + auto& queue = scheduler.queue(); + + //queue.initialize_team_queues(pool_size / team_size); + + #pragma omp parallel num_threads(pool_size) + { + Impl::HostThreadTeamData & self = *(instance->get_thread_data()); + + // Organizing threads into a team performs a barrier across the + // entire pool to insure proper 
initialization of the team + // rendezvous mechanism before a team rendezvous can be performed. + + // organize_team() returns true if this is an active team member + if(self.organize_team(team_size)) { + + member_type single_exec(scheduler, self); + member_type team_exec(scheduler, self); + + auto& team_scheduler = team_exec.scheduler(); + + auto current_task = OptionalRef(nullptr); + + while(not queue.is_done()) { + + // Each team lead attempts to acquire either a thread team task + // or a single thread task for the team. + if(team_exec.team_rank() == 0) { + + // loop while both: + // - the queue is not done + // - the most recently popped task is a single task or empty + while(not queue.is_done()) { + + current_task = queue.pop_ready_task(team_scheduler.team_scheduler_info()); + + if(current_task) { + + if(current_task->is_team_runnable()) { + // break out of the team leader loop to run the team task + break; + } + else { + KOKKOS_ASSERT(current_task->is_single_runnable()); + current_task->as_runnable_task().run(single_exec); + // Respawns are handled in the complete function + queue.complete( + (*std::move(current_task)).as_runnable_task(), + team_scheduler.team_scheduler_info() + ); + } + + } // end if current_task is not null + + current_task = nullptr; + + } // end team leader loop + + } + + // Otherwise, make sure everyone in the team has the same task + team_exec.team_broadcast(current_task, 0); + + if(current_task) { + KOKKOS_ASSERT(current_task->is_team_runnable()); + current_task->as_runnable_task().run(team_exec); + + if(team_exec.team_rank() == 0) { + // Respawns are handled in the complete function + queue.complete( + (*std::move(current_task)).as_runnable_task(), + team_scheduler.team_scheduler_info() + ); + } + + } + + } + } + self.disband_team(); + } // end pragma omp parallel + } + + static uint32_t + get_max_team_count(execution_space const& espace) { +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + return static_cast(espace.thread_pool_size()); +#else 
+ return static_cast(espace.impl_thread_pool_size()); +#endif + } + + // TODO @tasking @optimization DSH specialize this for trivially destructible types + template + static void + get_function_pointer( + typename TaskType::function_type& ptr, + typename TaskType::destroy_type& dtor + ) { + ptr = TaskType::apply; + dtor = TaskType::destroy; + } }; -extern template class TaskQueue< Kokkos::OpenMP > ; + +template +class TaskQueueSpecializationConstrained< + Scheduler, + typename std::enable_if< + std::is_same::value + >::type +> +{ +public: + + using execution_space = Kokkos::OpenMP; + using scheduler_type = Scheduler; + using member_type = TaskTeamMemberAdapter< + Kokkos::Impl::HostThreadTeamMember, + scheduler_type + >; + using memory_space = Kokkos::HostSpace ; + + enum : int { max_league_size = HostThreadTeamData::max_pool_members }; + + static + void iff_single_thread_recursive_execute( scheduler_type const& scheduler ) { + using task_base_type = typename scheduler_type::task_base; + using queue_type = typename scheduler_type::queue_type; + +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + if ( 1 == OpenMP::thread_pool_size() ) +#else + if ( 1 == OpenMP::impl_thread_pool_size() ) +#endif + { + + task_base_type * const end = (task_base_type *) task_base_type::EndTag ; + + HostThreadTeamData & team_data_single = + HostThreadTeamDataSingleton::singleton(); + + member_type single_exec( scheduler, team_data_single ); + + task_base_type * task = end ; + + do { + + task = end ; + + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_ready_task( & scheduler.m_queue->m_ready[i][j] ); + } + } + + if ( end == task ) break ; + + (*task->m_apply)( task , & single_exec ); + + scheduler.m_queue->complete( task ); + + } while(1); + } + + } + + // Must provide task queue execution function + static void execute(scheduler_type const& scheduler) + { + using 
task_base_type = typename scheduler_type::task_base; + using queue_type = typename scheduler_type::queue_type; + + static task_base_type * const end = + (task_base_type *) task_base_type::EndTag ; + + constexpr task_base_type* no_more_tasks_sentinel = nullptr; + + + HostThreadTeamData & team_data_single = + HostThreadTeamDataSingleton::singleton(); + + Impl::OpenMPExec * instance = t_openmp_instance; +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + const int pool_size = OpenMP::thread_pool_size(); +#else + const int pool_size = OpenMP::impl_thread_pool_size(); +#endif + + const int team_size = 1; // Threads per core + instance->resize_thread_data( 0 /* global reduce buffer */ + , 512 * team_size /* team reduce buffer */ + , 0 /* team shared buffer */ + , 0 /* thread local buffer */ + ); + assert(pool_size % team_size == 0); + auto& queue = scheduler.queue(); + queue.initialize_team_queues(pool_size / team_size); + +#pragma omp parallel num_threads(pool_size) + { + Impl::HostThreadTeamData & self = *(instance->get_thread_data()); + + // Organizing threads into a team performs a barrier across the + // entire pool to insure proper initialization of the team + // rendezvous mechanism before a team rendezvous can be performed. + + // organize_team() returns true if this is an active team member + if ( self.organize_team( team_size ) ) { + + member_type single_exec(scheduler, team_data_single); + member_type team_exec(scheduler, self); + + auto& team_queue = team_exec.scheduler().queue(); + + // Loop until all queues are empty and no tasks in flight + + task_base_type * task = no_more_tasks_sentinel; + + + do { + // Each team lead attempts to acquire either a thread team task + // or a single thread task for the team. 
+ + if ( 0 == team_exec.team_rank() ) { + + bool leader_loop = false ; + + do { + + if ( task != no_more_tasks_sentinel && task != end ) { + // team member #0 completes the previously executed task, + // completion may delete the task + team_queue.complete( task ); + } + + // If 0 == m_ready_count then set task = 0 + + if( *((volatile int *) & team_queue.m_ready_count) > 0 ) { + task = end; + // Attempt to acquire a task + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_ready_task( & team_queue.m_ready[i][j] ); + } + } + } + else { + // returns nullptr if and only if all other queues have a ready + // count of 0 also. Otherwise, returns a task from another queue + // or `end` if one couldn't be popped + task = team_queue.attempt_to_steal_task(); + #if 0 + if(task != no_more_tasks_sentinel && task != end) { + std::printf("task stolen on rank %d\n", team_exec.league_rank()); + } + #endif + } + + // If still tasks are still executing + // and no task could be acquired + // then continue this leader loop + if(task == end) { + // this means that the ready task count was not zero, but we + // couldn't pop a task (because, for instance, someone else + // got there before us + leader_loop = true; + } + else if ( ( task != no_more_tasks_sentinel ) && + ( task_base_type::TaskSingle == task->m_task_type ) ) { + + // if a single thread task then execute now + + (*task->m_apply)(task, &single_exec); + + leader_loop = true; + } + else { + leader_loop = false; + } + } while ( leader_loop ); + } + + // Team lead either found 0 == m_ready_count or a team task + // Team lead broadcast acquired task: + + team_exec.team_broadcast( task , 0); + + if ( task != no_more_tasks_sentinel ) { // Thread Team Task + + (*task->m_apply)( task , & team_exec ); + + // The m_apply function performs a barrier + } + } while( task != no_more_tasks_sentinel ); + } + 
self.disband_team(); + } // end pragma omp parallel + } + + template< typename TaskType > + static void + get_function_pointer( + typename TaskType::function_type& ptr, + typename TaskType::destroy_type& dtor + ) { + ptr = TaskType::apply; + dtor = TaskType::destroy; + } +}; + +extern template class TaskQueue< Kokkos::OpenMP, typename Kokkos::OpenMP::memory_space > ; }} /* namespace Kokkos::Impl */ diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp index e8fbc467e0..38b062bdc0 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp @@ -74,6 +74,21 @@ public: return *this; } + template + friend class TeamPolicyInternal; + + template< class ... OtherProperties > + TeamPolicyInternal(const TeamPolicyInternal& p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_team_alloc = p.m_team_alloc; + m_team_iter = p.m_team_iter; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + } //---------------------------------------- #ifdef KOKKOS_ENABLE_DEPRECATED_CODE @@ -208,7 +223,7 @@ public: } /** \brief Specify league size, request team size */ - TeamPolicyInternal( typename traits::execution_space & + TeamPolicyInternal( const typename traits::execution_space & , int league_size_request , int team_size_request , int /* vector_length_request */ = 1 ) @@ -217,14 +232,18 @@ public: , m_chunk_size(0) { init( league_size_request , team_size_request ); } - TeamPolicyInternal( typename traits::execution_space & + TeamPolicyInternal( const typename traits::execution_space & , int league_size_request , const Kokkos::AUTO_t & /* team_size_request */ , int /* vector_length_request */ = 1) : m_team_scratch_size { 0 , 0 } , m_thread_scratch_size 
{ 0 , 0 } , m_chunk_size(0) +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE { init( league_size_request , traits::execution_space::thread_pool_size(2) ); } +#else + { init( league_size_request , traits::execution_space::impl_thread_pool_size(2) ); } +#endif TeamPolicyInternal( int league_size_request , int team_size_request diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp index 879d5d2d24..0742575cb8 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp @@ -76,11 +76,10 @@ public: void execute() { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::thread_pool_size()) #else - const int pool_size = OpenMP::impl_thread_pool_size(); + #pragma omp parallel num_threads(OpenMP::impl_thread_pool_size()) #endif - #pragma omp parallel num_threads(pool_size) { // Spin until COMPLETED_TOKEN. // END_TOKEN indicates no work is currently available. 
diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp index fc31a91b22..c93a88606d 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp @@ -697,13 +697,13 @@ namespace Impl { const iType increment; inline - TeamThreadRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, const iType& count): + TeamThreadRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, iType count): start( thread_.team_rank() ), end( count ), increment( thread_.team_size() ) {} inline - TeamThreadRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, const iType& begin_, const iType& end_): + TeamThreadRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, iType begin_, iType end_): start( begin_+thread_.team_rank() ), end( end_ ), increment( thread_.team_size() ) @@ -718,13 +718,13 @@ namespace Impl { const index_type increment; inline - ThreadVectorRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, const index_type& count): + ThreadVectorRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, index_type count): start( thread_.m_vector_lane ), end( count ), increment( thread_.m_vector_length ) {} inline - ThreadVectorRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, const index_type& begin_, const index_type& end_): + ThreadVectorRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, index_type begin_, index_type end_): start( begin_+thread_.m_vector_lane ), end( end_ ), increment( thread_.m_vector_length ) @@ -734,28 +734,28 @@ namespace Impl { template KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct - TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType& count) { + TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, iType count) { return 
Impl::TeamThreadRangeBoundariesStruct(thread,count); } template KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct - TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType& begin, const iType& end) { + TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, iType begin, iType end) { return Impl::TeamThreadRangeBoundariesStruct(thread,begin,end); } template KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct - ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType& count) { + ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, iType count) { return Impl::ThreadVectorRangeBoundariesStruct(thread,count); } template KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct - ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType& begin, const iType& end) { + ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, iType begin, iType end) { return Impl::ThreadVectorRangeBoundariesStruct(thread,begin,end); } diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp index 5ad90436af..7b1b63befe 100644 --- a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp @@ -51,7 +51,6 @@ #include -#include #include #include diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp index 205e6a2955..3e81883278 100644 --- a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp +++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Exec.hpp @@ -227,7 +227,7 @@ struct ROCmParallelLaunch< DriverType //#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) // ROCM_SAFE_CALL( rocmGetLastError() ); -// Kokkos::ROCm::fence(); +// Kokkos::ROCm().fence(); //#endif } } diff --git a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp index edd1c12e45..48654555b2 
100644 --- a/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp +++ b/lib/kokkos/core/src/ROCm/Kokkos_ROCm_Parallel.hpp @@ -86,6 +86,21 @@ public: return *this; } + template + friend class TeamPolicyInternal; + + template< class ... OtherProperties > + TeamPolicyInternal(const TeamPolicyInternal& p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_vector_length = p.m_vector_length; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + } + TeamPolicyInternal() : m_league_size( 0 ) , m_team_size( 0 ) @@ -1099,7 +1114,7 @@ public: ROCmParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute - ROCM::fence(); + ROCM().fence(); if ( m_result_ptr ) { const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ); @@ -1494,14 +1509,14 @@ namespace Kokkos { template KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct - TeamThreadRange(const Impl::ROCmTeamMember& thread, const iType& count) { + TeamThreadRange(const Impl::ROCmTeamMember& thread, iType count) { return Impl::TeamThreadRangeBoundariesStruct(thread,count); } template KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct::type,Impl::ROCmTeamMember> - TeamThreadRange(const Impl::ROCmTeamMember& thread, const iType1& begin, const iType2& end) { + TeamThreadRange(const Impl::ROCmTeamMember& thread, iType1 begin, iType2 end) { typedef typename std::common_type< iType1, iType2 >::type iType; return Impl::TeamThreadRangeBoundariesStruct(thread,begin,end); } @@ -1509,14 +1524,14 @@ Impl::TeamThreadRangeBoundariesStruct KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct - ThreadVectorRange(const Impl::ROCmTeamMember& thread, const iType& count) { + ThreadVectorRange(const Impl::ROCmTeamMember& 
thread, iType count) { return Impl::ThreadVectorRangeBoundariesStruct(thread,count); } template KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct - ThreadVectorRange(const Impl::ROCmTeamMember& thread, const iType& arg_begin, const iType& arg_end) { + ThreadVectorRange(const Impl::ROCmTeamMember& thread, iType arg_begin, iType arg_end) { return Impl::ThreadVectorRangeBoundariesStruct(thread,arg_begin,arg_end); } diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp index 559d6f2fcb..347778f289 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp @@ -804,6 +804,10 @@ int Threads::concurrency() { return impl_thread_pool_size(0); #endif } +#ifndef KOKKOS_ENABLE_DEPRECATED_CODE +void Threads::fence() const +{ Impl::ThreadsExec::fence() ; } +#endif #ifdef KOKKOS_ENABLE_DEPRECATED_CODE Threads & Threads::instance(int) diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp index 61d7667d58..7af9d9e065 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp @@ -649,8 +649,12 @@ inline bool Threads::wake() { return Impl::ThreadsExec::wake() ; } #endif +inline void Threads::impl_static_fence() +{ Impl::ThreadsExec::fence() ; } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE inline void Threads::fence() { Impl::ThreadsExec::fence() ; } +#endif } /* namespace Kokkos */ diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp index e88abdba50..9d6c0fa8cf 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp @@ -72,9 +72,12 @@ private: enum { TEAM_REDUCE_SIZE = 512 }; +public: typedef Kokkos::Threads execution_space ; - typedef execution_space::scratch_memory_space space ; + typedef 
execution_space::scratch_memory_space scratch_memory_space ; +private: + typedef execution_space::scratch_memory_space space ; ThreadsExec * const m_exec ; ThreadsExec * const * m_team_base ; ///< Base for team fan-in space m_team_shared ; @@ -228,14 +231,20 @@ public: } #endif + template< typename ReducerType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< is_reducer< ReducerType >::value >::type + team_reduce( ReducerType const & reducer ) const noexcept + { team_reduce(reducer,reducer.reference()); } + template< typename ReducerType > KOKKOS_INLINE_FUNCTION typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type #if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - team_reduce( const ReducerType & ) const + team_reduce( const ReducerType &, const typename ReducerType::value_type ) const {} #else - team_reduce( const ReducerType & reducer ) const + team_reduce( const ReducerType & reducer, const typename ReducerType::value_type contribution ) const { typedef typename ReducerType::value_type value_type; // Make sure there is enough scratch space: @@ -247,7 +256,7 @@ public: type * const local_value = ((type*) m_exec->scratch_memory()); // Set this thread's contribution - *local_value = reducer.reference() ; + *local_value = contribution ; // Fence to make sure the base team member has access: memory_fence(); @@ -277,58 +286,7 @@ public: } #endif - template< class ValueType, class JoinOp > - KOKKOS_INLINE_FUNCTION ValueType - team_reduce( const ValueType & value - , const JoinOp & op_in ) const - #if ! 
defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return ValueType(); } - #else - { - typedef ValueType value_type; - const JoinLambdaAdapter op(op_in); - #endif -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - // Make sure there is enough scratch space: - typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE - , value_type , void >::type type ; - - if ( 0 == m_exec ) return value ; - - type * const local_value = ((type*) m_exec->scratch_memory()); - - // Set this thread's contribution - *local_value = value ; - - // Fence to make sure the base team member has access: - memory_fence(); - - if ( team_fan_in() ) { - // The last thread to synchronize returns true, all other threads wait for team_fan_out() - type * const team_value = ((type*) m_team_base[0]->scratch_memory()); - - // Join to the team value: - for ( int i = 1 ; i < m_team_size ; ++i ) { - op.join( *team_value , *((type*) m_team_base[i]->scratch_memory()) ); - } - - // Team base thread may "lap" member threads so copy out to their local value. - for ( int i = 1 ; i < m_team_size ; ++i ) { - *((type*) m_team_base[i]->scratch_memory()) = *team_value ; - } - - // Fence to make sure all team members have access - memory_fence(); - } - - team_fan_out(); - - // Value was changed by the team base - return *((type volatile const *) local_value); - } -#endif - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering + /** \brief Intra-team exclusive prefix sum with team_rank() ordering * with intra-team non-deterministic ordering accumulation. * * The global inter-team accumulation value will, at the end of the @@ -645,6 +603,22 @@ public: return *this; } + template + friend class TeamPolicyInternal; + + template< class ... 
OtherProperties > + TeamPolicyInternal(const TeamPolicyInternal& p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_team_alloc = p.m_team_alloc; + m_team_iter = p.m_team_iter; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + } + //---------------------------------------- #ifdef KOKKOS_ENABLE_DEPRECATED_CODE @@ -734,7 +708,7 @@ public: inline int team_iter() const { return m_team_iter ; } /** \brief Specify league size, request team size */ - TeamPolicyInternal( typename traits::execution_space & + TeamPolicyInternal( const typename traits::execution_space & , int league_size_request , int team_size_request , int vector_length_request = 1 ) @@ -747,7 +721,7 @@ public: { init(league_size_request,team_size_request); (void) vector_length_request; } /** \brief Specify league size, request team size */ - TeamPolicyInternal( typename traits::execution_space & + TeamPolicyInternal( const typename traits::execution_space & , int league_size_request , const Kokkos::AUTO_t & /* team_size_request */ , int /* vector_length_request */ = 1 ) @@ -757,7 +731,11 @@ public: , m_team_scratch_size { 0 , 0 } , m_thread_scratch_size { 0 , 0 } , m_chunk_size(0) +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE { init(league_size_request,traits::execution_space::thread_pool_size(2)); } +#else + { init(league_size_request,traits::execution_space::impl_thread_pool_size(2)); } +#endif TeamPolicyInternal( int league_size_request , int team_size_request @@ -924,6 +902,23 @@ TeamThreadRange( const Impl::ThreadsExecTeamMember& thread, const iType1 & begin return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::ThreadsExecTeamMember >( thread, iType(begin), iType(end) ); } +template< typename iType > +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct< iType, 
Impl::ThreadsExecTeamMember > +TeamVectorRange( const Impl::ThreadsExecTeamMember& thread, const iType& count ) +{ + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::ThreadsExecTeamMember >( thread, count ); +} + +template< typename iType1, typename iType2 > +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, + Impl::ThreadsExecTeamMember> +TeamVectorRange( const Impl::ThreadsExecTeamMember& thread, const iType1 & begin, const iType2 & end ) +{ + typedef typename std::common_type< iType1, iType2 >::type iType; + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::ThreadsExecTeamMember >( thread, iType(begin), iType(end) ); +} template KOKKOS_INLINE_FUNCTION @@ -974,15 +969,18 @@ typename std::enable_if< !Kokkos::is_reducer< ValueType >::value >::type parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, const Lambda & lambda, ValueType& result) { - result = ValueType(); + ValueType intermediate; + Sum sum(intermediate); + sum.init(intermediate); for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { ValueType tmp = ValueType(); lambda(i,tmp); - result+=tmp; + intermediate+=tmp; } - result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd()); + loop_boundaries.thread.team_reduce(sum,intermediate); + result = sum.reference(); } template< typename iType, class Lambda, typename ReducerType > @@ -991,36 +989,14 @@ typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, const Lambda & lambda, const ReducerType& reducer) { - reducer.init(reducer.reference()); + typename ReducerType::value_type value; + reducer.init(value); for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i,reducer.reference()); + lambda(i,value); } - loop_boundaries.thread.team_reduce(reducer); -} - -/** 
\brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of - * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. - * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore - * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or - * '1 for *'). This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, - const Lambda & lambda, const JoinType& join, ValueType& init_result) { - - ValueType result = init_result; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - join(result,tmp); - } - - init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter(join)); + loop_boundaries.thread.team_reduce(reducer,value); } } //namespace Kokkos @@ -1068,25 +1044,6 @@ parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct& - loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& result ) { - -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i,result); - } -} /** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final) * for each i=0..N-1. 
diff --git a/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp index 42269176ed..022a5fc188 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp @@ -52,7 +52,6 @@ #include -#include #include #include diff --git a/lib/kokkos/core/src/eti/CMakeLists.txt b/lib/kokkos/core/src/eti/CMakeLists.txt index a4db7a7eb6..a7e7717a6e 100644 --- a/lib/kokkos/core/src/eti/CMakeLists.txt +++ b/lib/kokkos/core/src/eti/CMakeLists.txt @@ -4,6 +4,9 @@ endif() if (KOKKOS_ENABLE_OPENMP) add_subdirectory(OpenMP) endif() +if (KOKKOS_ENABLE_HPX) + add_subdirectory(HPX) +endif() if (KOKKOS_ENABLE_ROCM) add_subdirectory(ROCm) endif() diff --git a/lib/kokkos/core/src/eti/HPX/CMakeLists.txt b/lib/kokkos/core/src/eti/HPX/CMakeLists.txt new file mode 100644 index 0000000000..131a2d2e6e --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/CMakeLists.txt @@ -0,0 +1,148 @@ +set(D "${CMAKE_CURRENT_SOURCE_DIR}") +set(ETI_SOURCES +${ETI_SOURCES} +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp 
+${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp 
+${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp 
+${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp 
+${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp 
+${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp +${D}/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp +PARENT_SCOPE) diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..905c97c54e --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..a7632852ce --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double**,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..cff22240cf --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double***,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..2b667c674f --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..cd1a445d81 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..3d805d5134 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double********,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..3883d581b6 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..55f3e200a5 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double**,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..ed6d57260b --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double***,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..ed1954e683 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..fb8dadb8d0 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..16a0ed3e9c --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double********,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..f846f94a96 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..f4b51a1d78 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double**,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..622b3119bd --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double***,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..de871103dd --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..720e075aea --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..4c57c457c2 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double********,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..5a37da22c4 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..93a96ee554 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float**,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..dcfcc8a0e3 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float***,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..7082701282 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..cbbd7c9ef3 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..22d6fc5387 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float********,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..d44e95e67e --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..ae79919c42 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float**,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..0c671ad593 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float***,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..24dd1c8354 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..6e2de8a02e --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..38840ac9e6 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float********,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..bcb105628b --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..8730f92f20 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float**,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..785996558b --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float***,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..3ae193ca65 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..81f91019d6 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..d34a4870b9 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float********,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..0da5ed1770 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..444dad079b --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t**,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..3f36a1d714 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t***,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..51c964b92d --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..1a26522ff5 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..9bd9af3fe3 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t********,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..dd5a325535 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..30a44c0a80 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t**,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..0b73280c6e --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t***,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..3997d8ca58 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..6cbaa59223 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..351001c8d1 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t********,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..d37e34af30 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..7609d9478f --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t**,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..30f0c1d882 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t***,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..4c4109e298 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..189245d352 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..921a8e88c7 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t********,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..7e492aa25f --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..13b1a78d7c --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int**,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..03fa72c21c --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int***,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..10a46bcd9d --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..4c23c7e796 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*****,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..1bc7ab41f7 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutLeft,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutLeft,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutLeft,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int********,LayoutLeft,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..0206838af6 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..78b67a4a2a --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int**,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..564f530d9b --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int***,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..b5ae4ae52a --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..b2c91a1aa1 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*****,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..18e3f2b9b9 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutRight,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutRight,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutRight,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int********,LayoutRight,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..e3d08c6e38 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..5001fc2781 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int**,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..fd45308d15 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int***,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..d2fca73151 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..c7fafd4aec --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*****,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..046aafa6ad --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutStride,LayoutRight, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutStride,LayoutLeft, Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutStride,LayoutStride,Experimental::HPX,int64_t) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int********,LayoutStride,Experimental::HPX,int64_t) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..60f78b7a57 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..304a5afc0d --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double**,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..8aeaf8a1f8 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include<Kokkos_Core.hpp> +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double***,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..26ff7aefed --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include<Kokkos_Core.hpp> +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..518d000eea --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S.
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include<Kokkos_Core.hpp> +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..36b3b4fab8 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3.
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include<Kokkos_Core.hpp> +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double********,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..df5c890a49 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3.
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include<Kokkos_Core.hpp> +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..b120215692 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include<Kokkos_Core.hpp> +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double**,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..9b5e4c2e5f --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S.
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include<Kokkos_Core.hpp> +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double***,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..74ad489303 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3.
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include<Kokkos_Core.hpp> +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..bc9dbc65c1 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3.
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include<Kokkos_Core.hpp> +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..fbd98c8011 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3.
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include<Kokkos_Core.hpp> +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double********,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..d52c5306d0 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3.
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include<Kokkos_Core.hpp> +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..5cc29daaca --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3.
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double**,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double**,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..7e63d80236 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double***,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double***,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..11447c11b5 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..bafe266044 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double*****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double*****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..e4ef20c370 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(double********,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(double********,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..fb00c3bfd3 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..12718353e8 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float**,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..c9ab75062d --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float***,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..71380c21a2 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..9787086a80 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..81072d77cb --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float********,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..363b05bace --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..ce1bc89e01 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float**,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..4af590818c --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float***,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..ad399eff76 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..661edef668 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..48cb4a34b1 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float********,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..d2f88bb243 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..58ce6f1911 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float**,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float**,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..bc4efab1e4 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float***,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float***,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..6225cf9720 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..e50472d850 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float*****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float*****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..5ad427acc5 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(float********,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(float********,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..4ae2437fc8 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..02a2b8e1d9 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t**,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..ff693c9b4f --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t***,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..d96960d4a7 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..05c3ef68eb --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..d96f47ece0 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t********,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..208933899e --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..aa7d9b8f15 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t**,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..e43a1783fd --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t***,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..6706074819 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..cd7082dcb3 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..8735d58605 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t********,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..ec371dcba7 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..354da99794 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t**,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t**,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..bbc32aba03 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t***,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t***,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..addbbb291a --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..dbebda1594 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t*****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t*****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..f8a89b4226 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int64_t********,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int64_t********,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp new file mode 100644 index 0000000000..7f0b9fc346 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp new file mode 100644 index 0000000000..4a31e60a3a --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int**,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp new file mode 100644 index 0000000000..e876da3a6c --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int***,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp new file mode 100644 index 0000000000..a7ee2c554d --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp new file mode 100644 index 0000000000..4769c235bc --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*****,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp new file mode 100644 index 0000000000..3ac618b5dd --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutLeft,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutLeft,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutLeft,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int********,LayoutLeft,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp new file mode 100644 index 0000000000..825bee722f --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp new file mode 100644 index 0000000000..44e24e57f3 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int**,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp new file mode 100644 index 0000000000..0b18c7e5c0 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int***,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp new file mode 100644 index 0000000000..951d770305 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp new file mode 100644 index 0000000000..a0e80d764d --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*****,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp new file mode 100644 index 0000000000..d8cd0155af --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutRight,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutRight,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutRight,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int********,LayoutRight,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp new file mode 100644 index 0000000000..c4bd8a043a --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp new file mode 100644 index 0000000000..566eb71e4d --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int**,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int**,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp new file mode 100644 index 0000000000..4b99a8fd0c --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int***,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int***,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp new file mode 100644 index 0000000000..6cf55bb5b4 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp new file mode 100644 index 0000000000..932a322bac --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int*****,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int*****,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp new file mode 100644 index 0000000000..f46a156a93 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp @@ -0,0 +1,54 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Kokkos is licensed under 3-clause BSD terms of use: +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#define KOKKOS_IMPL_COMPILING_LIBRARY true +#include +namespace Kokkos { +namespace Impl { +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutStride,LayoutRight, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutStride,LayoutLeft, Experimental::HPX,int) +KOKKOS_IMPL_VIEWCOPY_ETI_INST(int********,LayoutStride,LayoutStride,Experimental::HPX,int) +KOKKOS_IMPL_VIEWFILL_ETI_INST(int********,LayoutStride,Experimental::HPX,int) + +} +} diff --git a/lib/kokkos/core/src/eti/HPX/Makefile.eti_HPX b/lib/kokkos/core/src/eti/HPX/Makefile.eti_HPX new file mode 100644 index 0000000000..904f32fb82 --- /dev/null +++ b/lib/kokkos/core/src/eti/HPX/Makefile.eti_HPX @@ -0,0 +1,288 @@ +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.o: 
$(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int_LayoutStride_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank3.cpp 
+Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int_LayoutStride_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) 
$(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank4.cpp 
+Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_int64_t_LayoutStride_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_int64_t_LayoutStride_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) 
$(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.o: 
$(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) 
$(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_float_LayoutStride_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutRight_Rank8.cpp 
+Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_float_LayoutStride_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.o: 
$(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) 
$(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank1.cpp 
+Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int_double_LayoutStride_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutLeft_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutRight_Rank8.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank1.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank2.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank3.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank4.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank5.cpp +Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_ETI_PATH)/HPX/Kokkos_HPX_ViewCopyETIInst_int64_t_double_LayoutStride_Rank8.cpp diff --git a/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp b/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp index d27c2e1306..50af5ec82e 100644 --- a/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp @@ -56,11 +56,12 @@ template < typename ExecutionSpace = void , typename IndexType = void , typename IterationPattern = void , typename LaunchBounds = void + , typename MyWorkItemProperty = 
Kokkos::Experimental::WorkItemProperty::None_t > struct PolicyTraitsBase { using type = PolicyTraitsBase< ExecutionSpace, Schedule, WorkTag, IndexType, - IterationPattern, LaunchBounds>; + IterationPattern, LaunchBounds, MyWorkItemProperty>; using execution_space = ExecutionSpace; using schedule_type = Schedule; @@ -68,8 +69,23 @@ struct PolicyTraitsBase using index_type = IndexType; using iteration_pattern = IterationPattern; using launch_bounds = LaunchBounds; + using work_item_property = MyWorkItemProperty; }; +template +struct SetWorkItemProperty +{ + static_assert( std::is_same::value + , "Kokkos Error: More than one work item property given" ); + using type = PolicyTraitsBase< typename PolicyBase::execution_space + , typename PolicyBase::schedule_type + , typename PolicyBase::work_tag + , typename PolicyBase::index_type + , typename PolicyBase::iteration_pattern + , typename PolicyBase::launch_bounds + , Property + >; +}; template struct SetExecutionSpace @@ -82,6 +98,7 @@ struct SetExecutionSpace , typename PolicyBase::index_type , typename PolicyBase::iteration_pattern , typename PolicyBase::launch_bounds + , typename PolicyBase::work_item_property >; }; @@ -96,6 +113,7 @@ struct SetSchedule , typename PolicyBase::index_type , typename PolicyBase::iteration_pattern , typename PolicyBase::launch_bounds + , typename PolicyBase::work_item_property >; }; @@ -110,6 +128,7 @@ struct SetWorkTag , typename PolicyBase::index_type , typename PolicyBase::iteration_pattern , typename PolicyBase::launch_bounds + , typename PolicyBase::work_item_property >; }; @@ -124,6 +143,7 @@ struct SetIndexType , IndexType , typename PolicyBase::iteration_pattern , typename PolicyBase::launch_bounds + , typename PolicyBase::work_item_property >; }; @@ -139,6 +159,7 @@ struct SetIterationPattern , typename PolicyBase::index_type , IterationPattern , typename PolicyBase::launch_bounds + , typename PolicyBase::work_item_property >; }; @@ -154,6 +175,7 @@ struct SetLaunchBounds , 
typename PolicyBase::index_type , typename PolicyBase::iteration_pattern , LaunchBounds + , typename PolicyBase::work_item_property >; }; @@ -170,8 +192,9 @@ struct AnalyzePolicy : public , typename std::conditional< std::is_integral::value , SetIndexType > , typename std::conditional< is_iteration_pattern::value, SetIterationPattern , typename std::conditional< is_launch_bounds::value , SetLaunchBounds + , typename std::conditional< Experimental::is_work_item_property::value, SetWorkItemProperty , SetWorkTag - >::type >::type >::type >::type >::type>::type::type + >::type >::type >::type >::type >::type>::type>::type::type , Traits... > {}; @@ -208,13 +231,15 @@ struct AnalyzePolicy , typename Base::launch_bounds >::type; + using work_item_property = typename Base::work_item_property; + using type = PolicyTraitsBase< execution_space , schedule_type , work_tag , index_type , iteration_pattern , launch_bounds - >; + , work_item_property>; }; template diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp index 3d99b07568..63067c137a 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp @@ -53,6 +53,13 @@ #include #endif +#include +#include + +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + namespace Kokkos { //---------------------------------------------------------------------------- @@ -326,7 +333,165 @@ bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, con } //---------------------------------------------------------------------------- -} // namespace Kokkos +namespace Impl { +// memory-ordered versions are in the Impl namespace + +template +KOKKOS_INLINE_FUNCTION +bool _atomic_compare_exchange_strong_fallback( + T* dest, T compare, T val, memory_order_seq_cst_t, MemoryOrderFailure +) +{ + Kokkos::memory_fence(); + auto rv = 
Kokkos::atomic_compare_exchange_strong( + dest, compare, val + ); + Kokkos::memory_fence(); + return rv; +} + +template +KOKKOS_INLINE_FUNCTION +bool _atomic_compare_exchange_strong_fallback( + T* dest, T compare, T val, memory_order_acquire_t, MemoryOrderFailure +) +{ + auto rv = Kokkos::atomic_compare_exchange_strong( + dest, compare, val + ); + Kokkos::memory_fence(); + return rv; +} + +template +KOKKOS_INLINE_FUNCTION +bool _atomic_compare_exchange_strong_fallback( + T* dest, T compare, T val, memory_order_release_t, MemoryOrderFailure +) +{ + Kokkos::memory_fence(); + return Kokkos::atomic_compare_exchange_strong( + dest, compare, val + ); +} + +template +KOKKOS_INLINE_FUNCTION +bool _atomic_compare_exchange_strong_fallback( + T* dest, T compare, T val, memory_order_relaxed_t, MemoryOrderFailure +) +{ + return Kokkos::atomic_compare_exchange_strong( + dest, compare, val + ); +} + +#if (defined(KOKKOS_ENABLE_GNU_ATOMICS) && !defined(__CUDA_ARCH__)) \ + || (defined(KOKKOS_ENABLE_INTEL_ATOMICS) && !defined(__CUDA_ARCH__)) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + +#if defined(__CUDA_ARCH__) + #define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH __inline__ __device__ +#else + #define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH inline +#endif + +template +KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH +bool _atomic_compare_exchange_strong( + T* dest, T compare, T val, + MemoryOrderSuccess, + MemoryOrderFailure, + typename std::enable_if< + ( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ) + && std::is_same< + typename MemoryOrderSuccess::memory_order, + typename std::remove_cv::type + >::value + && std::is_same< + typename MemoryOrderFailure::memory_order, + typename std::remove_cv::type + >::value, + void const** + >::type = nullptr +) { + return __atomic_compare_exchange_n( + dest, &compare, val, /* weak = */ false, + MemoryOrderSuccess::gnu_constant, + MemoryOrderFailure::gnu_constant + ); +} + +template 
+KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH +bool _atomic_compare_exchange_strong( + T* dest, T compare, T val, + MemoryOrderSuccess order_success, + MemoryOrderFailure order_failure, + typename std::enable_if< + !( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ) + && std::is_same< + typename MemoryOrderSuccess::memory_order, + typename std::remove_cv::type + >::value + && std::is_same< + typename MemoryOrderFailure::memory_order, + typename std::remove_cv::type + >::value, + void const** + >::type = nullptr +) { /* sizeof(T) is not 1/2/4/8: defer to the memory_fence-based _atomic_compare_exchange_strong_fallback overloads */ + return _atomic_compare_exchange_strong_fallback( + dest, compare, val, + order_success, order_failure + ); +} + +#else + +template +KOKKOS_INLINE_FUNCTION +bool _atomic_compare_exchange_strong( + T* dest, T compare, T val, + MemoryOrderSuccess order_success, + MemoryOrderFailure order_failure +) { + return _atomic_compare_exchange_strong_fallback( + dest, compare, val, order_success, order_failure + ); +} + +#endif + +// TODO static asserts in overloads that don't make sense (as listed in https://gcc.gnu.org/onlinedocs/gcc-5.2.0/gcc/_005f_005fatomic-Builtins.html) +template +KOKKOS_FORCEINLINE_FUNCTION +bool atomic_compare_exchange_strong( + T* dest, T compare, T val, + MemoryOrderSuccess order_success, + MemoryOrderFailure order_failure +) { + return _atomic_compare_exchange_strong(dest, compare, val, order_success, order_failure); +} + + +} // end namespace Impl + +} // namespace Kokkos + +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp new file mode 100644 index 0000000000..3abc8ed4b7 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp @@ -0,0 +1,418 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) +#include +#endif + +#include +#include +#ifndef KOKKOS_ATOMIC_COMPARE_EXCHANGE_WEAK_HPP +#define KOKKOS_ATOMIC_COMPARE_EXCHANGE_WEAK_HPP + +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + +namespace Kokkos { + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +// Cuda sm_70 or greater supports C++-like semantics directly + +#if defined( KOKKOS_ENABLE_CUDA ) + +#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) + + +#if __CUDA_ARCH__ >= 700 +// See: https://github.com/ogiroux/freestanding +# define kokkos_cuda_internal_cas_release_32(ptr, old, expected, desired) \ + asm volatile("atom.cas.release.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +# define kokkos_cuda_internal_cas_acquire_32(ptr, old, expected, desired) \ + asm volatile("atom.cas.acquire.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +# define kokkos_cuda_internal_cas_acq_rel_32(ptr, old, expected, desired) \ + asm volatile("atom.cas.acq_rel.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +# define kokkos_cuda_internal_cas_relaxed_32(ptr, old, expected, desired) \ + asm volatile("atom.cas.relaxed.sys.b32 %0, [%1], %2, %3;" : "=r"(old) : "l"(ptr), "r"(expected), "r"(desired) : "memory") +# define kokkos_cuda_internal_fence_seq_cst() asm volatile("fence.sc.sys;" : : : "memory") +# define kokkos_cuda_internal_fence_acq_rel() asm volatile("fence.acq_rel.sys;" : : : "memory") +#else +# define kokkos_cuda_internal_fence_acq_rel() asm volatile("membar.sys;" : : : "memory") +# define kokkos_cuda_internal_fence_seq_cst() asm volatile("membar.sys;" : : : "memory") +#endif + + +// 
32-bit version +template ::type = 0 +> +__inline__ __device__ +bool +atomic_compare_exchange_weak( + T volatile* const dest, + T* const expected, + T const desired, + std::memory_order success_order = std::memory_order_seq_cst, + std::memory_order failure_order = std::memory_order_seq_cst +) { + // TODO assert that success_order >= failure_order + // See: https://github.com/ogiroux/freestanding + int32_t tmp = 0; + int32_t old = 0; + memcpy(&tmp, &desired, sizeof(T)); + memcpy(&old, expected, sizeof(T)); + int32_t old_tmp = old; +#if __CUDA_ARCH__ >= 700 + switch(success_order) { + case std::memory_order_seq_cst: + // sequentially consistent is just an acquire with a seq_cst fence + kokkos_cuda_internal_fence_seq_cst(); + kokkos_cuda_internal_cas_acquire_32((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_acquire: + kokkos_cuda_internal_cas_acquire_32((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_consume: + // same as acquire on PTX compatible platforms + kokkos_cuda_internal_cas_acquire_32((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_acq_rel: + kokkos_cuda_internal_cas_acq_rel_32((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_release: + kokkos_cuda_internal_cas_release_32((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_relaxed: + kokkos_cuda_internal_cas_relaxed_32((T*)dest, old, old_tmp, tmp); + break; + }; +#else + // All of the orders that require a fence before the relaxed atomic operation: + if( + success_order == std::memory_order_release + || success_order == std::memory_order_acq_rel + ) { + kokkos_cuda_internal_fence_acq_rel(); + } + else if(success_order == std::memory_order_seq_cst) { + kokkos_cuda_internal_fence_seq_cst(); + } + // This is relaxed: + // Cuda API requires casting away volatile + atomicCAS((T*)dest, old_tmp, tmp); +#endif + bool const rv = (old == old_tmp); +#if __CUDA_ARCH__ < 700 + if(rv) { + if( + success_order == std::memory_order_acquire + || 
success_order == std::memory_order_consume + || success_order == std::memory_order_acq_rel + ) { + kokkos_cuda_internal_fence_acq_rel(); + } + else if(success_order == std::memory_order_seq_cst) { + kokkos_cuda_internal_fence_seq_cst(); + } + } + else { + if( + failure_order == std::memory_order_acquire + || failure_order == std::memory_order_consume + || failure_order == std::memory_order_acq_rel + ) { + kokkos_cuda_internal_fence_acq_rel(); + } + else if(failure_order == std::memory_order_seq_cst) { + kokkos_cuda_internal_fence_seq_cst(); + } + } +#endif + memcpy(expected, &old, sizeof(T)); + return rv; +} + +// 64-bit version +template ::type = 0 +> +bool +atomic_compare_exchange_weak( + T volatile* const dest, + T* const expected, + T const desired, + std::memory_order success_order = std::memory_order_seq_cst, + std::memory_order failure_order = std::memory_order_seq_cst +) { + // TODO assert that success_order >= failure_order + // See: https://github.com/ogiroux/freestanding + int64_t tmp = 0; + int64_t old = 0; + memcpy(&tmp, &desired, sizeof(T)); + memcpy(&old, expected, sizeof(T)); + int64_t old_tmp = old; +#if __CUDA_ARCH__ >= 700 + switch(success_order) { + case std::memory_order_seq_cst: + // sequentially consistent is just an acquire with a seq_cst fence + kokkos_cuda_internal_fence_seq_cst(); + kokkos_cuda_internal_cas_acquire_64((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_acquire: + kokkos_cuda_internal_cas_acquire_64((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_consume: + // same as acquire on PTX compatible platforms + kokkos_cuda_internal_cas_acquire_64((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_acq_rel: + kokkos_cuda_internal_cas_acq_rel_64((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_release: + kokkos_cuda_internal_cas_release_64((T*)dest, old, old_tmp, tmp); + break; + case std::memory_order_relaxed: + kokkos_cuda_internal_cas_relaxed_64((T*)dest, old, old_tmp, 
tmp); + break; + }; +#else + // Cuda API requires casting away volatile + atomicCAS((T*)dest, old_tmp, tmp); +#endif + bool const rv = (old == old_tmp); + memcpy(expected, &old, sizeof(T)); + return rv; +} + +#endif // defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) + +#endif // defined( KOKKOS_ENABLE_CUDA ) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +// GCC native CAS supports int, long, unsigned int, unsigned long. +// Intel native CAS support int and long with the same interface as GCC. +#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS) +#if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) +#if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS) + +inline +int atomic_compare_exchange( volatile int * const dest, const int compare, const int val) +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_val_compare_and_swap(dest,compare,val); +} + +inline +long atomic_compare_exchange( volatile long * const dest, const long compare, const long val ) +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_val_compare_and_swap(dest,compare,val); +} + +#if defined( KOKKOS_ENABLE_GNU_ATOMICS ) + +// GCC supports unsigned + +inline +unsigned int atomic_compare_exchange( volatile unsigned int * const dest, const unsigned int compare, const unsigned int val ) +{ return __sync_val_compare_and_swap(dest,compare,val); } + +inline +unsigned long atomic_compare_exchange( volatile unsigned long * const dest , + const unsigned long compare , + const unsigned long val ) +{ return __sync_val_compare_and_swap(dest,compare,val); } + +#endif + +template < typename T > +inline +T atomic_compare_exchange( volatile T * const dest, const T & compare, + typename Kokkos::Impl::enable_if< 
sizeof(T) == sizeof(int) , const T & >::type val ) +{ + union U { + int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {}; + } tmp ; + +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + + tmp.i = __sync_val_compare_and_swap( (int*) dest , *((int*)&compare) , *((int*)&val) ); + return tmp.t ; +} + +template < typename T > +inline +T atomic_compare_exchange( volatile T * const dest, const T & compare, + typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + sizeof(T) == sizeof(long) , const T & >::type val ) +{ + union U { + long i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {}; + } tmp ; + +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + + tmp.i = __sync_val_compare_and_swap( (long*) dest , *((long*)&compare) , *((long*)&val) ); + return tmp.t ; +} + +#if defined( KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 ) +template < typename T > +inline +T atomic_compare_exchange( volatile T * const dest, const T & compare, + typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && + sizeof(T) != sizeof(long) && + sizeof(T) == sizeof(Impl::cas128_t), const T & >::type val ) +{ + union U { + Impl::cas128_t i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {}; + } tmp ; + +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + + tmp.i = Impl::cas128( (Impl::cas128_t*) dest , *((Impl::cas128_t*)&compare) , *((Impl::cas128_t*)&val) ); + return tmp.t ; +} +#endif + +template < typename T > +inline +T atomic_compare_exchange( volatile T * const dest , const T compare , + typename Kokkos::Impl::enable_if< + ( sizeof(T) != 4 ) + && ( sizeof(T) != 8 ) + #if defined(KOKKOS_ENABLE_ASM) && defined ( KOKKOS_ENABLE_ISA_X86_64 ) + && ( sizeof(T) != 16 ) + #endif + , const T >::type& val ) +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + + while( 
!Impl::lock_address_host_space( (void*) dest ) ); + T return_val = *dest; + if( return_val == compare ) { + // Don't use the following line of code here: + // + //const T tmp = *dest = val; + // + // Instead, put each assignment in its own statement. This is + // because the overload of T::operator= for volatile *this should + // return void, not volatile T&. See Kokkos #177: + // + // https://github.com/kokkos/kokkos/issues/177 + *dest = val; + const T tmp = *dest; + #ifndef KOKKOS_COMPILER_CLANG + (void) tmp; + #endif + } + Impl::unlock_address_host_space( (void*) dest ); + return return_val; +} +//---------------------------------------------------------------------------- + +#elif defined( KOKKOS_ENABLE_OPENMP_ATOMICS ) + +template< typename T > +KOKKOS_INLINE_FUNCTION +T atomic_compare_exchange( volatile T * const dest, const T compare, const T val ) +{ + T retval; +#pragma omp critical + { + retval = dest[0]; + if ( retval == compare ) + dest[0] = val; + } + return retval; +} + +#elif defined( KOKKOS_ENABLE_SERIAL_ATOMICS ) + +template< typename T > +KOKKOS_INLINE_FUNCTION +T atomic_compare_exchange( volatile T * const dest_v, const T compare, const T val ) +{ + T* dest = const_cast(dest_v); + T retval = *dest; + if (retval == compare) *dest = val; + return retval; +} + +#endif +#endif +#endif // !defined ROCM_ATOMICS + +template +KOKKOS_INLINE_FUNCTION +bool atomic_compare_exchange_strong(volatile T* const dest, const T compare, const T val) +{ + return compare == atomic_compare_exchange(dest, compare, val); +} +//---------------------------------------------------------------------------- + +} // namespace Kokkos + +#endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp index d6fab81133..495fd48477 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp @@ -90,10 +90,12 @@ __inline__ __device__ T atomic_fetch_add( 
volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) { - union U { + // to work around a bug in the clang cuda compiler, the name here needs to be + // different from the one internal to the other overloads + union U1 { int i ; T t ; - KOKKOS_INLINE_FUNCTION U() {}; + KOKKOS_INLINE_FUNCTION U1() {}; } assume , oldval , newval ; oldval.t = *dest ; @@ -113,10 +115,12 @@ T atomic_fetch_add( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int) , const T >::type val ) { - union U { + // to work around a bug in the clang cuda compiler, the name here needs to be + // different from the one internal to the other overloads + union U2 { unsigned long long int i ; T t ; - KOKKOS_INLINE_FUNCTION U() {}; + KOKKOS_INLINE_FUNCTION U2() {}; } assume , oldval , newval ; oldval.t = *dest ; @@ -176,7 +180,7 @@ T atomic_fetch_add( volatile T * const dest , #if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) #if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS) -#if defined( KOKKOS_ENABLE_ASM ) && defined ( KOKKOS_ENABLE_ISA_X86_64 ) +#if defined( KOKKOS_ENABLE_ASM ) && (defined(KOKKOS_ENABLE_ISA_X86_64) || defined(KOKKOS_KNL_USE_ASM_WORKAROUND)) inline int atomic_fetch_add( volatile int * dest , const int val ) { diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp index 48dc8731ef..7a4f95cd99 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp @@ -89,7 +89,11 @@ __inline__ __device__ T atomic_fetch_sub( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) { - union { int i ; T t ; } oldval , assume , newval ; + union U { + int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; oldval.t = 
*dest ; @@ -108,7 +112,11 @@ T atomic_fetch_sub( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int) , const T >::type val ) { - union { unsigned long long int i ; T t ; } oldval , assume , newval ; + union U { + unsigned long long int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; oldval.t = *dest ; @@ -211,7 +219,11 @@ inline T atomic_fetch_sub( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) { - union { int i ; T t ; } assume , oldval , newval ; + union U { + int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; #if defined( KOKKOS_ENABLE_RFO_PREFETCH ) _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); @@ -238,7 +250,11 @@ T atomic_fetch_sub( volatile T * const dest , _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); #endif - union { long i ; T t ; } assume , oldval , newval ; + union U { + long i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; oldval.t = *dest ; diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp index a3a18166af..c1a7d80364 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp @@ -156,13 +156,17 @@ T atomic_fetch_oper( const Oper& op, volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int) , const T >::type val ) { - union { unsigned long long int i ; T t ; } oldval , assume , newval ; + union U { + unsigned long long int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; oldval.t = *dest ; do { assume.i = oldval.i ; - newval.t = Oper::apply(assume.t, val) ; + newval.t = op.apply(assume.t, val) ; oldval.i = Kokkos::atomic_compare_exchange( (unsigned long long int*)dest , assume.i , newval.i ); } while 
( assume.i != oldval.i ); @@ -175,7 +179,11 @@ T atomic_oper_fetch( const Oper& op, volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int) , const T >::type val ) { - union { unsigned long long int i ; T t ; } oldval , assume , newval ; + union U { + unsigned long long int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; oldval.t = *dest ; @@ -193,13 +201,17 @@ KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( const Oper& op, volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T >::type val ) { - union { int i ; T t ; } oldval , assume , newval ; + union U { + int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; oldval.t = *dest ; do { assume.i = oldval.i ; - newval.t = Oper::apply(assume.t, val) ; + newval.t = op.apply(assume.t, val) ; oldval.i = Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i ); } while ( assume.i != oldval.i ); @@ -211,7 +223,11 @@ KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( const Oper& op, volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int), const T >::type val ) { - union { int i ; T t ; } oldval , assume , newval ; + union U { + int i ; + T t ; + KOKKOS_INLINE_FUNCTION U() {} + } oldval , assume , newval ; oldval.t = *dest ; diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Load.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Load.hpp new file mode 100644 index 0000000000..2db74b9f1e --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Load.hpp @@ -0,0 +1,266 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_KOKKOS_ATOMIC_LOAD_HPP +#define KOKKOS_IMPL_KOKKOS_ATOMIC_LOAD_HPP + +#include +#if defined(KOKKOS_ATOMIC_HPP) + +#include + +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + +namespace Kokkos { +namespace Impl { + +// Olivier's implementation helpfully binds to the same builtins as GNU, so +// we make this code common across multiple options +#if (defined(KOKKOS_ENABLE_GNU_ATOMICS) && !defined(__CUDA_ARCH__)) \ + || (defined(KOKKOS_ENABLE_INTEL_ATOMICS) && !defined(__CUDA_ARCH__)) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + +#if defined(__CUDA_ARCH__) && defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + #define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH __inline__ __device__ +#else + #define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH inline +#endif + +template +KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH +T _atomic_load( + T* ptr, MemoryOrder, + typename std::enable_if< + ( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ) + && std::is_same< + typename MemoryOrder::memory_order, + typename std::remove_cv::type + >::value, + void const** + >::type = nullptr +) { + return __atomic_load_n(ptr, MemoryOrder::gnu_constant); // 1-, 2-, 4- or 8-byte T: value-returning builtin +} + +template +KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH +T _atomic_load( + T* ptr, MemoryOrder, + typename std::enable_if< + !( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ) + && std::is_default_constructible::value + && std::is_same< + typename MemoryOrder::memory_order, + typename std::remove_cv::type + >::value, + void const** + >::type = nullptr +) { + T rv{}; + __atomic_load(ptr, &rv, MemoryOrder::gnu_constant); // other sizes: generic builtin fills rv + return rv; +} + +#undef KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH + +#elif defined(__CUDA_ARCH__) + +// Not compiling for Volta or later, or Cuda ASM atomics were manually disabled + +template +__device__ __inline__ +T
_relaxed_atomic_load_impl( + T* ptr, + typename std::enable_if< + ( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ), + void const** + >::type = nullptr +) { + return *ptr; +} + +template +struct NoOpOper { // identity operation: apply() returns the current value unchanged + __device__ __inline__ + static constexpr T apply(T const& t, T const&) noexcept { return t; } +}; + +template +__device__ __inline__ +T _relaxed_atomic_load_impl( + T* ptr, + typename std::enable_if< + !( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ), + void const** + >::type = nullptr +) { + T rv{}; + // TODO remove a copy operation here? + rv = Kokkos::atomic_oper_fetch(NoOpOper{}, ptr, rv); // operate on *ptr, not on the local rv; assumes atomic_oper_fetch returns the value apply() produced + return rv; +} + +template +__device__ __inline__ +T _atomic_load(T* ptr, memory_order_seq_cst_t) { + Kokkos::memory_fence(); + T rv = Impl::_relaxed_atomic_load_impl(ptr); + Kokkos::memory_fence(); + return rv; +} + +template +__device__ __inline__ +T _atomic_load(T* ptr, memory_order_acquire_t) { + T rv = Impl::_relaxed_atomic_load_impl(ptr); + Kokkos::memory_fence(); + return rv; +} + +template +__device__ __inline__ +T _atomic_load(T* ptr, memory_order_relaxed_t) { + return _relaxed_atomic_load_impl(ptr); +} + +#elif defined(KOKKOS_ENABLE_OPENMP_ATOMICS) + +template +inline +T _atomic_load(T* ptr, MemoryOrder) +{ + // AFAICT, all OpenMP atomics are sequentially consistent, so memory order doesn't matter + T retval{ }; +#pragma omp atomic read + { + retval = *ptr; + } + return retval; +} + +#elif defined(KOKKOS_ENABLE_SERIAL_ATOMICS) + +template +inline +T _atomic_load(T* ptr, MemoryOrder) +{ + return *ptr; +} + +#endif // end of all atomic implementations + + +template +KOKKOS_FORCEINLINE_FUNCTION +T
atomic_load(T* ptr, Impl::memory_order_relaxed_t) { + return _atomic_load(ptr, Impl::memory_order_relaxed); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +T atomic_load(T* ptr, Impl::memory_order_release_t) { + static_assert( + sizeof(T) == 0, // just something that will always be false, but only on instantiation + "atomic_load with memory order release doesn't make any sense!" + ); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +T atomic_load(T* ptr, Impl::memory_order_acq_rel_t) { + static_assert( + sizeof(T) == 0, // just something that will always be false, but only on instantiation + "atomic_load with memory order acq_rel doesn't make any sense!" + ); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +T atomic_load(T* ptr) { + // relaxed by default! + return _atomic_load(ptr, Impl::memory_order_relaxed); +} + +} // end namespace Impl +} // end namespace Kokkos + +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + +#endif // defined(KOKKOS_ATOMIC_HPP) +#endif //KOKKOS_IMPL_KOKKOS_ATOMIC_LOAD_HPP diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Memory_Order.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Memory_Order.hpp new file mode 100644 index 0000000000..7b9c08551c --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Memory_Order.hpp @@ -0,0 +1,122 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2.
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP +#define KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP + +#include + +#include + +namespace Kokkos { +namespace Impl { + +/** @file + * Provides strongly-typed analogs of the standard memory order enumerators. + * In addition to (very slightly) reducing the constant propagation burden on + * the compiler, this allows us to give compile-time errors for things that + * don't make sense, like atomic_load with memory order release. 
+ */ + +struct memory_order_seq_cst_t { // tag type for sequentially consistent ordering + using memory_order = memory_order_seq_cst_t; // self-alias, tested with std::is_same by the _atomic_load/_atomic_store overloads +#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \ + || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + static constexpr auto gnu_constant = __ATOMIC_SEQ_CST; // value handed to the __atomic_* builtins +#endif + static constexpr auto std_constant = std::memory_order_seq_cst; // matching <atomic> enumerator +}; +constexpr memory_order_seq_cst_t memory_order_seq_cst = { }; // instance passed as the order argument + +struct memory_order_relaxed_t { // tag type for relaxed ordering + using memory_order = memory_order_relaxed_t; +#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \ + || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + static constexpr auto gnu_constant = __ATOMIC_RELAXED; +#endif + static constexpr auto std_constant = std::memory_order_relaxed; +}; +constexpr memory_order_relaxed_t memory_order_relaxed = { }; + +struct memory_order_acquire_t { // tag type for acquire ordering + using memory_order = memory_order_acquire_t; +#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \ + || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + static constexpr auto gnu_constant = __ATOMIC_ACQUIRE; +#endif + static constexpr auto std_constant = std::memory_order_acquire; +}; +constexpr memory_order_acquire_t memory_order_acquire = { }; + +struct memory_order_release_t { // tag type for release ordering + using memory_order = memory_order_release_t; +#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \ + || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + static constexpr auto gnu_constant = __ATOMIC_RELEASE; +#endif + static constexpr auto std_constant = std::memory_order_release; +}; +constexpr memory_order_release_t memory_order_release = { }; + +struct memory_order_acq_rel_t { // tag type for acquire-release ordering + using memory_order = memory_order_acq_rel_t; +#if defined(KOKKOS_ENABLE_GNU_ATOMICS) \ + || defined(KOKKOS_ENABLE_INTEL_ATOMICS) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + static constexpr auto gnu_constant = __ATOMIC_ACQ_REL; +#endif + static constexpr auto std_constant = std::memory_order_acq_rel; +};
+constexpr memory_order_acq_rel_t memory_order_acq_rel = { }; + + +// Intentionally omit consume (for now) + +} // end namespace Impl +} // end namespace Kokkos + +#endif //KOKKOS_KOKKOS_ATOMIC_MEMORY_ORDER_HPP diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Store.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Store.hpp new file mode 100644 index 0000000000..066f90480d --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Store.hpp @@ -0,0 +1,258 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_KOKKOS_ATOMIC_STORE_HPP +#define KOKKOS_IMPL_KOKKOS_ATOMIC_STORE_HPP + +#include +#if defined(KOKKOS_ATOMIC_HPP) + +#include + +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + +namespace Kokkos { +namespace Impl { + +// Olivier's implementation helpfully binds to the same builtins as GNU, so +// we make this code common across multiple options +#if (defined(KOKKOS_ENABLE_GNU_ATOMICS) && !defined(__CUDA_ARCH__)) \ + || (defined(KOKKOS_ENABLE_INTEL_ATOMICS) && !defined(__CUDA_ARCH__)) \ + || defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + +#if defined(__CUDA_ARCH__) && defined(KOKKOS_ENABLE_CUDA_ASM_ATOMICS) + #define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH __inline__ __device__ +#else + #define KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH inline +#endif + +template +KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH +void _atomic_store( + T* ptr, T val, MemoryOrder, + typename std::enable_if< + ( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ) + && std::is_same< + typename MemoryOrder::memory_order, + typename std::remove_cv::type + >::value, + void const** + >::type = nullptr +) { + __atomic_store_n(ptr, val, MemoryOrder::gnu_constant); // 1-, 2-, 4- or 8-byte T: by-value builtin +} + +template +KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH +void _atomic_store( + T* ptr,
T val, MemoryOrder, + typename std::enable_if< + !( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ) + && std::is_default_constructible::value + && std::is_same< + typename MemoryOrder::memory_order, + typename std::remove_cv::type + >::value, + void const** + >::type = nullptr +) { + __atomic_store(ptr, &val, MemoryOrder::gnu_constant); // other sizes: generic builtin reads val through a pointer +} + +#undef KOKKOS_INTERNAL_INLINE_DEVICE_IF_CUDA_ARCH + +#elif defined(__CUDA_ARCH__) + +// Not compiling for Volta or later, or Cuda ASM atomics were manually disabled + +template +__device__ __inline__ +void _relaxed_atomic_store_impl( + T* ptr, T val, + typename std::enable_if< + ( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ), + void const** + >::type = nullptr +) { + *ptr = val; +} + +template +struct StoreOper { // replacement operation: apply() ignores the old value and yields val + __device__ __inline__ + static constexpr T apply(T const&, T const& val) noexcept { return val; } +}; + +template +__device__ __inline__ +void _relaxed_atomic_store_impl( + T* ptr, T val, + typename std::enable_if< + !( + sizeof(T) == 1 + || sizeof(T) == 2 + || sizeof(T) == 4 + || sizeof(T) == 8 + ), + void const** + >::type = nullptr +) { + Kokkos::atomic_oper_fetch(StoreOper{}, ptr, (T&&)val); // destination is ptr; the former &rv named no declared variable +} + +template +__device__ __inline__ +void _atomic_store(T* ptr, T val, memory_order_seq_cst_t) { + Kokkos::memory_fence(); + Impl::_relaxed_atomic_store_impl(ptr, val); + Kokkos::memory_fence(); + // void function: a store has nothing to return +} + +template +__device__ __inline__ +void _atomic_store(T* ptr, T val, memory_order_release_t) { + Kokkos::memory_fence(); + _relaxed_atomic_store_impl(ptr, val); +} + +template +__device__ __inline__ +void _atomic_store(T* ptr, T val, memory_order_relaxed_t) { + _relaxed_atomic_store_impl(ptr, val); +} + +#elif defined(KOKKOS_ENABLE_OPENMP_ATOMICS) + +template +inline +void _atomic_store(T* ptr, T val, MemoryOrder) +{ + // AFAICT, all OpenMP atomics are sequentially consistent, so memory order doesn't matter +#pragma omp atomic write + { + *ptr =
val; + } +} + +#elif defined(KOKKOS_ENABLE_SERIAL_ATOMICS) + +template +inline +void _atomic_store(T* ptr, T val, MemoryOrder) +{ + *ptr = val; +} + +#endif // end of all atomic implementations + + +template +KOKKOS_FORCEINLINE_FUNCTION +void atomic_store(T* ptr, T val, Impl::memory_order_seq_cst_t) { + _atomic_store(ptr, val, Impl::memory_order_seq_cst); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +void atomic_store(T* ptr, T val, Impl::memory_order_release_t) { + _atomic_store(ptr, val, Impl::memory_order_release); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +void atomic_store(T* ptr, T val, Impl::memory_order_relaxed_t) { + _atomic_store(ptr, val, Impl::memory_order_relaxed); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +void atomic_store(T* ptr, T val, Impl::memory_order_acquire_t) { + static_assert( + sizeof(T) == 0, // just something that will always be false, but only on instantiation + "atomic_store with memory order acquire doesn't make any sense!" + ); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +void atomic_store(T* ptr, T val, Impl::memory_order_acq_rel_t) { + static_assert( + sizeof(T) == 0, // just something that will always be false, but only on instantiation + "atomic_store with memory order acq_rel doesn't make any sense!" + ); +} + +template +KOKKOS_FORCEINLINE_FUNCTION +void atomic_store(T* ptr, T val) { + // relaxed by default! + _atomic_store(ptr, val, Impl::memory_order_relaxed); // forward val like the explicit-order overloads do +} + +} // end namespace Impl +} // end namespace Kokkos + +#if defined(KOKKOS_ENABLE_CUDA) +#include +#endif + +#endif // defined(KOKKOS_ATOMIC_HPP) +#endif //KOKKOS_IMPL_KOKKOS_ATOMIC_STORE_HPP diff --git a/lib/kokkos/core/src/impl/Kokkos_ChaseLev.hpp b/lib/kokkos/core/src/impl/Kokkos_ChaseLev.hpp new file mode 100644 index 0000000000..f86e68cb1d --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_ChaseLev.hpp @@ -0,0 +1,314 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v.
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_LOCKFREEDEQUE_HPP +#define KOKKOS_IMPL_LOCKFREEDEQUE_HPP + +#include +#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using CUDA + +#include + +#include +#include +#include // KOKKOS_EXPECTS +#include // KOKKOS_EXPECTS + +#include // atomic_compare_exchange, atomic_fence +#include "Kokkos_LIFO.hpp" + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template +struct fixed_size_circular_buffer { // array of node pointers with compile-time capacity; operator[] wraps idx modulo size() +public: + + using node_type = NodeType; + using size_type = SizeType; + +private: + + node_type* m_buffer[CircularBufferSize] = { nullptr }; + +public: + + fixed_size_circular_buffer() = default; + fixed_size_circular_buffer(fixed_size_circular_buffer const&) = delete; + fixed_size_circular_buffer(fixed_size_circular_buffer&&) = default; + fixed_size_circular_buffer& operator=(fixed_size_circular_buffer const&) = delete; + fixed_size_circular_buffer& operator=(fixed_size_circular_buffer&&) = default; + ~fixed_size_circular_buffer() = default; + + KOKKOS_FORCEINLINE_FUNCTION + static constexpr size_type size() noexcept { + return size_type(CircularBufferSize); + } + + KOKKOS_FORCEINLINE_FUNCTION + node_type* operator[](size_type idx) const noexcept { + return m_buffer[idx % size()]; + } + + KOKKOS_FORCEINLINE_FUNCTION + node_type*& operator[](size_type idx) noexcept { + return m_buffer[idx % size()]; + } +}; + +template +struct non_owning_variable_size_circular_buffer { // same indexing interface over a buffer pointer and length supplied by the caller +public: + + using node_type = NodeType; + using size_type = SizeType; +
+private: + + ObservingRawPtr m_buffer = nullptr; + size_type m_size = 0; + +public: + + KOKKOS_INLINE_FUNCTION + non_owning_variable_size_circular_buffer( + ObservingRawPtr buffer, + size_type arg_size + ) noexcept + : m_buffer(buffer), + m_size(arg_size) + { } + + non_owning_variable_size_circular_buffer() = default; + non_owning_variable_size_circular_buffer(non_owning_variable_size_circular_buffer const&) = delete; + non_owning_variable_size_circular_buffer(non_owning_variable_size_circular_buffer&&) = default; + non_owning_variable_size_circular_buffer& operator=(non_owning_variable_size_circular_buffer const&) = delete; + non_owning_variable_size_circular_buffer& operator=(non_owning_variable_size_circular_buffer&&) = default; + ~non_owning_variable_size_circular_buffer() = default; + + KOKKOS_FORCEINLINE_FUNCTION + constexpr size_type size() const noexcept { + return m_size; + } + + KOKKOS_FORCEINLINE_FUNCTION + node_type* operator[](size_type idx) const noexcept { + return m_buffer[idx % size()]; + } + + KOKKOS_FORCEINLINE_FUNCTION + node_type*& operator[](size_type idx) noexcept { + return m_buffer[idx % size()]; + } +}; + +/** Based on "Correct and Efficient Work-Stealing for Weak Memory Models," + * PPoPP '13, https://www.di.ens.fr/~zappa/readings/ppopp13.pdf + * + */ +template < + class T, + class CircularBufferT, + class SizeType = int32_t +> +struct ChaseLevDeque { // pop() and push() work at m_bottom, steal() works at m_top +public: + + using size_type = SizeType; + using value_type = T; + // Still using intrusive linked list for waiting queue + using node_type = SimpleSinglyLinkedListNode<>; + +private: + + // TODO @tasking @new_feature DSH variable size circular buffer?
+ + CircularBufferT m_array; + size_type m_top = 0; + size_type m_bottom = 0; + + +public: + + template < + class _ignore=void, + class=typename std::enable_if< + std::is_default_constructible::value + >::type + > + ChaseLevDeque() : m_array() { } + + explicit + ChaseLevDeque(CircularBufferT buffer) + : m_array(std::move(buffer)) + { } + + KOKKOS_INLINE_FUNCTION + bool empty() const { + // TODO @tasking @memory_order DSH memory order + return m_top > m_bottom - 1; // same test as m_top >= m_bottom for an integer size_type, absent overflow + } + + KOKKOS_INLINE_FUNCTION + OptionalRef + pop() { // takes from the bottom end; the result is reset to nullptr when the CAS on the last element fails + auto b = m_bottom - 1; // atomic load relaxed + auto& a = m_array; // atomic load relaxed + m_bottom = b; // atomic store relaxed + Kokkos::memory_fence(); // memory order seq_cst + auto t = m_top; // atomic load relaxed + OptionalRef return_value; + if(t <= b) { + /* non-empty queue */ + return_value = *static_cast(a[b]); // relaxed load + if(t == b) { + /* single last element in the queue. */ + if(not Impl::atomic_compare_exchange_strong(&m_top, t, t+1, memory_order_seq_cst, memory_order_relaxed)) { + /* failed race, someone else stole it */ + return_value = nullptr; + } + m_bottom = b + 1; // memory order relaxed + } + } else { + /* empty queue */ + m_bottom = b + 1; // memory order relaxed + } + return return_value; + } + + KOKKOS_INLINE_FUNCTION + bool push(node_type&& node) + { + // Just forward to the lvalue version + return push(node); + } + + KOKKOS_INLINE_FUNCTION + bool push(node_type& node) + { // stores &node at the bottom index; returns false, without growing, when the buffer is full + auto b = m_bottom; // memory order relaxed + auto t = Impl::atomic_load(&m_top, memory_order_acquire); + auto& a = m_array; + if(b - t > a.size() - 1) { + /* queue is full, resize */ + //m_array = a->grow(); + //a = m_array; + return false; + } + a[b] = &node; // relaxed + Impl::atomic_store(&m_bottom, b + 1, memory_order_release); + return true; + } + + KOKKOS_INLINE_FUNCTION + OptionalRef + steal() { // takes from the top end; the CAS on m_top decides whether this caller keeps the element + auto t = m_top; // TODO @tasking @memory_order DSH: atomic load acquire + Kokkos::memory_fence(); // seq_cst fence, so why does the above need
to be acquire? + auto b = Impl::atomic_load(&m_bottom, memory_order_acquire); + OptionalRef return_value; + if(t < b) { + /* Non-empty queue */ + auto& a = m_array; // TODO @tasking @memory_order DSH: technically consume ordered, but acquire should be fine + Kokkos::load_fence(); // TODO @tasking @memory_order DSH memory order instead of fence + return_value = *static_cast(a[t]); // relaxed + if(not Impl::atomic_compare_exchange_strong(&m_top, t, t+1, memory_order_seq_cst, memory_order_relaxed)) { + return_value = nullptr; + } + } + return return_value; + } + +}; + +/* + // The atomicity of this load was more important in the paper's version + // because that version had a circular buffer that could grow. We're + // essentially using the memory order in this version as a fence, which + // may be unnecessary + auto buffer_ptr = (node_type***)&m_array.buffer; + auto a = Impl::atomic_load(buffer_ptr, memory_order_acquire); // technically consume ordered, but acquire should be fine + return_value = *static_cast(a[t % m_array->size]); // relaxed; we'd have to replace the m_array->size if we ever allow growth +*/ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template +struct TaskQueueTraitsChaseLev { + + template + using ready_queue_type = ChaseLevDeque< + Task, + fixed_size_circular_buffer, CircularBufferSize, int32_t>, + int32_t + >; + + template + using waiting_queue_type = SingleConsumeOperationLIFO; + + template + using intrusive_task_base_type = + typename ready_queue_type::node_type; + + static constexpr auto ready_queue_insertion_may_fail = true; // ChaseLevDeque::push() returns false when its buffer is full + +}; + +} // end namespace Impl +} // end namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* defined KOKKOS_ENABLE_TASKDAG */ +#endif /* #ifndef
KOKKOS_IMPL_LOCKFREEDEQUE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp index 82fdee4399..0d472e98bb 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp @@ -85,7 +85,8 @@ setenv("MEMKIND_HBW_NODES", "1", 0); } // Protect declarations, to prevent "unused variable" warnings. -#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS ) || defined( KOKKOS_ENABLE_OPENMPTARGET ) +#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS ) ||\ + defined( KOKKOS_ENABLE_OPENMPTARGET ) || defined ( KOKKOS_ENABLE_HPX ) const int num_threads = args.num_threads; #endif #if defined( KOKKOS_ENABLE_THREADS ) || defined( KOKKOS_ENABLE_OPENMPTARGET ) @@ -160,6 +161,21 @@ setenv("MEMKIND_HBW_NODES", "1", 0); } #endif +#if defined( KOKKOS_ENABLE_HPX ) + if( std::is_same< Kokkos::Experimental::HPX , Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Experimental::HPX , Kokkos::HostSpace::execution_space >::value ) { + if(num_threads>0) { + Kokkos::Experimental::HPX::impl_initialize(num_threads); + } else { + Kokkos::Experimental::HPX::impl_initialize(); + } + //std::cout << "Kokkos::initialize() fyi: HPX enabled and initialized" << std::endl ; + } + else { + //std::cout << "Kokkos::initialize() fyi: HPX enabled but not initialized" << std::endl ; + } +#endif + #if defined( KOKKOS_ENABLE_SERIAL ) // Prevent "unused variable" warning for 'args' input struct. 
If // Serial::initialize() ever needs to take arguments from the input @@ -268,6 +284,8 @@ void finalize_internal( const bool all_spaces = false ) Kokkos::Cuda::impl_finalize(); #endif } +#else + (void)all_spaces; #endif #if defined( KOKKOS_ENABLE_ROCM ) @@ -298,6 +316,15 @@ void finalize_internal( const bool all_spaces = false ) } #endif +#if defined( KOKKOS_ENABLE_HPX ) + if( std::is_same< Kokkos::Experimental::HPX , Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Experimental::HPX , Kokkos::HostSpace::execution_space >::value || + all_spaces ) { + if(Kokkos::Experimental::HPX::impl_is_initialized()) + Kokkos::Experimental::HPX::impl_finalize(); + } +#endif + #if defined( KOKKOS_ENABLE_THREADS ) if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value || std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value || @@ -331,34 +358,38 @@ void fence_internal() #if defined( KOKKOS_ENABLE_CUDA ) if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value ) { - Kokkos::Cuda::fence(); + Kokkos::Cuda::impl_static_fence(); } #endif #if defined( KOKKOS_ENABLE_ROCM ) if( std::is_same< Kokkos::Experimental::ROCm , Kokkos::DefaultExecutionSpace >::value ) { - Kokkos::Experimental::ROCm::fence(); + Kokkos::Experimental::ROCm().fence(); } #endif #if defined( KOKKOS_ENABLE_OPENMP ) if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value || std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) { - Kokkos::OpenMP::fence(); + Kokkos::OpenMP::impl_static_fence(); } #endif +#if defined( KOKKOS_ENABLE_HPX ) + Kokkos::Experimental::HPX::impl_static_fence(); +#endif + #if defined( KOKKOS_ENABLE_THREADS ) if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value || std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) { - Kokkos::Threads::fence(); + Kokkos::Threads::impl_static_fence(); } #endif #if defined( KOKKOS_ENABLE_SERIAL ) if( 
std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value || std::is_same< Kokkos::Serial , Kokkos::HostSpace::execution_space >::value ) { - Kokkos::Serial::fence(); + Kokkos::Serial::impl_static_fence(); } #endif @@ -708,6 +739,12 @@ void print_configuration( std::ostream & out , const bool detail ) msg << "yes" << std::endl; #else msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_HPX: "; +#ifdef KOKKOS_ENABLE_HPX + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; #endif msg << " KOKKOS_ENABLE_THREADS: "; #ifdef KOKKOS_ENABLE_THREADS @@ -957,6 +994,9 @@ void print_configuration( std::ostream & out , const bool detail ) #ifdef KOKKOS_ENABLE_OPENMP OpenMP::print_configuration(msg, detail); #endif +#ifdef KOKKOS_ENABLE_HPX + Experimental::HPX::print_configuration(msg, detail); +#endif #if defined( KOKKOS_ENABLE_THREADS ) Threads::print_configuration(msg, detail); #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_EBO.hpp b/lib/kokkos/core/src/impl/Kokkos_EBO.hpp new file mode 100644 index 0000000000..69bb74e2c5 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_EBO.hpp @@ -0,0 +1,343 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_EBO_HPP +#define KOKKOS_EBO_HPP + +//---------------------------------------------------------------------------- + +#include + +#include +//---------------------------------------------------------------------------- + + +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template +struct NotOnDeviceCtorDisambiguator { }; + +template +struct NoCtorsNotOnDevice : std::false_type { }; + +template +struct DefaultCtorNotOnDevice : std::false_type { }; + +template <> +struct DefaultCtorNotOnDevice<> : std::true_type { }; + +template class CtorNotOnDevice = NoCtorsNotOnDevice> +struct EBOBaseImpl; + +template class CtorNotOnDevice> +struct EBOBaseImpl { + + /* + * Workaround for constexpr in C++11: we need to still call T(args...), but we + * can't do so in the body of a constexpr function (in C++11), and there's no + * data member to construct into. But we can construct into an argument + * of a delegating constructor... + */ + // TODO @minor DSH the destructor gets called too early with this workaround + struct _constexpr_14_workaround_tag { }; + struct _constexpr_14_workaround_no_device_tag { }; + KOKKOS_FORCEINLINE_FUNCTION + constexpr EBOBaseImpl(_constexpr_14_workaround_tag, T&&) noexcept { } + inline constexpr EBOBaseImpl(_constexpr_14_workaround_no_device_tag, T&&) noexcept { } + + template < + class... Args, + class _ignored = void, + typename std::enable_if< + std::is_void<_ignored>::value + && std::is_constructible::value + && !CtorNotOnDevice::value, + int + >::type = 0 + > + KOKKOS_FORCEINLINE_FUNCTION + constexpr explicit + EBOBaseImpl( + Args&&... 
args + ) noexcept(noexcept(T(std::forward(args)...))) + // still call the constructor + : EBOBaseImpl(_constexpr_14_workaround_tag{}, T(std::forward(args)...)) + { } + + template < + class... Args, + class _ignored=void, + typename std::enable_if< + std::is_void<_ignored>::value + && std::is_constructible::value + && CtorNotOnDevice::value, + long + >::type = 0 + > + inline constexpr explicit + EBOBaseImpl( + Args&&... args + ) noexcept(noexcept(T(std::forward(args)...))) + // still call the constructor + : EBOBaseImpl(_constexpr_14_workaround_no_device_tag{}, T(std::forward(args)...)) + { } + + KOKKOS_FORCEINLINE_FUNCTION + constexpr EBOBaseImpl(EBOBaseImpl const&) = default; + + KOKKOS_FORCEINLINE_FUNCTION + constexpr EBOBaseImpl(EBOBaseImpl&&) = default; + + KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + EBOBaseImpl& operator=(EBOBaseImpl const&) = default; + + KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + EBOBaseImpl& operator=(EBOBaseImpl&&) = default; + + KOKKOS_FORCEINLINE_FUNCTION + ~EBOBaseImpl() = default; + + KOKKOS_INLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + T& _ebo_data_member() & { + return *reinterpret_cast(this); + } + + KOKKOS_INLINE_FUNCTION + constexpr + T const& _ebo_data_member() const & { + return *reinterpret_cast(this); + } + + KOKKOS_INLINE_FUNCTION + T volatile& _ebo_data_member() volatile & { + return *reinterpret_cast(this); + } + + KOKKOS_INLINE_FUNCTION + T const volatile& _ebo_data_member() const volatile & { + return *reinterpret_cast(this); + } + + KOKKOS_INLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + T&& _ebo_data_member() && { + return std::move(*reinterpret_cast(this)); + } + +}; + +template class CTorsNotOnDevice> +struct EBOBaseImpl { + + T m_ebo_object; + + template < + class... 
Args, + class _ignored=void, + typename std::enable_if< + std::is_void<_ignored>::value + && !CTorsNotOnDevice::value + && std::is_constructible::value, + int + >::type = 0 + > + KOKKOS_FORCEINLINE_FUNCTION + constexpr explicit + EBOBaseImpl( + Args&&... args + ) noexcept(noexcept(T(std::forward(args)...))) + : m_ebo_object(std::forward(args)...) + { } + + template < + class... Args, + class _ignored=void, + typename std::enable_if< + std::is_void<_ignored>::value + && CTorsNotOnDevice::value + && std::is_constructible::value, + long + >::type = 0 + > + inline + constexpr explicit + EBOBaseImpl( + Args&&... args + ) noexcept(noexcept(T(std::forward(args)...))) + : m_ebo_object(std::forward(args)...) + { } + + + // TODO @tasking @minor DSH noexcept in the right places? + + KOKKOS_FORCEINLINE_FUNCTION + constexpr + EBOBaseImpl(EBOBaseImpl const&) = default; + + KOKKOS_FORCEINLINE_FUNCTION + constexpr + EBOBaseImpl(EBOBaseImpl&&) noexcept = default; + + KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + EBOBaseImpl& operator=(EBOBaseImpl const&) = default; + + KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + EBOBaseImpl& operator=(EBOBaseImpl&&) = default; + + KOKKOS_FORCEINLINE_FUNCTION + ~EBOBaseImpl() = default; + + KOKKOS_INLINE_FUNCTION + T& _ebo_data_member() & { + return m_ebo_object; + } + + KOKKOS_INLINE_FUNCTION + T const& _ebo_data_member() const & { + return m_ebo_object; + } + + KOKKOS_INLINE_FUNCTION + T volatile& _ebo_data_member() volatile & { + return m_ebo_object; + } + + KOKKOS_INLINE_FUNCTION + T const volatile& _ebo_data_member() const volatile & { + return m_ebo_object; + } + + KOKKOS_INLINE_FUNCTION + T&& _ebo_data_member() && { + return m_ebo_object; + } + +}; + +/** + * + * @tparam T + */ +template class CtorsNotOnDevice=NoCtorsNotOnDevice> +struct StandardLayoutNoUniqueAddressMemberEmulation + : EBOBaseImpl::value, CtorsNotOnDevice> +{ +private: + + using ebo_base_t = EBOBaseImpl::value, CtorsNotOnDevice>; + +public: + + using 
ebo_base_t::ebo_base_t; + + KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + T& no_unique_address_data_member() & { + return this->ebo_base_t::_ebo_data_member(); + } + + KOKKOS_FORCEINLINE_FUNCTION + constexpr + T const& no_unique_address_data_member() const & { + return this->ebo_base_t::_ebo_data_member(); + } + + KOKKOS_FORCEINLINE_FUNCTION + T volatile& no_unique_address_data_member() volatile & { + return this->ebo_base_t::_ebo_data_member(); + } + + KOKKOS_FORCEINLINE_FUNCTION + T const volatile& no_unique_address_data_member() const volatile & { + return this->ebo_base_t::_ebo_data_member(); + } + + KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_CONSTEXPR_14 + T&& no_unique_address_data_member() && { + return this->ebo_base_t::_ebo_data_member(); + } +}; + +/** + * + * @tparam T + */ +template class CtorsNotOnDevice=NoCtorsNotOnDevice> +class NoUniqueAddressMemberEmulation + : private StandardLayoutNoUniqueAddressMemberEmulation +{ +private: + + using base_t = StandardLayoutNoUniqueAddressMemberEmulation; + +public: + + using base_t::base_t; + using base_t::no_unique_address_data_member; + +}; + + +} // end namespace Impl +} // end namespace Kokkos + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + + +#endif /* #ifndef KOKKOS_EBO_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Error.hpp b/lib/kokkos/core/src/impl/Kokkos_Error.hpp index e7d5f9344c..3d634fe5d1 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Error.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Error.hpp @@ -51,6 +51,10 @@ #include #endif +#ifndef KOKKOS_ABORT_MESSAGE_BUFFER_SIZE +# define KOKKOS_ABORT_MESSAGE_BUFFER_SIZE 2048 +#endif // ifndef KOKKOS_ABORT_MESSAGE_BUFFER_SIZE + namespace Kokkos { namespace Impl { @@ -83,6 +87,50 @@ void abort( const char * const message ) { } + +//---------------------------------------------------------------------------- 
+//---------------------------------------------------------------------------- + + +#if !defined(NDEBUG) || defined(KOKKOS_ENFORCE_CONTRACTS) || defined(KOKKOS_DEBUG) +# define KOKKOS_EXPECTS(...) \ + { \ + if(!bool(__VA_ARGS__)) { \ + ::Kokkos::abort( \ + "Kokkos contract violation:\n " \ + " Expected precondition `" #__VA_ARGS__ "` evaluated false." \ + ); \ + } \ + } +# define KOKKOS_ENSURES(...) \ + { \ + if(!bool(__VA_ARGS__)) { \ + ::Kokkos::abort( \ + "Kokkos contract violation:\n " \ + " Ensured postcondition `" #__VA_ARGS__ "` evaluated false." \ + ); \ + } \ + } +// some projects already define this for themselves, so don't mess them up +# ifndef KOKKOS_ASSERT +# define KOKKOS_ASSERT(...) \ + { \ + if(!bool(__VA_ARGS__)) { \ + ::Kokkos::abort( \ + "Kokkos contract violation:\n " \ + " Asserted condition `" #__VA_ARGS__ "` evaluated false." \ + ); \ + } \ + } +# endif // ifndef KOKKOS_ASSERT +#else // not debug mode +# define KOKKOS_EXPECTS(...) +# define KOKKOS_ENSURES(...) +# ifndef KOKKOS_ASSERT +# define KOKKOS_ASSERT(...) +# endif // ifndef KOKKOS_ASSERT +#endif // end debug mode ifdefs + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp b/lib/kokkos/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp new file mode 100644 index 0000000000..3053d8d9d0 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp @@ -0,0 +1,307 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_KOKKOS_FIXEDBUFFERMEMORYPOOL_HPP +#define KOKKOS_IMPL_KOKKOS_FIXEDBUFFERMEMORYPOOL_HPP + +#include +#include + +#include +#include + +namespace Kokkos { +namespace Impl { + +template < + class DeviceType, + size_t Size, + size_t Align=1, + class SizeType = typename DeviceType::execution_space::size_type +> +class FixedBlockSizeMemoryPool + : private MemorySpaceInstanceStorage +{ +public: + + using memory_space = typename DeviceType::memory_space; + using size_type = SizeType; + +private: + + using memory_space_storage_base = MemorySpaceInstanceStorage; + using tracker_type = Kokkos::Impl::SharedAllocationTracker; + using record_type = Kokkos::Impl::SharedAllocationRecord; + + struct alignas(Align) Block { union { char ignore; char data[Size]; }; }; + + static constexpr auto actual_size = sizeof(Block); + + // TODO shared allocation tracker + // TODO @optimization put the index values on different cache lines (CPU) or pages (GPU)? + + tracker_type m_tracker = { }; + size_type m_num_blocks = 0; + size_type m_first_free_idx = 0; + size_type m_last_free_idx = 0; + Kokkos::OwningRawPtr m_first_block = nullptr; + Kokkos::OwningRawPtr m_free_indices = nullptr; + + enum : size_type { IndexInUse = ~size_type(0) }; + +public: + + FixedBlockSizeMemoryPool( + memory_space const& mem_space, + size_type num_blocks + ) : memory_space_storage_base(mem_space), + m_tracker(), + m_num_blocks(num_blocks), + m_first_free_idx(0), + m_last_free_idx(num_blocks) + { + // TODO alignment? 
+ auto block_record = record_type::allocate( + mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(Block) + ); + KOKKOS_ASSERT(intptr_t(block_record->data()) % Align == 0); + m_tracker.assign_allocated_record_to_uninitialized(block_record); + m_first_block = (Block*)block_record->data(); + + auto idx_record = record_type::allocate( + mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(size_type) + ); + KOKKOS_ASSERT(intptr_t(idx_record->data()) % alignof(size_type) == 0); + m_tracker.assign_allocated_record_to_uninitialized(idx_record); + m_free_indices = (size_type*)idx_record->data(); + + for(size_type i = 0; i < num_blocks; ++i) { + m_free_indices[i] = i; + } + + Kokkos::memory_fence(); + } + + // For compatibility with MemoryPool<> + FixedBlockSizeMemoryPool( + memory_space const& mem_space, + size_t mempool_capacity, + unsigned, unsigned, unsigned + ) : FixedBlockSizeMemoryPool(mem_space, mempool_capacity / actual_size) + { /* forwarding ctor, must be empty */ } + + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool() = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool&&) = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool const&) = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool&&) = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool const&) = default; + + + KOKKOS_INLINE_FUNCTION + void* allocate(size_type alloc_size) const noexcept + { + KOKKOS_EXPECTS(alloc_size <= Size); + auto free_idx_counter = Kokkos::atomic_fetch_add((volatile size_type*)&m_first_free_idx, size_type(1)); + auto free_idx_idx = free_idx_counter % m_num_blocks; + + // We don't have exclusive access to m_free_indices[free_idx_idx] because + // the allocate counter might have lapped us since we incremented it + auto current_free_idx = m_free_indices[free_idx_idx]; + size_type free_idx = IndexInUse; + free_idx = + 
Kokkos::atomic_compare_exchange(&m_free_indices[free_idx_idx], current_free_idx, free_idx); + Kokkos::memory_fence(); + + // TODO figure out how to decrement here? + + if(free_idx == IndexInUse) { + return nullptr; + } + else { + return (void*)&m_first_block[free_idx]; + } + } + + KOKKOS_INLINE_FUNCTION + void deallocate(void* ptr, size_type alloc_size) const noexcept + { + // figure out which block we are + auto offset = intptr_t(ptr) - intptr_t(m_first_block); + + KOKKOS_EXPECTS(offset % actual_size == 0 && offset/actual_size < m_num_blocks); + + Kokkos::memory_fence(); + auto last_idx_idx = Kokkos::atomic_fetch_add((volatile size_type*)&m_last_free_idx, size_type(1)); + last_idx_idx %= m_num_blocks; + m_free_indices[last_idx_idx] = offset / actual_size; + } + +}; + +#if 0 +template < + class DeviceType, + size_t Size, + size_t Align=1, + class SizeType = typename DeviceType::execution_space::size_type +> +class FixedBlockSizeChaseLevMemoryPool + : private MemorySpaceInstanceStorage +{ +public: + + using memory_space = typename DeviceType::memory_space; + using size_type = SizeType; + +private: + + using memory_space_storage_base = MemorySpaceInstanceStorage; + using tracker_type = Kokkos::Impl::SharedAllocationTracker; + using record_type = Kokkos::Impl::SharedAllocationRecord; + + struct alignas(Align) Block { union { char ignore; char data[Size]; }; }; + + static constexpr auto actual_size = sizeof(Block); + + tracker_type m_tracker = { }; + size_type m_num_blocks = 0; + size_type m_first_free_idx = 0; + size_type m_last_free_idx = 0; + + + enum : size_type { IndexInUse = ~size_type(0) }; + +public: + + FixedBlockSizeMemoryPool( + memory_space const& mem_space, + size_type num_blocks + ) : memory_space_storage_base(mem_space), + m_tracker(), + m_num_blocks(num_blocks), + m_first_free_idx(0), + m_last_free_idx(num_blocks) + { + // TODO alignment? 
+ auto block_record = record_type::allocate( + mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(Block) + ); + KOKKOS_ASSERT(intptr_t(block_record->data()) % Align == 0); + m_tracker.assign_allocated_record_to_uninitialized(block_record); + m_first_block = (Block*)block_record->data(); + + auto idx_record = record_type::allocate( + mem_space, "FixedBlockSizeMemPool_blocks", num_blocks * sizeof(size_type) + ); + KOKKOS_ASSERT(intptr_t(idx_record->data()) % alignof(size_type) == 0); + m_tracker.assign_allocated_record_to_uninitialized(idx_record); + m_free_indices = (size_type*)idx_record->data(); + + for(size_type i = 0; i < num_blocks; ++i) { + m_free_indices[i] = i; + } + + Kokkos::memory_fence(); + } + + // For compatibility with MemoryPool<> + FixedBlockSizeMemoryPool( + memory_space const& mem_space, + size_t mempool_capacity, + unsigned, unsigned, unsigned + ) : FixedBlockSizeMemoryPool(mem_space, mempool_capacity / actual_size) + { /* forwarding ctor, must be empty */ } + + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool() = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool&&) = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool const&) = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool&&) = default; + KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool const&) = default; + + + KOKKOS_INLINE_FUNCTION + void* allocate(size_type alloc_size) const noexcept + { + KOKKOS_EXPECTS(alloc_size <= Size); + auto free_idx_counter = Kokkos::atomic_fetch_add((volatile size_type*)&m_first_free_idx, size_type(1)); + auto free_idx_idx = free_idx_counter % m_num_blocks; + + // We don't have exclusive access to m_free_indices[free_idx_idx] because + // the allocate counter might have lapped us since we incremented it + auto current_free_idx = m_free_indices[free_idx_idx]; + size_type free_idx = IndexInUse; + free_idx = + 
Kokkos::atomic_compare_exchange(&m_free_indices[free_idx_idx], current_free_idx, free_idx); + Kokkos::memory_fence(); + + // TODO figure out how to decrement here? + + if(free_idx == IndexInUse) { + return nullptr; + } + else { + return (void*)&m_first_block[free_idx]; + } + } + + KOKKOS_INLINE_FUNCTION + void deallocate(void* ptr, size_type alloc_size) const noexcept + { + // figure out which block we are + auto offset = intptr_t(ptr) - intptr_t(m_first_block); + + KOKKOS_EXPECTS(offset % actual_size == 0 && offset/actual_size < m_num_blocks); + + Kokkos::memory_fence(); + auto last_idx_idx = Kokkos::atomic_fetch_add((volatile size_type*)&m_last_free_idx, size_type(1)); + last_idx_idx %= m_num_blocks; + m_free_indices[last_idx_idx] = offset / actual_size; + } + +}; +#endif + +} // end namespace Impl +} // end namespace Kokkos + +#endif //KOKKOS_IMPL_KOKKOS_FIXEDBUFFERMEMORYPOOL_HPP diff --git a/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp b/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp index 7d4ffb85c1..ea3480b48b 100644 --- a/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp @@ -1432,7 +1432,10 @@ namespace Impl { template struct JoinLambdaAdapter::enable_if( & JoinOp::join ) )> { typedef ValueType value_type; - typedef StaticAssertSame assert_value_types_match; + static_assert( + std::is_same::value, + "JoinLambdaAdapter static_assert Fail: ValueType != JoinOp::value_type"); + const JoinOp& lambda; KOKKOS_INLINE_FUNCTION JoinLambdaAdapter(const JoinOp& lambda_):lambda(lambda_) {} diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp index d8cb7593bf..848746d265 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp @@ -420,15 +420,19 @@ SharedAllocationRecord< Kokkos::HostSpace , void >::get_record( void * alloc_ptr } // Iterate records to print orphaned memory ... 
+#ifdef KOKKOS_DEBUG void SharedAllocationRecord< Kokkos::HostSpace , void >:: print_records( std::ostream & s , const Kokkos::HostSpace & , bool detail ) { -#ifdef KOKKOS_DEBUG SharedAllocationRecord< void , void >::print_host_accessible_records( s , "HostSpace" , & s_root_record , detail ); -#else - throw_runtime_exception("SharedAllocationRecord::print_records only works with KOKKOS_DEBUG enabled"); -#endif } +#else +void SharedAllocationRecord< Kokkos::HostSpace , void >:: +print_records( std::ostream & , const Kokkos::HostSpace & , bool ) +{ + throw_runtime_exception("SharedAllocationRecord::print_records only works with KOKKOS_DEBUG enabled"); +} +#endif } // namespace Impl } // namespace Kokkos diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp new file mode 100644 index 0000000000..21b95f6985 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.cpp @@ -0,0 +1,134 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include "Kokkos_Core.hpp" +#include "Kokkos_HostSpace_deepcopy.hpp" + +namespace Kokkos { + +namespace Impl { + +#ifndef KOKKOS_IMPL_HOST_DEEP_COPY_SERIAL_LIMIT +#define KOKKOS_IMPL_HOST_DEEP_COPY_SERIAL_LIMIT 10*8192 +#endif + +void hostspace_parallel_deepcopy(void * dst, const void * src, ptrdiff_t n) { + if((n policy_t; + + // Both src and dst are aligned the same way with respect to 8 byte words + if(reinterpret_cast(src)%8 == reinterpret_cast(dst)%8) { + char* dst_c = reinterpret_cast(dst); + const char* src_c = reinterpret_cast(src); + int count = 0; + // get initial bytes copied + while(reinterpret_cast(dst_c)%8!=0) { + *dst_c=*src_c; + dst_c++; src_c++; count++; + } + + // copy the bulk of the data + double* dst_p = reinterpret_cast(dst_c); + const double* src_p = reinterpret_cast(src_c); + Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_double",policy_t(0,(n-count)/8),[=](const ptrdiff_t i) { + dst_p[i] = src_p[i]; + }); + + // get final data copied + dst_c += ((n-count)/8) * 8; + src_c += ((n-count)/8) * 8; + 
char* dst_end = reinterpret_cast(dst)+n; + while(dst_c != dst_end) { + *dst_c = *src_c; + dst_c++; src_c++; + } + return; + } + + // Both src and dst are aligned the same way with respect to 4 byte words + if(reinterpret_cast(src)%4 == reinterpret_cast(dst)%4) { + char* dst_c = reinterpret_cast(dst); + const char* src_c = reinterpret_cast(src); + int count = 0; + // get initial bytes copied + while(reinterpret_cast(dst_c)%4!=0) { + *dst_c=*src_c; + dst_c++; src_c++; count++; + } + + // copy the bulk of the data + int32_t* dst_p = reinterpret_cast(dst_c); + const int32_t* src_p = reinterpret_cast(src_c); + Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_int",policy_t(0,(n-count)/4),[=](const ptrdiff_t i) { + dst_p[i] = src_p[i]; + }); + + // get final data copied + dst_c += ((n-count)/4) * 4; + src_c += ((n-count)/4) * 4; + char* dst_end = reinterpret_cast(dst)+n; + while(dst_c != dst_end) { + *dst_c = *src_c; + dst_c++; src_c++; + } + return; + } + + // Src and dst are not aligned the same way, we can only to byte wise copy. + { + char* dst_p = reinterpret_cast(dst); + const char* src_p = reinterpret_cast(src); + Kokkos::parallel_for("Kokkos::Impl::host_space_deepcopy_char",policy_t(0,n),[=](const ptrdiff_t i) { + dst_p[i] = src_p[i]; + }); + } +} + +} // namespace Impl + +} // namespace Kokkos + diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp new file mode 100644 index 0000000000..b8aea95363 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace_deepcopy.hpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#include + +namespace Kokkos { + +namespace Impl { + +void hostspace_parallel_deepcopy(void * dst, const void * src, ptrdiff_t n); + +} // namespace Impl + +} // namespace Kokkos + diff --git a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp index fff48e87f6..f44a13c574 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp @@ -52,6 +52,8 @@ #include #include +#include // std::numeric_limits + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -477,6 +479,9 @@ class HostThreadTeamMember { public: using scratch_memory_space = typename HostExecSpace::scratch_memory_space ; + using execution_space = HostExecSpace; + using thread_team_member = HostThreadTeamMember; + using host_thread_team_member = HostThreadTeamMember; private: @@ -490,8 +495,8 @@ public: constexpr HostThreadTeamMember( HostThreadTeamData & arg_data ) noexcept : m_scratch( arg_data.team_shared() , arg_data.team_shared_bytes() ) , m_data( arg_data ) - , m_league_rank(0) - , m_league_size(1) + , m_league_rank(arg_data.m_league_rank) + , m_league_size(arg_data.m_league_size) {} constexpr HostThreadTeamMember( HostThreadTeamData & arg_data @@ -630,6 +635,12 @@ public: KOKKOS_INLINE_FUNCTION typename std::enable_if< is_reducer< ReducerType >::value >::type team_reduce( ReducerType const & reducer ) const noexcept + { team_reduce(reducer,reducer.reference()); } + + template< typename ReducerType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< is_reducer< ReducerType >::value >::type + team_reduce( ReducerType const & reducer, typename ReducerType::value_type contribution ) const noexcept #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) { if ( 1 
< m_data.m_team_size ) { @@ -640,7 +651,7 @@ public: // Non-root copies to their local buffer: /*reducer.copy( (value_type*) m_data.team_reduce_local() , reducer.data() );*/ - *((value_type*) m_data.team_reduce_local()) = reducer.reference(); + *((value_type*) m_data.team_reduce_local()) = contribution; } // Root does not overwrite shared memory until all threads arrive @@ -656,12 +667,13 @@ public: value_type * const src = (value_type*) m_data.team_member(i)->team_reduce_local(); - reducer.join( reducer.reference(), *src); + reducer.join( contribution, *src); } // Copy result to root member's buffer: // reducer.copy( (value_type*) m_data.team_reduce() , reducer.data() ); - *((value_type*) m_data.team_reduce()) = reducer.reference(); + *((value_type*) m_data.team_reduce()) = contribution; + reducer.reference() = contribution; m_data.team_rendezvous_release(); // This thread released all other threads from 'team_rendezvous' // with a return value of 'false' @@ -670,6 +682,8 @@ public: // Copy from root member's buffer: reducer.reference() = *((value_type*) m_data.team_reduce()); } + } else { + reducer.reference() = contribution; } } #else @@ -795,50 +809,105 @@ public: namespace Kokkos { -template +template KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct > -TeamThreadRange( Impl::HostThreadTeamMember const & member - , iType const & count ) +Impl::TeamThreadRangeBoundariesStruct +TeamThreadRange( + Member const & member, + iType count, + typename std::enable_if< + Impl::is_thread_team_member::value + >::type const** = nullptr +) { return Impl::TeamThreadRangeBoundariesStruct - >(member,0,count); + (member,0,count); } -template +template KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct - < typename std::common_type< iType1, iType2 >::type - , Impl::HostThreadTeamMember > -TeamThreadRange( Impl::HostThreadTeamMember const & member - , iType1 const & begin , iType2 const & end ) +Impl::TeamThreadRangeBoundariesStruct< + typename 
std::common_type< iType1, iType2 >::type, Member +> +TeamThreadRange( + Member const & member, + iType1 begin, + iType2 end, + typename std::enable_if< + Impl::is_thread_team_member::value + >::type const** = nullptr +) { return Impl::TeamThreadRangeBoundariesStruct < typename std::common_type< iType1, iType2 >::type - , Impl::HostThreadTeamMember >( member , begin , end ); + , Member >( member , begin , end ); } -template +template KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct > -ThreadVectorRange - ( Impl::HostThreadTeamMember const & member - , const iType & count ) +Impl::TeamThreadRangeBoundariesStruct +TeamVectorRange( + Member const & member, + iType count, + typename std::enable_if< + Impl::is_thread_team_member::value + >::type const** = nullptr +) { - return Impl::ThreadVectorRangeBoundariesStruct >(member,count); + return + Impl::TeamThreadRangeBoundariesStruct + (member,0,count); } -template +template KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct > -ThreadVectorRange - ( Impl::HostThreadTeamMember const & member - , const iType & arg_begin - , const iType & arg_end ) +Impl::TeamThreadRangeBoundariesStruct< + typename std::common_type< iType1, iType2 >::type, Member +> +TeamVectorRange( + Member const & member, + iType1 begin, + iType2 end, + typename std::enable_if< + Impl::is_thread_team_member::value + >::type const** = nullptr +) { - return Impl::ThreadVectorRangeBoundariesStruct >(member,arg_begin,arg_end); + return + Impl::TeamThreadRangeBoundariesStruct + < typename std::common_type< iType1, iType2 >::type + , Member >( member , begin , end ); +} + +template +KOKKOS_INLINE_FUNCTION +Impl::ThreadVectorRangeBoundariesStruct +ThreadVectorRange( + Member const & member, + iType count, + typename std::enable_if< + Impl::is_thread_team_member::value + >::type const** = nullptr +) +{ + return Impl::ThreadVectorRangeBoundariesStruct(member,count); +} + +template +KOKKOS_INLINE_FUNCTION 
+Impl::ThreadVectorRangeBoundariesStruct +ThreadVectorRange( + Member const & member, + iType arg_begin, + iType arg_end, + typename std::enable_if< + Impl::is_thread_team_member::value + >::type const** = nullptr +) +{ + return Impl::ThreadVectorRangeBoundariesStruct(member,arg_begin,arg_end); } //---------------------------------------------------------------------------- @@ -848,11 +917,14 @@ ThreadVectorRange * * The range [0..N) is mapped to all threads of the the calling thread team. */ -template +template KOKKOS_INLINE_FUNCTION void parallel_for - ( Impl::TeamThreadRangeBoundariesStruct > const & loop_boundaries + ( Impl::TeamThreadRangeBoundariesStruct const & loop_boundaries , Closure const & closure + , typename std::enable_if< + Impl::is_host_thread_team_member::value + >::type const** = nullptr ) { for( iType i = loop_boundaries.start @@ -862,11 +934,14 @@ void parallel_for } } -template +template KOKKOS_INLINE_FUNCTION void parallel_for - ( Impl::ThreadVectorRangeBoundariesStruct > const & loop_boundaries + ( Impl::ThreadVectorRangeBoundariesStruct const & loop_boundaries , Closure const & closure + , typename std::enable_if< + Impl::is_host_thread_team_member::value + >::type const** = nullptr ) { #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP @@ -881,40 +956,47 @@ void parallel_for //---------------------------------------------------------------------------- -template< typename iType, class Space, class Closure, class Reducer > +template< typename iType, class Closure, class Reducer, class Member > KOKKOS_INLINE_FUNCTION -typename std::enable_if< Kokkos::is_reducer< Reducer >::value >::type +typename std::enable_if< + Kokkos::is_reducer< Reducer >::value + && Impl::is_host_thread_team_member::value +>::type parallel_reduce - ( Impl::TeamThreadRangeBoundariesStruct > + ( Impl::TeamThreadRangeBoundariesStruct const & loop_boundaries , Closure const & closure , Reducer const & reducer ) { - reducer.init( reducer.reference() ); + typename Reducer::value_type value; 
+ reducer.init( value ); for( iType i = loop_boundaries.start ; i < loop_boundaries.end ; i += loop_boundaries.increment ) { - closure( i , reducer.reference() ); + closure( i , value ); } - - loop_boundaries.thread.team_reduce( reducer ); + + loop_boundaries.thread.team_reduce( reducer, value ); } -template< typename iType, class Space, typename Closure, typename ValueType > +template< typename iType, typename Closure, typename ValueType, typename Member > KOKKOS_INLINE_FUNCTION -typename std::enable_if< ! Kokkos::is_reducer::value >::type +typename std::enable_if< + ! Kokkos::is_reducer::value + && Impl::is_host_thread_team_member::value +>::type parallel_reduce - ( Impl::TeamThreadRangeBoundariesStruct > + ( Impl::TeamThreadRangeBoundariesStruct const & loop_boundaries , Closure const & closure , ValueType & result ) { - Sum reducer( result ); - - reducer.init( result ); + ValueType val; + Sum reducer( val ); + reducer.init( val ); for( iType i = loop_boundaries.start ; i < loop_boundaries.end @@ -923,6 +1005,7 @@ parallel_reduce } loop_boundaries.thread.team_reduce( reducer ); + result = reducer.reference(); } /*template< typename iType, class Space @@ -958,11 +1041,14 @@ void parallel_reduce * calling thread team and a summation of val is * performed and put into result. */ -template< typename iType, class Space , class Lambda, typename ValueType > +template< typename iType, class Lambda, typename ValueType, typename Member > KOKKOS_INLINE_FUNCTION -typename std::enable_if< ! Kokkos::is_reducer::value >::type +typename std::enable_if< + ! 
Kokkos::is_reducer::value + && Impl::is_host_thread_team_member::value +>::type parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, + (const Impl::ThreadVectorRangeBoundariesStruct& loop_boundaries, const Lambda & lambda, ValueType& result) { @@ -974,11 +1060,14 @@ parallel_reduce } } -template< typename iType, class Space , class Lambda, typename ReducerType > +template< typename iType, class Lambda, typename ReducerType, typename Member > KOKKOS_INLINE_FUNCTION -typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type +typename std::enable_if< + Kokkos::is_reducer< ReducerType >::value + && Impl::is_host_thread_team_member::value +>::type parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, + (const Impl::ThreadVectorRangeBoundariesStruct& loop_boundaries, const Lambda & lambda, const ReducerType& reducer) { @@ -990,41 +1079,15 @@ parallel_reduce } } -/** \brief Intra-thread vector parallel_reduce. - * - * Executes lambda(iType i, ValueType & val) for each i=[0..N) - * - * The range [0..N) is mapped to all vector lanes of the the - * calling thread and a reduction of val is performed using - * JoinType(ValueType& val, const ValueType& update) - * and put into init_result. - * The input value of init_result is used as initializer for - * temporary variables of ValueType. Therefore * the input - * value should be the neutral element with respect to the - * join operation (e.g. '0 for +-' or * '1 for *'). 
- */ -template< typename iType, class Space - , class Lambda, class JoinType , typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct >& loop_boundaries, - const Lambda & lambda, - const JoinType & join, - ValueType& result) -{ - for( iType i = loop_boundaries.start ; - i < loop_boundaries.end ; - i += loop_boundaries.increment ) { - lambda(i,result); - } -} - //---------------------------------------------------------------------------- -template< typename iType, class Space, class Closure > +template< typename iType, class Closure, class Member > KOKKOS_INLINE_FUNCTION -void parallel_scan - ( Impl::TeamThreadRangeBoundariesStruct > const & loop_boundaries +typename std::enable_if< + Impl::is_host_thread_team_member::value +>::type +parallel_scan + ( Impl::TeamThreadRangeBoundariesStruct const & loop_boundaries , Closure const & closure ) { @@ -1056,10 +1119,13 @@ void parallel_scan } -template< typename iType, class Space, class ClosureType > +template< typename iType, class ClosureType, class Member > KOKKOS_INLINE_FUNCTION -void parallel_scan - ( Impl::ThreadVectorRangeBoundariesStruct > const & loop_boundaries +typename std::enable_if< + Impl::is_host_thread_team_member::value +>::type +parallel_scan + ( Impl::ThreadVectorRangeBoundariesStruct const & loop_boundaries , ClosureType const & closure ) { @@ -1083,47 +1149,65 @@ void parallel_scan //---------------------------------------------------------------------------- -template< class Space > +template< class Member > KOKKOS_INLINE_FUNCTION -Impl::ThreadSingleStruct > -PerTeam(const Impl::HostThreadTeamMember & member ) +Impl::ThreadSingleStruct +PerTeam( + Member const& member, + typename std::enable_if::value>::type const** = nullptr +) { - return Impl::ThreadSingleStruct >(member); + return Impl::ThreadSingleStruct(member); } -template< class Space > +template< class Member > KOKKOS_INLINE_FUNCTION -Impl::VectorSingleStruct > -PerThread(const 
Impl::HostThreadTeamMember & member) +Impl::VectorSingleStruct +PerThread( + Member const& member, + typename std::enable_if::value>::type const** = nullptr +) { - return Impl::VectorSingleStruct >(member); + return Impl::VectorSingleStruct(member); } -template< class Space , class FunctorType > +template< class Member , class FunctorType > KOKKOS_INLINE_FUNCTION -void single( const Impl::ThreadSingleStruct< Impl::HostThreadTeamMember > & single , const FunctorType & functor ) +typename std::enable_if< + Impl::is_host_thread_team_member::value +>::type +single( const Impl::ThreadSingleStruct & single , const FunctorType & functor ) { // 'single' does not perform a barrier. if ( single.team_member.team_rank() == 0 ) functor(); } -template< class Space , class FunctorType , typename ValueType > +template< class Member, class FunctorType , typename ValueType > KOKKOS_INLINE_FUNCTION -void single( const Impl::ThreadSingleStruct< Impl::HostThreadTeamMember > & single , const FunctorType & functor , ValueType & val ) +typename std::enable_if< + Impl::is_host_thread_team_member::value +>::type +single( const Impl::ThreadSingleStruct & single , const FunctorType & functor , ValueType & val ) { single.team_member.team_broadcast( functor , val , 0 ); } -template< class Space , class FunctorType > +template< class Member, class FunctorType > KOKKOS_INLINE_FUNCTION -void single( const Impl::VectorSingleStruct< Impl::HostThreadTeamMember > & , const FunctorType & functor ) +typename std::enable_if< + Impl::is_host_thread_team_member::value +>::type +single( const Impl::VectorSingleStruct & , const FunctorType & functor ) { functor(); } -template< class Space , class FunctorType , typename ValueType > +template< class Member, class FunctorType , typename ValueType > KOKKOS_INLINE_FUNCTION -void single( const Impl::VectorSingleStruct< Impl::HostThreadTeamMember > & , const FunctorType & functor , ValueType & val ) +typename std::enable_if< + 
Impl::is_host_thread_team_member::value +>::type +single( const Impl::VectorSingleStruct & , const FunctorType & functor , ValueType & val ) { functor(val); } diff --git a/lib/kokkos/core/src/impl/Kokkos_LIFO.hpp b/lib/kokkos/core/src/impl/Kokkos_LIFO.hpp new file mode 100644 index 0000000000..43e9783beb --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_LIFO.hpp @@ -0,0 +1,431 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_LIFO_HPP +#define KOKKOS_IMPL_LIFO_HPP + +#include +#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using CUDA + +#include + +#include +#include +#include // KOKKOS_EXPECTS +#include + +#include // atomic_compare_exchange, atomic_fence + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template +struct LockBasedLIFOCommon +{ + + using value_type = T; + + using node_type = SimpleSinglyLinkedListNode<>; + + static constexpr uintptr_t LockTag = ~uintptr_t(0); + static constexpr uintptr_t EndTag = ~uintptr_t(1); + + OwningRawPtr m_head = (node_type*)EndTag; + + KOKKOS_INLINE_FUNCTION + bool _try_push_node(node_type& node) { + + KOKKOS_EXPECTS(!node.is_enqueued()); + + auto* volatile & next = LinkedListNodeAccess::next_ptr(node); + + // store the head of the queue in a local variable + auto* old_head = m_head; + + // retry until someone locks the queue or we 
successfully compare exchange + while (old_head != (node_type*)LockTag) { + + // TODO @tasking @memory_order DSH this should have a memory order and not a memory fence + + // set task->next to the head of the queue + next = old_head; + + // fence to emulate acquire semantics on next and release semantics on + // the store of m_head + // Do not proceed until 'next' has been stored. + Kokkos::memory_fence(); + + // store the old head + auto* const old_head_tmp = old_head; + + // attempt to swap task with the old head of the queue + // as if this were done atomically: + // if(m_head == old_head) { + // m_head = &node; + // } + // old_head = m_head; + old_head = ::Kokkos::atomic_compare_exchange(&m_head, old_head, &node); + + if(old_head_tmp == old_head) return true; + } + + // Failed, replace 'task->m_next' value since 'task' remains + // not a member of a queue. + + // TODO @tasking @memory_order DSH this should have a memory order and not a memory fence + LinkedListNodeAccess::mark_as_not_enqueued(node); + + // fence to emulate acquire semantics on next + // Do not proceed until 'next' has been stored. 
+ ::Kokkos::memory_fence(); + + return false; + } + + bool _is_empty() const noexcept { + // TODO @tasking @memory_order DSH make this an atomic load with memory order + return (volatile node_type*)this->m_head == (node_type*)EndTag; + } + +}; + +//------------------------------------------------------------------------------ +//------------------------------------------------------------------------------ + +template +class LockBasedLIFO + : private LockBasedLIFOCommon +{ + +private: + + using base_t = LockBasedLIFOCommon; + using node_type = typename base_t::node_type; + +public: + + using value_type = typename base_t::value_type; // = T + using intrusive_node_base_type = SimpleSinglyLinkedListNode<>; + +public: + + + LockBasedLIFO() = default; + LockBasedLIFO(LockBasedLIFO const&) = delete; + LockBasedLIFO(LockBasedLIFO&&) = delete; + LockBasedLIFO& operator=(LockBasedLIFO const&) = delete; + LockBasedLIFO& operator=(LockBasedLIFO&&) = delete; + + ~LockBasedLIFO() = default; + + + bool empty() const noexcept { + // TODO @tasking @memory_order DSH memory order + return this->_is_empty(); + } + + KOKKOS_INLINE_FUNCTION + OptionalRef pop(bool abort_on_locked = false) + { + // Put this in here to avoid requiring value_type to be complete until now. + static_assert( + std::is_base_of::value, + "Intrusive linked-list value_type must be derived from intrusive_node_base_type" + ); + + // We can't use the static constexpr LockTag directly because + // atomic_compare_exchange needs to bind a reference to that, and you + // can't do that with static constexpr variables. + auto* const lock_tag = (node_type*)base_t::LockTag; + + // TODO @tasking @memory_order DSH shouldn't this be a relaxed atomic load? + // start with the return value equal to the head + auto* rv = this->m_head; + + // Retry until the lock is acquired or the queue is empty. 
+ while(rv != (node_type*)base_t::EndTag) { + + // The only possible values for the queue are + // (1) lock, (2) end, or (3) a valid task. + // Thus zero will never appear in the queue. + // + // If queue is locked then just read by guaranteeing the CAS will fail. + KOKKOS_ASSERT(rv != nullptr); + + if(rv == lock_tag) { + // TODO @tasking @memory_order DSH this should just be an atomic load followed by a continue + // just set rv to nullptr for now, effectively turning the + // atomic_compare_exchange below into a load + rv = nullptr; + if(abort_on_locked) { + break; + } + } + + auto* const old_rv = rv; + + // TODO @tasking @memory_order DSH this should be a weak compare exchange in a loop + rv = Kokkos::atomic_compare_exchange(&(this->m_head), old_rv, lock_tag); + + if(rv == old_rv) { + // CAS succeeded and queue is locked + // + // This thread has locked the queue and removed 'rv' from the queue. + // Extract the next entry of the queue from 'rv->m_next' + // and mark 'rv' as popped from a queue by setting + // 'rv->m_next = nullptr'. + // + // Place the next entry in the head of the queue, + // which also unlocks the queue. + // + // This thread has exclusive access to + // the queue and the popped task's m_next. 
+ + // TODO @tasking @memory_order DSH check whether the volatile is needed here + auto* volatile& next = LinkedListNodeAccess::next_ptr(*rv); //->m_next; + + // This algorithm is not lockfree because a adversarial scheduler could + // context switch this thread at this point and the rest of the threads + // calling this method would never make forward progress + + // TODO @tasking @memory_order DSH I think this needs to be a atomic store release (and the memory fence needs to be removed) + // TODO @tasking DSH prove that this doesn't need to be a volatile store + // Lock is released here + this->m_head = next; + + // Mark rv as popped by assigning nullptr to the next + LinkedListNodeAccess::mark_as_not_enqueued(*rv); + + Kokkos::memory_fence(); + + return OptionalRef{ *static_cast(rv) }; + } + + // Otherwise, the CAS got a value that didn't match (either because + // another thread locked the queue and we observed the lock tag or because + // another thread replaced the head and now we want to try to lock the + // queue with that as the popped item. Either way, try again. + } + + // Return an empty OptionalRef by calling the default constructor + return { }; + } + + KOKKOS_INLINE_FUNCTION + OptionalRef + steal() + { + // TODO @tasking @optimization DSH do this with fewer retries + return pop(/* abort_on_locked = */ true); + } + + KOKKOS_INLINE_FUNCTION + bool push(node_type& node) + { + while(!this->_try_push_node(node)) { /* retry until success */ } + // for consistency with push interface on other queue types: + return true; + } + + KOKKOS_INLINE_FUNCTION + bool push(node_type&& node) + { + // Just forward to the lvalue version + return push(node); + } + +}; + + +/** @brief A Multiple Producer, Single Consumer Queue with some special semantics + * + * This multi-producer, single consumer queue has the following semantics: + * + * - Any number of threads may call `try_emplace`/`try_push` + * + These operations are lock-free. 
+ * - Exactly one thread calls `consume()`, and the call occurs exactly once + * in the lifetime of the queue. + * + This operation is lock-free (and wait-free w.r.t. producers) + * - Any calls to `try_push` that happen-before the call to + * `consume()` will succeed and return an true, such that the `consume()` + * call will visit that node. + * - Any calls to `try_push` for which the single call to `consume()` + * happens-before those calls will return false and the node given as + * an argument to `try_push` will not be visited by consume() + * + * + * @tparam T The type of items in the queue + * + */ +template +class SingleConsumeOperationLIFO + : private LockBasedLIFOCommon +{ +private: + + using base_t = LockBasedLIFOCommon; + using node_type = typename base_t::node_type; + + // Allows us to reuse the existing infrastructure for + static constexpr auto ConsumedTag = base_t::LockTag; + +public: + + using value_type = typename base_t::value_type; // = T + + KOKKOS_INLINE_FUNCTION + SingleConsumeOperationLIFO() noexcept = default; + + SingleConsumeOperationLIFO(SingleConsumeOperationLIFO const&) = delete; + SingleConsumeOperationLIFO(SingleConsumeOperationLIFO&&) = delete; + SingleConsumeOperationLIFO& operator=(SingleConsumeOperationLIFO const&) = delete; + SingleConsumeOperationLIFO& operator=(SingleConsumeOperationLIFO&&) = delete; + + KOKKOS_INLINE_FUNCTION + ~SingleConsumeOperationLIFO() = default; + + KOKKOS_INLINE_FUNCTION + bool empty() const noexcept { + // TODO @tasking @memory_order DSH memory order + return this->_is_empty(); + } + + KOKKOS_INLINE_FUNCTION + bool is_consumed() const noexcept { + // TODO @tasking @memory_order DSH memory order? 
+ return this->m_head == (node_type*)ConsumedTag; + } + + KOKKOS_INLINE_FUNCTION + bool try_push(node_type& node) + { + return this->_try_push_node(node); + // Ensures: (return value is true) || (node.is_enqueued() == false); + } + + template + KOKKOS_INLINE_FUNCTION + void consume(Function&& f) { + auto* const consumed_tag = (node_type*)ConsumedTag; + + // Swap the Consumed tag into the head of the queue: + + // (local variable used for assertion only) + // TODO @tasking @memory_order DSH this should have memory order release, I think + Kokkos::memory_fence(); + auto old_head = Kokkos::atomic_exchange(&(this->m_head), consumed_tag); + + // Assert that the queue wasn't consumed before this + // This can't be an expects clause because the acquire fence on the read + // would be a side-effect + KOKKOS_ASSERT(old_head != consumed_tag); + + // We now have exclusive access to the queue; loop over it and call + // the user function + while(old_head != (node_type*)base_t::EndTag) { + + // get the Node to make the call with + auto* call_arg = old_head; + + // advance the head + old_head = LinkedListNodeAccess::next_ptr(*old_head); + + // Mark as popped before proceeding + LinkedListNodeAccess::mark_as_not_enqueued(*call_arg); + + // Call the user function + auto& arg = *static_cast(call_arg); + f(std::move(arg)); + + } + + } + +}; + +} // end namespace Impl +} // end namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +struct TaskQueueTraitsLockBased +{ + + // TODO @tasking @documentation DSH document what concepts these match + + template + using ready_queue_type = LockBasedLIFO; + + template + using waiting_queue_type = SingleConsumeOperationLIFO; + + template + using intrusive_task_base_type = + typename ready_queue_type::intrusive_node_base_type; + + static constexpr auto ready_queue_insertion_may_fail 
= false; + +}; + + +} // end namespace Impl +} // end namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* defined KOKKOS_ENABLE_TASKDAG */ +#endif /* #ifndef KOKKOS_IMPL_LIFO_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_LinkedListNode.hpp b/lib/kokkos/core/src/impl/Kokkos_LinkedListNode.hpp new file mode 100644 index 0000000000..78a6faca90 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_LinkedListNode.hpp @@ -0,0 +1,206 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_LINKEDLISTNODE_HPP +#define KOKKOS_IMPL_LINKEDLISTNODE_HPP + +#include +#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using CUDA + +#include + +#include +#include +#include // KOKKOS_EXPECTS + +#include // atomic_compare_exchange, atomic_fence + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +struct LinkedListNodeAccess; + +template < + uintptr_t NotEnqueuedValue = 0, + template class PointerTemplate = std::add_pointer +> +struct SimpleSinglyLinkedListNode +{ + +private: + + using pointer_type = typename PointerTemplate::type; + + pointer_type m_next = reinterpret_cast(NotEnqueuedValue); + + // These are private because they are an implementation detail of the queue + // and should not get added to the value type's interface via the intrusive + // wrapper. 
+ + KOKKOS_INLINE_FUNCTION + void mark_as_not_enqueued() noexcept { + // TODO @tasking @memory_order DSH make this an atomic store with memory order + m_next = (pointer_type)NotEnqueuedValue; + } + + KOKKOS_INLINE_FUNCTION + void mark_as_not_enqueued() volatile noexcept { + // TODO @tasking @memory_order DSH make this an atomic store with memory order + m_next = (pointer_type)NotEnqueuedValue; + } + + KOKKOS_INLINE_FUNCTION + pointer_type& _next_ptr() noexcept { + return m_next; + } + + KOKKOS_INLINE_FUNCTION + pointer_type volatile& _next_ptr() volatile noexcept { + return m_next; + } + + KOKKOS_INLINE_FUNCTION + pointer_type const& _next_ptr() const noexcept { + return m_next; + } + + KOKKOS_INLINE_FUNCTION + pointer_type const volatile& _next_ptr() const volatile noexcept { + return m_next; + } + + friend struct LinkedListNodeAccess; + +public: + + // KOKKOS_CONSTEXPR_14 + KOKKOS_INLINE_FUNCTION + bool is_enqueued() const noexcept { + // TODO @tasking @memory_order DSH make this an atomic load with memory order + return m_next != reinterpret_cast(NotEnqueuedValue); + } + + // KOKKOS_CONSTEXPR_14 + KOKKOS_INLINE_FUNCTION + bool is_enqueued() const volatile noexcept { + // TODO @tasking @memory_order DSH make this an atomic load with memory order + return m_next != reinterpret_cast(NotEnqueuedValue); + } + +}; + +/// Attorney for LinkedListNode, since user types inherit from it +struct LinkedListNodeAccess +{ + + template + KOKKOS_INLINE_FUNCTION + static void mark_as_not_enqueued(Node& node) noexcept { + node.mark_as_not_enqueued(); + } + + template + KOKKOS_INLINE_FUNCTION + static void mark_as_not_enqueued(Node volatile& node) noexcept { + node.mark_as_not_enqueued(); + } + + template + KOKKOS_INLINE_FUNCTION + static + typename Node::pointer_type& + next_ptr(Node& node) noexcept { + return node._next_ptr(); + } + + template + KOKKOS_INLINE_FUNCTION + static + typename Node::pointer_type& + next_ptr(Node volatile& node) noexcept { + return node._next_ptr(); + } 
+ + template + KOKKOS_INLINE_FUNCTION + static + typename Node::pointer_type& + next_ptr(Node const& node) noexcept { + return node._next_ptr(); + } + + template + KOKKOS_INLINE_FUNCTION + static + typename Node::pointer_type& + prev_ptr(Node& node) noexcept { + return node._prev_ptr(); + } + + template + KOKKOS_INLINE_FUNCTION + static + typename Node::pointer_type& + prev_ptr(Node const& node) noexcept { + return node._prev_ptr(); + } + +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +} // end namespace Impl +} // end namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* defined KOKKOS_ENABLE_TASKDAG */ +#endif /* #ifndef KOKKOS_IMPL_LINKEDLISTNODE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_MemoryPoolAllocator.hpp b/lib/kokkos/core/src/impl/Kokkos_MemoryPoolAllocator.hpp new file mode 100644 index 0000000000..b4629df5b0 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_MemoryPoolAllocator.hpp @@ -0,0 +1,140 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_MEMORYPOOLALLOCATOR_HPP +#define KOKKOS_IMPL_MEMORYPOOLALLOCATOR_HPP + +#include + +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +namespace Kokkos { +namespace Impl { + +template +class MemoryPoolAllocator { +public: + + using memory_pool = MemoryPool; + +private: + + memory_pool m_pool; + +public: + + KOKKOS_INLINE_FUNCTION + MemoryPoolAllocator() = default; + KOKKOS_INLINE_FUNCTION + MemoryPoolAllocator(MemoryPoolAllocator const&) = default; + KOKKOS_INLINE_FUNCTION + MemoryPoolAllocator(MemoryPoolAllocator&&) = default; + KOKKOS_INLINE_FUNCTION + MemoryPoolAllocator& operator=(MemoryPoolAllocator const&) = default; + KOKKOS_INLINE_FUNCTION + MemoryPoolAllocator& operator=(MemoryPoolAllocator&&) = default; + KOKKOS_INLINE_FUNCTION + ~MemoryPoolAllocator() = default; + + KOKKOS_INLINE_FUNCTION + explicit MemoryPoolAllocator(memory_pool const& arg_pool) : m_pool(arg_pool) { } + KOKKOS_INLINE_FUNCTION + explicit MemoryPoolAllocator(memory_pool&& arg_pool) : m_pool(std::move(arg_pool)) { } + +public: + + using value_type = T; + using pointer = T*; + using size_type = typename MemoryPool::memory_space::size_type; + using difference_type = typename std::make_signed::type; + + template + struct rebind { + using other = MemoryPoolAllocator; + }; + + KOKKOS_INLINE_FUNCTION + pointer allocate(size_t n) { + void* rv = m_pool.allocate(n * sizeof(T)); + if(rv == nullptr) { + Kokkos::abort("Kokkos MemoryPool allocator failed to allocate memory"); + } + return reinterpret_cast(rv); + } + + KOKKOS_INLINE_FUNCTION + void deallocate(T* ptr, size_t n) { + m_pool.deallocate(ptr, n * sizeof(T)); + } + + KOKKOS_INLINE_FUNCTION + size_type max_size() const { + 
return m_pool.max_block_size(); + } + + KOKKOS_INLINE_FUNCTION + bool operator==(MemoryPoolAllocator const& other) const { + return m_pool == other.m_pool; + } + + KOKKOS_INLINE_FUNCTION + bool operator!=(MemoryPoolAllocator const& other) const { + return !(*this == other); + } + +}; + +} // end namespace Impl +} // end namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + + + +#endif /* #ifndef KOKKOS_IMPL_MEMORYPOOLALLOCATOR_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_MultipleTaskQueue.hpp b/lib/kokkos/core/src/impl/Kokkos_MultipleTaskQueue.hpp new file mode 100644 index 0000000000..ed8d2be5ae --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_MultipleTaskQueue.hpp @@ -0,0 +1,616 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_MULTIPLETASKQUEUE_HPP +#define KOKKOS_IMPL_MULTIPLETASKQUEUE_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +// A *non*-concurrent linked list of tasks that failed to be enqueued +// (We can't reuse the wait queue for this because of the semantics of that +// queue that require it to be popped exactly once, and if a task has failed +// to be enqueued, it has already been marked ready) +template +struct FailedQueueInsertionLinkedListSchedulingInfo { + using task_base_type = TaskNode; + 
task_base_type* next = nullptr; +}; + +struct EmptyTaskSchedulingInfo { }; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template < + class ExecSpace, + class MemorySpace, + class TaskQueueTraits, + class MemoryPool +> +class MultipleTaskQueue; + +template +struct MultipleTaskQueueTeamEntry { +public: + + using task_base_type = TaskNode; + using runnable_task_base_type = RunnableTaskBase; + using ready_queue_type = typename TaskQueueTraits::template ready_queue_type; + using task_queue_traits = TaskQueueTraits; + using task_scheduling_info_type = typename std::conditional< + TaskQueueTraits::ready_queue_insertion_may_fail, + FailedQueueInsertionLinkedListSchedulingInfo, + EmptyTaskSchedulingInfo + >::type; + +private: + + // Number of allowed priorities + static constexpr int NumPriorities = 3; + + ready_queue_type m_ready_queues[NumPriorities][2]; + + task_base_type* m_failed_heads[NumPriorities][2]; + + KOKKOS_INLINE_FUNCTION + task_base_type*& + failed_head_for(runnable_task_base_type const& task) + { + return m_failed_heads[int(task.get_priority())][int(task.get_task_type())]; + } + + template + KOKKOS_INLINE_FUNCTION + OptionalRef + _pop_failed_insertion( + int priority, TaskType type, + typename std::enable_if< + task_queue_traits::ready_queue_insertion_may_fail + and std::is_void<_always_void>::value, + void* + >::type = nullptr + ) { + auto* rv_ptr = m_failed_heads[priority][(int)type]; + if(rv_ptr) { + m_failed_heads[priority][(int)type] = + rv_ptr->as_runnable_task() + .template scheduling_info_as() + .next; + return OptionalRef{ *rv_ptr }; + } + else { + return OptionalRef{ nullptr }; + } + } + + template + KOKKOS_INLINE_FUNCTION + OptionalRef + _pop_failed_insertion( + int priority, TaskType type, + typename std::enable_if< + not task_queue_traits::ready_queue_insertion_may_fail + and std::is_void<_always_void>::value, + void* + >::type 
= nullptr + ) { + return OptionalRef{ nullptr }; + } + +public: + + KOKKOS_INLINE_FUNCTION + MultipleTaskQueueTeamEntry() { + for(int iPriority = 0; iPriority < NumPriorities; ++iPriority) { + for(int iType = 0; iType < 2; ++iType) { + m_failed_heads[iPriority][iType] = nullptr; + } + } + } + + + KOKKOS_INLINE_FUNCTION + OptionalRef + try_to_steal_ready_task() + { + auto return_value = OptionalRef{}; + // prefer lower priority tasks when stealing + for(int i_priority = NumPriorities-1; i_priority >= 0; --i_priority) { + // Check for a single task with this priority + return_value = m_ready_queues[i_priority][TaskSingle].steal(); + if(return_value) return return_value; + + // Check for a team task with this priority + return_value = m_ready_queues[i_priority][TaskTeam].steal(); + if(return_value) return return_value; + + } + return return_value; + } + + KOKKOS_INLINE_FUNCTION + OptionalRef + pop_ready_task() + { + auto return_value = OptionalRef{}; + for(int i_priority = 0; i_priority < NumPriorities; ++i_priority) { + return_value = _pop_failed_insertion(i_priority, TaskTeam); + if(not return_value) return_value = m_ready_queues[i_priority][TaskTeam].pop(); + if(return_value) return return_value; + + // Check for a single task with this priority + return_value = _pop_failed_insertion(i_priority, TaskSingle); + if(not return_value) return_value = m_ready_queues[i_priority][TaskSingle].pop(); + if(return_value) return return_value; + } + return return_value; + } + + KOKKOS_INLINE_FUNCTION + ready_queue_type& + team_queue_for(runnable_task_base_type const& task) + { + return m_ready_queues[int(task.get_priority())][int(task.get_task_type())]; + } + + + template + KOKKOS_INLINE_FUNCTION + void do_handle_failed_insertion( + runnable_task_base_type&& task, + typename std::enable_if< + task_queue_traits::ready_queue_insertion_may_fail + and std::is_void<_always_void>::value, + void* + >::type = nullptr + ) + { + // failed insertions, if they happen, must be from the only 
thread that + // is allowed to push to m_ready_queues, so this linked-list insertion is not + // concurrent + auto& node = task.template scheduling_info_as(); + auto*& head = failed_head_for(task); + node.next = head; + head = &task; + } + + template + KOKKOS_INLINE_FUNCTION + void do_handle_failed_insertion( + runnable_task_base_type&& task, + typename std::enable_if< + not task_queue_traits::ready_queue_insertion_may_fail + and std::is_void<_always_void>::value, + void* + >::type = nullptr + ) + { + Kokkos::abort("should be unreachable!"); + } + + + template + KOKKOS_INLINE_FUNCTION + void + flush_failed_insertions( + int priority, + int task_type, + typename std::enable_if< + task_queue_traits::ready_queue_insertion_may_fail + and std::is_void<_always_void>::value, // just to make this dependent on template parameter + int + >::type = 0 + ) { + // TODO @tasking @minor DSH this somethimes gets some things out of LIFO order, which may be undesirable (but not a bug) + + + auto*& failed_head = m_failed_heads[priority][task_type]; + auto& team_queue = m_ready_queues[priority][task_type]; + + while(failed_head != nullptr) { + bool success = team_queue.push(*failed_head); + if(success) { + // Step to the next linked list element + failed_head = failed_head->as_runnable_task() + .template scheduling_info_as().next; + } + else { + // no more room, stop traversing and leave the head where it is + break; + } + } + } + + + template + KOKKOS_INLINE_FUNCTION + void + flush_failed_insertions( + int, int, + typename std::enable_if< + not task_queue_traits::ready_queue_insertion_may_fail + and std::is_void<_always_void>::value, // just to make this dependent on template parameter + int + >::type = 0 + ) { } + + + KOKKOS_INLINE_FUNCTION + void + flush_all_failed_insertions() { + for(int iPriority = 0; iPriority < NumPriorities; ++iPriority) { + flush_failed_insertions(iPriority, (int)TaskType::TaskTeam); + flush_failed_insertions(iPriority, (int)TaskType::TaskSingle); + } + } + + 
+ template + KOKKOS_INLINE_FUNCTION + void + do_schedule_runnable( + MultipleTaskQueue& queue, + RunnableTaskBase&& task, + TeamSchedulerInfo const& info + + ) { + // Push on any nodes that failed to enqueue + auto& team_queue = team_queue_for(task); + auto priority = task.get_priority(); + auto task_type = task.get_task_type(); + + // First schedule the task + queue.schedule_runnable_to_queue( + std::move(task), + team_queue, + info + ); + + // Task may be enqueued and may be run at any point; don't touch it (hence + // the use of move semantics) + flush_failed_insertions((int)priority, (int)task_type); + } + + + +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template < + class ExecSpace, + class MemorySpace, + class TaskQueueTraits, + class MemoryPool +> +class MultipleTaskQueue final + : public TaskQueueMemoryManager, + public TaskQueueCommonMixin>, + private ObjectWithVLAEmulation< + MultipleTaskQueue, + MultipleTaskQueueTeamEntry + > +{ +public: + + using task_queue_type = MultipleTaskQueue; // mark as task_queue concept + using task_queue_traits = TaskQueueTraits; + using task_base_type = TaskNode; + using ready_queue_type = typename TaskQueueTraits::template ready_queue_type; + +private: + + using base_t = TaskQueueMemoryManager; + using common_mixin_t = TaskQueueCommonMixin; + using vla_emulation_base_t = ObjectWithVLAEmulation< + MultipleTaskQueue, + MultipleTaskQueueTeamEntry + >; + + // Allow private inheritance from ObjectWithVLAEmulation + friend struct VLAEmulationAccess; + +public: + + struct SchedulerInfo { + using team_queue_id_t = int32_t; + static constexpr team_queue_id_t NoAssociatedTeam = -1; + team_queue_id_t team_association = NoAssociatedTeam; + + using scheduler_info_type = SchedulerInfo; + + KOKKOS_INLINE_FUNCTION + constexpr explicit SchedulerInfo(team_queue_id_t association) noexcept + : team_association(association) 
+ { } + + KOKKOS_INLINE_FUNCTION + SchedulerInfo() = default; + + KOKKOS_INLINE_FUNCTION + SchedulerInfo(SchedulerInfo const&) = default; + + KOKKOS_INLINE_FUNCTION + SchedulerInfo(SchedulerInfo&&) = default; + + KOKKOS_INLINE_FUNCTION + SchedulerInfo& operator=(SchedulerInfo const&) = default; + + KOKKOS_INLINE_FUNCTION + SchedulerInfo& operator=(SchedulerInfo&&) = default; + + KOKKOS_INLINE_FUNCTION + ~SchedulerInfo() = default; + + }; + + using task_scheduling_info_type = typename std::conditional< + TaskQueueTraits::ready_queue_insertion_may_fail, + FailedQueueInsertionLinkedListSchedulingInfo, + EmptyTaskSchedulingInfo + >::type; + using team_scheduler_info_type = SchedulerInfo; + + using runnable_task_base_type = RunnableTaskBase; + + template + // requires TaskScheduler && TaskFunctor + using runnable_task_type = RunnableTask< + task_queue_traits, Scheduler, typename Functor::value_type, Functor + >; + + using aggregate_task_type = AggregateTask; + + // Number of allowed priorities + static constexpr int NumPriorities = 3; + + KOKKOS_INLINE_FUNCTION + constexpr typename vla_emulation_base_t::vla_entry_count_type + n_queues() const noexcept { return this->n_vla_entries(); } + +public: + + //---------------------------------------------------------------------------- + // {{{2 + + MultipleTaskQueue() = delete; + MultipleTaskQueue(MultipleTaskQueue const&) = delete; + MultipleTaskQueue(MultipleTaskQueue&&) = delete; + MultipleTaskQueue& operator=(MultipleTaskQueue const&) = delete; + MultipleTaskQueue& operator=(MultipleTaskQueue&&) = delete; + + MultipleTaskQueue( + typename base_t::execution_space const& arg_execution_space, + typename base_t::memory_space const&, + typename base_t::memory_pool const& arg_memory_pool + ) : base_t(arg_memory_pool), + vla_emulation_base_t( + Impl::TaskQueueSpecialization< + // TODO @tasking @generalization DSH avoid referencing SimpleTaskScheduler directly? 
+ SimpleTaskScheduler + >::get_max_team_count(arg_execution_space) + ) + { } + + // end Constructors, destructors, and assignment }}}2 + //---------------------------------------------------------------------------- + + KOKKOS_FUNCTION + void + schedule_runnable( + runnable_task_base_type&& task, + team_scheduler_info_type const& info + ) { + auto team_association = info.team_association; + // Should only not be assigned if this is a host spawn... + if(team_association == team_scheduler_info_type::NoAssociatedTeam) { + team_association = 0; + } + this->vla_value_at(team_association).do_schedule_runnable(*this, std::move(task), info); + // Task may be enqueued and may be run at any point; don't touch it (hence + // the use of move semantics) + } + + KOKKOS_FUNCTION + OptionalRef + pop_ready_task( + team_scheduler_info_type const& info + ) + { + KOKKOS_EXPECTS(info.team_association != team_scheduler_info_type::NoAssociatedTeam); + + auto return_value = OptionalRef{}; + auto team_association = info.team_association; + + // always loop in order of priority first, then prefer team tasks over single tasks + auto& team_queue_info = this->vla_value_at(team_association); + + if(task_queue_traits::ready_queue_insertion_may_fail) { + team_queue_info.flush_all_failed_insertions(); + } + + return_value = team_queue_info.pop_ready_task(); + + if(not return_value) { + + // loop through the rest of the teams and try to steal + for( + auto isteal = (team_association + 1) % this->n_queues(); + isteal != team_association; + isteal = (isteal + 1) % this->n_queues() + ) { + return_value = this->vla_value_at(isteal).try_to_steal_ready_task(); + if(return_value) { break; } + } + + // Note that this is where we'd update the task's scheduling info + } + // if nothing was found, return a default-constructed (empty) OptionalRef + return return_value; + } + + + // TODO @tasking @generalization DSH make this a property-based customization point + KOKKOS_INLINE_FUNCTION + 
team_scheduler_info_type + initial_team_scheduler_info(int rank_in_league) const noexcept { + return team_scheduler_info_type{ + typename team_scheduler_info_type::team_queue_id_t(rank_in_league % n_queues()) + }; + } + + // TODO @tasking @generalization DSH make this a property-based customization point + static /* KOKKOS_CONSTEXPR_14 */ size_t + task_queue_allocation_size( + typename base_t::execution_space const& exec_space, + typename base_t::memory_space const&, + typename base_t::memory_pool const& + ) + { + using specialization = + Impl::TaskQueueSpecialization< + // TODO @tasking @generalization DSH avoid referencing SimpleTaskScheduler directly? + SimpleTaskScheduler + >; + + return vla_emulation_base_t::required_allocation_size( + /* num_vla_entries = */ specialization::get_max_team_count(exec_space) + ); + } + + // Provide a sensible default that can be overridden + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + runnable_task_base_type& ready_task, + runnable_task_base_type const& predecessor + ) const + { + // Do nothing; we're using the extra storage for the failure linked list + } + + // Provide a sensible default that can be overridden + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + aggregate_task_type& aggregate, + runnable_task_base_type const& predecessor + ) const + { + // Do nothing; we're using the extra storage for the failure linked list + } + + // Provide a sensible default that can be overridden + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + aggregate_task_type& aggregate, + aggregate_task_type const& predecessor + ) const + { + // Do nothing; we're using the extra storage for the failure linked list + } + + // Provide a sensible default that can be overridden + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + runnable_task_base_type& ready_task, + aggregate_task_type const& predecessor + ) const + 
{ + // Do nothing; we're using the extra storage for the failure linked list + } + + KOKKOS_INLINE_FUNCTION + void + handle_failed_ready_queue_insertion( + runnable_task_base_type&& task, + ready_queue_type&, + team_scheduler_info_type const& info + ) { + KOKKOS_EXPECTS(info.team_association != team_scheduler_info_type::NoAssociatedTeam); + + this->vla_value_at(info.team_association).do_handle_failed_insertion( + std::move(task) + ); + } +}; + + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_MULTIPLETASKQUEUE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_OptionalRef.hpp b/lib/kokkos/core/src/impl/Kokkos_OptionalRef.hpp new file mode 100644 index 0000000000..bf83d1831c --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_OptionalRef.hpp @@ -0,0 +1,242 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_OPTIONALREF_HPP +#define KOKKOS_IMPL_OPTIONALREF_HPP + +#include + +#include + +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +namespace Kokkos { +namespace Impl { + +struct InPlaceTag { }; + +template +struct OptionalRef { +private: + + ObservingRawPtr m_value = nullptr; + +public: + + using value_type = T; + + KOKKOS_INLINE_FUNCTION + OptionalRef() = default; + + KOKKOS_INLINE_FUNCTION + OptionalRef(OptionalRef const&) = default; + + KOKKOS_INLINE_FUNCTION + OptionalRef(OptionalRef&&) = default; + + KOKKOS_INLINE_FUNCTION + OptionalRef& operator=(OptionalRef const&) = default; + + KOKKOS_INLINE_FUNCTION + // Can't return a reference to volatile OptionalRef, since GCC issues a warning about + // reference to volatile not accessing the underlying value + void + operator=(OptionalRef const volatile& other) volatile noexcept + { + m_value = other.m_value; + } + + KOKKOS_INLINE_FUNCTION + OptionalRef& operator=(OptionalRef&&) = default; + + KOKKOS_INLINE_FUNCTION + ~OptionalRef() = default; + + KOKKOS_INLINE_FUNCTION + explicit OptionalRef(T& arg_value) : m_value(&arg_value) { } + + KOKKOS_INLINE_FUNCTION + explicit OptionalRef(std::nullptr_t) : m_value(nullptr) { } + + KOKKOS_INLINE_FUNCTION + OptionalRef& operator=(T& arg_value) { m_value = &arg_value; return *this; } + + KOKKOS_INLINE_FUNCTION + OptionalRef& operator=(std::nullptr_t) { m_value = nullptr; return *this; } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + OptionalRef::type> + as_volatile() volatile noexcept { + return + OptionalRef::type>(*(*this)); + } + + KOKKOS_INLINE_FUNCTION + OptionalRef::type>::type> + as_volatile() const volatile 
noexcept { + return + OptionalRef::type>::type>(*(*this)); + } + + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + T& operator*() & { + KOKKOS_EXPECTS(this->has_value()); + return *m_value; + } + + KOKKOS_INLINE_FUNCTION + T const& operator*() const & { + KOKKOS_EXPECTS(this->has_value()); + return *m_value; + } + + KOKKOS_INLINE_FUNCTION + T volatile& operator*() volatile & { + KOKKOS_EXPECTS(this->has_value()); + return *m_value; + } + + KOKKOS_INLINE_FUNCTION + T const volatile& operator*() const volatile & { + KOKKOS_EXPECTS(this->has_value()); + return *m_value; + } + + KOKKOS_INLINE_FUNCTION + T&& operator*() && { + KOKKOS_EXPECTS(this->has_value()); + return std::move(*m_value); + } + + KOKKOS_INLINE_FUNCTION + T* operator->() { + KOKKOS_EXPECTS(this->has_value()); + return m_value; + } + + KOKKOS_INLINE_FUNCTION + T const* operator->() const { + KOKKOS_EXPECTS(this->has_value()); + return m_value; + } + + KOKKOS_INLINE_FUNCTION + T volatile* operator->() volatile { + KOKKOS_EXPECTS(this->has_value()); + return m_value; + } + + KOKKOS_INLINE_FUNCTION + T const volatile* operator->() const volatile { + KOKKOS_EXPECTS(this->has_value()); + return m_value; + } + + KOKKOS_INLINE_FUNCTION + T* get() { + return m_value; + } + + KOKKOS_INLINE_FUNCTION + T const* get() const { + return m_value; + } + + KOKKOS_INLINE_FUNCTION + T volatile* get() volatile { + return m_value; + } + + KOKKOS_INLINE_FUNCTION + T const volatile* get() const volatile { + return m_value; + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + operator bool() { return m_value != nullptr; } + + KOKKOS_INLINE_FUNCTION + operator bool() const { return m_value != nullptr; } + + KOKKOS_INLINE_FUNCTION + operator bool() volatile { return m_value != nullptr; } + + KOKKOS_INLINE_FUNCTION + operator bool() const volatile { return m_value != nullptr; } + + KOKKOS_INLINE_FUNCTION + bool has_value() { return m_value != nullptr; } + + 
KOKKOS_INLINE_FUNCTION + bool has_value() const { return m_value != nullptr; } + + KOKKOS_INLINE_FUNCTION + bool has_value() volatile { return m_value != nullptr; } + + KOKKOS_INLINE_FUNCTION + bool has_value() const volatile { return m_value != nullptr; } + +}; + +} // end namespace Impl +} // end namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + + + +#endif /* #ifndef KOKKOS_IMPL_OPTIONALREF_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp index d84a854622..687a0e9c37 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp @@ -55,104 +55,7 @@ namespace Kokkos { namespace Impl { -template class TaskQueue< Kokkos::Serial > ; - -void TaskQueueSpecialization< Kokkos::Serial >::execute - ( TaskQueue< Kokkos::Serial > * const queue ) -{ - using exec_space = Kokkos::Serial ; - using tqs_queue_type = TaskQueue< exec_space > ; - using task_root_type = TaskBase< void , void , void > ; - using Member = Impl::HostThreadTeamMember< exec_space > ; - - task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - - // Set default buffers - serial_resize_thread_team_data( 0 /* global reduce buffer */ - , 512 /* team reduce buffer */ - , 0 /* team shared buffer */ - , 0 /* thread local buffer */ - ); - - Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data(); - - Member exec( *data ); - - // Loop until all queues are empty - while ( 0 < queue->m_ready_count ) { - - task_root_type * task = end ; - - for ( int i = 0 ; i < tqs_queue_type::NumQueue && end == task ; ++i ) { - for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = tqs_queue_type::pop_ready_task( & queue->m_ready[i][j] ); - } - } - - if ( end != task ) { - - // pop_ready_task resulted in lock == task->m_next - // In the 
executing state - - (*task->m_apply)( task , & exec ); - -#if 0 - printf( "TaskQueue::executed: 0x%lx { 0x%lx 0x%lx %d %d %d }\n" - , uintptr_t(task) - , uintptr_t(task->m_wait) - , uintptr_t(task->m_next) - , task->m_task_type - , task->m_priority - , task->m_ref_count ); -#endif - - // If a respawn then re-enqueue otherwise the task is complete - // and all tasks waiting on this task are updated. - queue->complete( task ); - } - else if ( 0 != queue->m_ready_count ) { - Kokkos::abort("TaskQueue::execute ERROR: ready_count"); - } - } -} - -void TaskQueueSpecialization< Kokkos::Serial > :: - iff_single_thread_recursive_execute( - TaskQueue< Kokkos::Serial > * const queue ) -{ - using exec_space = Kokkos::Serial ; - using tqs_queue_type = TaskQueue< exec_space > ; - using task_root_type = TaskBase< void , void , void > ; - using Member = Impl::HostThreadTeamMember< exec_space > ; - - task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - - Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data(); - - Member exec( *data ); - - // Loop until no runnable task - - task_root_type * task = end ; - - do { - - task = end ; - - for ( int i = 0 ; i < tqs_queue_type::NumQueue && end == task ; ++i ) { - for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = tqs_queue_type::pop_ready_task( & queue->m_ready[i][j] ); - } - } - - if ( end == task ) break ; - - (*task->m_apply)( task , & exec ); - - queue->complete( task ); - - } while(1); -} +template class TaskQueue; }} /* namespace Kokkos::Impl */ diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp index 2fec5dfb89..c379a12fb1 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp @@ -47,7 +47,11 @@ #include #if defined( KOKKOS_ENABLE_TASKDAG ) +#include + #include +#include +#include //---------------------------------------------------------------------------- 
//---------------------------------------------------------------------------- @@ -55,32 +59,217 @@ namespace Kokkos { namespace Impl { -//---------------------------------------------------------------------------- - -template<> -class TaskQueueSpecialization< Kokkos::Serial > +template +class TaskQueueSpecialization< + SimpleTaskScheduler +> { public: - using execution_space = Kokkos::Serial ; - using memory_space = Kokkos::HostSpace ; - using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; - using task_base_type = Kokkos::Impl::TaskBase< void , void , void > ; - using member_type = Kokkos::Impl::HostThreadTeamMember< execution_space > ; + // Note: Scheduler may be an incomplete type at class scope (but not inside + // of the methods, obviously) + + using execution_space = Kokkos::Serial; + using memory_space = Kokkos::HostSpace; + using scheduler_type = SimpleTaskScheduler; + using member_type = TaskTeamMemberAdapter< + HostThreadTeamMember, scheduler_type + >; static - void iff_single_thread_recursive_execute( queue_type * const ); + void execute(scheduler_type const& scheduler) + { + using task_base_type = typename scheduler_type::task_base_type; - static - void execute( queue_type * const ); + // Set default buffers + serial_resize_thread_team_data( + 0, /* global reduce buffer */ + 512, /* team reduce buffer */ + 0, /* team shared buffer */ + 0 /* thread local buffer */ + ); - template< typename TaskType > - static - typename TaskType::function_type - get_function_pointer() { return TaskType::apply ; } + Impl::HostThreadTeamData& self = *Impl::serial_get_thread_team_data(); + + auto& queue = scheduler.queue(); + auto team_scheduler = scheduler.get_team_scheduler(0); + + member_type member(scheduler, self); + + auto current_task = OptionalRef(nullptr); + + while(not queue.is_done()) { + + // Each team lead attempts to acquire either a thread team task + // or a single thread task for the team. 
+ + // pop a task off + current_task = queue.pop_ready_task(team_scheduler.team_scheduler_info()); + + // run the task + if(current_task) { + current_task->as_runnable_task().run(member); + // Respawns are handled in the complete function + queue.complete( + (*std::move(current_task)).as_runnable_task(), + team_scheduler.team_scheduler_info() + ); + } + + } + + } + + static constexpr uint32_t + get_max_team_count(execution_space const&) noexcept + { + return 1; + } + + template + static void + get_function_pointer( + typename TaskType::function_type& ptr, + typename TaskType::destroy_type& dtor + ) + { + ptr = TaskType::apply; + dtor = TaskType::destroy; + } }; -extern template class TaskQueue< Kokkos::Serial > ; +//---------------------------------------------------------------------------- + +template +class TaskQueueSpecializationConstrained< + Scheduler, + typename std::enable_if< + std::is_same::value + >::type +> +{ +public: + + // Note: Scheduler may be an incomplete type at class scope (but not inside + // of the methods, obviously) + + using execution_space = Kokkos::Serial; + using memory_space = Kokkos::HostSpace; + using scheduler_type = Scheduler; + using member_type = TaskTeamMemberAdapter< + HostThreadTeamMember, scheduler_type + >; + + static + void iff_single_thread_recursive_execute(scheduler_type const& scheduler) { + using task_base_type = TaskBase; + using queue_type = typename scheduler_type::queue_type; + + task_base_type * const end = (task_base_type *) task_base_type::EndTag ; + + Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data(); + + member_type exec( scheduler, *data ); + + // Loop until no runnable task + + task_base_type * task = end ; + + auto* const queue = scheduler.m_queue; + + do { + + task = end ; + + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); + } + } + + if ( end == 
task ) break ; + + (*task->m_apply)( task , & exec ); + + queue->complete( task ); + + } while(1); + + } + + static + void execute(scheduler_type const& scheduler) + { + using task_base_type = TaskBase; + using queue_type = typename scheduler_type::queue_type; + + task_base_type * const end = (task_base_type *) task_base_type::EndTag ; + + // Set default buffers + serial_resize_thread_team_data( + 0, /* global reduce buffer */ + 512, /* team reduce buffer */ + 0, /* team shared buffer */ + 0 /* thread local buffer */ + ); + + auto* const queue = scheduler.m_queue; + + Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data(); + + member_type exec( scheduler, *data ); + + // Loop until all queues are empty + while ( 0 < queue->m_ready_count ) { + + task_base_type * task = end ; + + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); + } + } + + if ( end != task ) { + + // pop_ready_task resulted in lock == task->m_next + // In the executing state + + (*task->m_apply)( task , & exec ); + +#if 0 + printf( "TaskQueue::executed: 0x%lx { 0x%lx 0x%lx %d %d %d }\n" + , uintptr_t(task) + , uintptr_t(task->m_wait) + , uintptr_t(task->m_next) + , task->m_task_type + , task->m_priority + , task->m_ref_count ); +#endif + + // If a respawn then re-enqueue otherwise the task is complete + // and all tasks waiting on this task are updated. 
+ queue->complete( task ); + } + else if ( 0 != queue->m_ready_count ) { + Kokkos::abort("TaskQueue::execute ERROR: ready_count"); + } + } + } + + template + static void + get_function_pointer( + typename TaskType::function_type& ptr, + typename TaskType::destroy_type& dtor + ) + { + ptr = TaskType::apply; + dtor = TaskType::destroy; + } +}; + +extern template class TaskQueue< Kokkos::Serial, typename Kokkos::Serial::memory_space > ; }} /* namespace Kokkos::Impl */ diff --git a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp index 658f1db06b..77eb69d081 100644 --- a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp @@ -48,11 +48,11 @@ namespace Impl { __thread int SharedAllocationRecord::t_tracking_enabled = 1; +#ifdef KOKKOS_DEBUG bool SharedAllocationRecord< void , void >:: is_sane( SharedAllocationRecord< void , void > * arg_record ) { -#ifdef KOKKOS_DEBUG SharedAllocationRecord * const root = arg_record ? 
arg_record->m_root : 0 ; bool ok = root != 0 && root->use_count() == 0 ; @@ -102,16 +102,23 @@ is_sane( SharedAllocationRecord< void , void > * arg_record ) } } return ok ; -#else - Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord::is_sane only works with KOKKOS_DEBUG enabled"); - return false ; -#endif } +#else + +bool +SharedAllocationRecord< void , void >:: +is_sane( SharedAllocationRecord< void , void > * ) +{ + Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord::is_sane only works with KOKKOS_DEBUG enabled"); + return false ; +} +#endif //#ifdef KOKKOS_DEBUG + +#ifdef KOKKOS_DEBUG SharedAllocationRecord * SharedAllocationRecord::find( SharedAllocationRecord * const arg_root , void * const arg_data_ptr ) { -#ifdef KOKKOS_DEBUG SharedAllocationRecord * root_next = 0 ; static constexpr SharedAllocationRecord * zero = nullptr; @@ -130,11 +137,15 @@ SharedAllocationRecord::find( SharedAllocationRecord * con Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed locking/unlocking"); } return r ; +} #else +SharedAllocationRecord * +SharedAllocationRecord::find( SharedAllocationRecord * const , void * const ) +{ Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord::find only works with KOKKOS_DEBUG enabled"); return nullptr; -#endif } +#endif /**\brief Construct and insert into 'arg_root' tracking set. 
@@ -271,6 +282,7 @@ decrement( SharedAllocationRecord< void , void > * arg_record ) return arg_record ; } +#ifdef KOKKOS_DEBUG void SharedAllocationRecord< void , void >:: print_host_accessible_records( std::ostream & s @@ -278,7 +290,6 @@ print_host_accessible_records( std::ostream & s , const SharedAllocationRecord * const root , const bool detail ) { -#ifdef KOKKOS_DEBUG const SharedAllocationRecord< void , void > * r = root ; char buffer[256] ; @@ -339,12 +350,20 @@ print_host_accessible_records( std::ostream & s r = r->m_next ; } while ( r != root ); } +} #else +void +SharedAllocationRecord< void , void >:: +print_host_accessible_records( std::ostream & + , const char * const + , const SharedAllocationRecord * const + , const bool ) +{ Kokkos::Impl::throw_runtime_exception( "Kokkos::Impl::SharedAllocationRecord::print_host_accessible_records" " only works with KOKKOS_DEBUG enabled"); -#endif } +#endif } /* namespace Impl */ } /* namespace Kokkos */ diff --git a/lib/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp b/lib/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp new file mode 100644 index 0000000000..c2dbc96814 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_SimpleTaskScheduler.hpp @@ -0,0 +1,646 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_SIMPLETASKSCHEDULER_HPP +#define KOKKOS_SIMPLETASKSCHEDULER_HPP + +//---------------------------------------------------------------------------- + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include +//---------------------------------------------------------------------------- + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +namespace Impl { + +// TODO @tasking @cleanup move this +template +struct DefaultDestroy { + T* managed_object; + KOKKOS_FUNCTION + void destroy_shared_allocation() { + managed_object->~T(); + } +}; + + +template +class ExecutionSpaceInstanceStorage + : private NoUniqueAddressMemberEmulation +{ +private: + + using base_t = NoUniqueAddressMemberEmulation; + +protected: + + constexpr explicit + ExecutionSpaceInstanceStorage() + : base_t() + { } + + KOKKOS_INLINE_FUNCTION + constexpr explicit + ExecutionSpaceInstanceStorage(ExecutionSpace const& arg_execution_space) + : base_t(arg_execution_space) + { } + + KOKKOS_INLINE_FUNCTION + constexpr explicit + ExecutionSpaceInstanceStorage(ExecutionSpace&& arg_execution_space) + : base_t(std::move(arg_execution_space)) + { } + + KOKKOS_INLINE_FUNCTION + ExecutionSpace& execution_space_instance() & + { + return this->no_unique_address_data_member(); + } + + KOKKOS_INLINE_FUNCTION + ExecutionSpace const& execution_space_instance() const & + { + return this->no_unique_address_data_member(); + } + + KOKKOS_INLINE_FUNCTION + ExecutionSpace&& execution_space_instance() && + { + return std::move(*this).no_unique_address_data_member(); + } +}; + + +template +class MemorySpaceInstanceStorage + : private 
NoUniqueAddressMemberEmulation +{ +private: + + using base_t = NoUniqueAddressMemberEmulation; + +protected: + + MemorySpaceInstanceStorage() + : base_t() + { } + + KOKKOS_INLINE_FUNCTION + MemorySpaceInstanceStorage(MemorySpace const& arg_memory_space) + : base_t(arg_memory_space) + { } + + KOKKOS_INLINE_FUNCTION + constexpr explicit + MemorySpaceInstanceStorage(MemorySpace&& arg_memory_space) + : base_t(arg_memory_space) + { } + + KOKKOS_INLINE_FUNCTION + MemorySpace& memory_space_instance() & + { + return this->no_unique_address_data_member(); + } + + KOKKOS_INLINE_FUNCTION + MemorySpace const& memory_space_instance() const & + { + return this->no_unique_address_data_member(); + } + + KOKKOS_INLINE_FUNCTION + MemorySpace&& memory_space_instance() && + { + return std::move(*this).no_unique_address_data_member(); + } +}; + +} // end namespace Impl + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template + // requires ExecutionSpace && TaskQueue +class SimpleTaskScheduler + : public Impl::TaskSchedulerBase, + private Impl::ExecutionSpaceInstanceStorage, + private Impl::MemorySpaceInstanceStorage, + private Impl::NoUniqueAddressMemberEmulation +{ +public: + // TODO @tasking @generalization (maybe?) 
don't force QueueType to be complete here + + using scheduler_type = SimpleTaskScheduler; // tag as scheduler concept + using execution_space = ExecSpace; + using task_queue_type = QueueType; + using memory_space = typename task_queue_type::memory_space; + using memory_pool = typename task_queue_type::memory_pool; + + using team_scheduler_info_type = typename task_queue_type::team_scheduler_info_type; + using task_scheduling_info_type = typename task_queue_type::task_scheduling_info_type; + using specialization = Impl::TaskQueueSpecialization; + using member_type = typename specialization::member_type; + + template + using runnable_task_type = typename QueueType::template runnable_task_type; + + using task_base_type = typename task_queue_type::task_base_type; + using runnable_task_base_type = typename task_queue_type::runnable_task_base_type; + + using task_queue_traits = typename QueueType::task_queue_traits; + + template + using future_type = Kokkos::BasicFuture; + template + using future_type_for_functor = future_type; + +private: + + template + friend class BasicFuture; + + using track_type = Kokkos::Impl::SharedAllocationTracker; + using execution_space_storage = Impl::ExecutionSpaceInstanceStorage; + using memory_space_storage = Impl::MemorySpaceInstanceStorage; + using team_scheduler_info_storage = Impl::NoUniqueAddressMemberEmulation; + + track_type m_track; + task_queue_type* m_queue = nullptr; + + KOKKOS_INLINE_FUNCTION + static constexpr task_base_type* _get_task_ptr(std::nullptr_t) { return nullptr; } + + template + KOKKOS_INLINE_FUNCTION + static constexpr task_base_type* _get_task_ptr(future_type&& f) + { + return f.m_task; + } + + template < + int TaskEnum, + class DepTaskType, + class FunctorType + > + KOKKOS_FUNCTION + future_type_for_functor::type> + _spawn_impl( + DepTaskType arg_predecessor_task, + TaskPriority arg_priority, + typename runnable_task_base_type::function_type apply_function_ptr, + typename runnable_task_base_type::destroy_type 
destroy_function_ptr, + FunctorType&& functor + ) + { + KOKKOS_EXPECTS(m_queue != nullptr); + + using functor_future_type = future_type_for_functor::type>; + using task_type = typename task_queue_type::template runnable_task_type< + FunctorType, scheduler_type + >; + + // Reference count starts at two: + // +1 for the matching decrement when task is complete + // +1 for the future + auto& runnable_task = *m_queue->template allocate_and_construct( + /* functor = */ std::forward(functor), + /* apply_function_ptr = */ apply_function_ptr, + /* task_type = */ static_cast(TaskEnum), + /* priority = */ arg_priority, + /* queue_base = */ m_queue, + /* initial_reference_count = */ 2 + ); + + if(arg_predecessor_task != nullptr) { + m_queue->initialize_scheduling_info_from_predecessor( + runnable_task, *arg_predecessor_task + ); + runnable_task.set_predecessor(*arg_predecessor_task); + arg_predecessor_task->decrement_and_check_reference_count(); + } + else { + m_queue->initialize_scheduling_info_from_team_scheduler_info( + runnable_task, team_scheduler_info() + ); + } + + auto rv = functor_future_type(&runnable_task); + + Kokkos::memory_fence(); // fence to ensure dependent stores are visible + + m_queue->schedule_runnable( + std::move(runnable_task), + team_scheduler_info() + ); + // note that task may be already completed even here, so don't touch it again + + return rv; + } + + +public: + + //---------------------------------------------------------------------------- + // {{{2 + + SimpleTaskScheduler() = default; + + explicit + SimpleTaskScheduler( + execution_space const& arg_execution_space, + memory_space const& arg_memory_space, + memory_pool const& arg_memory_pool + ) : execution_space_storage(arg_execution_space), + memory_space_storage(arg_memory_space) + { + // Ask the task queue how much space it needs (usually will just be + // sizeof(task_queue_type), but some queues may need additional storage + // dependent on runtime conditions or properties of the execution 
space) + auto const allocation_size = task_queue_type::task_queue_allocation_size( + arg_execution_space, + arg_memory_space, + arg_memory_pool + ); + + // TODO @tasking @generalization DSH better encapsulation of the SharedAllocationRecord pattern + using record_type = Impl::SharedAllocationRecord< + memory_space, Impl::DefaultDestroy + >; + + // Allocate space for the task queue + auto* record = record_type::allocate( + memory_space(), "TaskQueue", allocation_size + ); + m_queue = new (record->data()) task_queue_type( + arg_execution_space, + arg_memory_space, + arg_memory_pool + ); + record->m_destroy.managed_object = m_queue; + m_track.assign_allocated_record_to_uninitialized(record); + } + + explicit + SimpleTaskScheduler( + execution_space const& arg_execution_space, + memory_pool const& pool + ) : SimpleTaskScheduler(arg_execution_space, memory_space{}, pool) + { /* forwarding ctor, must be empty */ } + + explicit + SimpleTaskScheduler(memory_pool const& pool) + : SimpleTaskScheduler(execution_space{}, memory_space{}, pool) + { /* forwarding ctor, must be empty */ } + + SimpleTaskScheduler( + memory_space const & arg_memory_space, + size_t const mempool_capacity, + unsigned const mempool_min_block_size, // = 1u << 6 + unsigned const mempool_max_block_size, // = 1u << 10 + unsigned const mempool_superblock_size // = 1u << 12 + ) : SimpleTaskScheduler( + execution_space{}, + arg_memory_space, + memory_pool( + arg_memory_space, mempool_capacity, mempool_min_block_size, + mempool_max_block_size, mempool_superblock_size + ) + ) + { /* forwarding ctor, must be empty */ } + + // end Constructors, destructor, and assignment }}}2 + //---------------------------------------------------------------------------- + + // Note that this is an expression of shallow constness + KOKKOS_INLINE_FUNCTION + task_queue_type& queue() const + { + KOKKOS_EXPECTS(m_queue != nullptr); + return *m_queue; + } + + KOKKOS_INLINE_FUNCTION + SimpleTaskScheduler + get_team_scheduler(int 
rank_in_league) const noexcept + { + KOKKOS_EXPECTS(m_queue != nullptr); + auto rv = SimpleTaskScheduler{ *this }; + rv.team_scheduler_info() = m_queue->initial_team_scheduler_info(rank_in_league); + return rv; + } + + KOKKOS_INLINE_FUNCTION + execution_space const& get_execution_space() const { return this->execution_space_instance(); } + + KOKKOS_INLINE_FUNCTION + team_scheduler_info_type& team_scheduler_info() & + { + return this->team_scheduler_info_storage::no_unique_address_data_member(); + } + + KOKKOS_INLINE_FUNCTION + team_scheduler_info_type const& team_scheduler_info() const & + { + return this->team_scheduler_info_storage::no_unique_address_data_member(); + } + + //---------------------------------------------------------------------------- + + #ifdef KOKKOS_ENABLE_DEPRECATED_CODE + // For backwards compatibility purposes only + KOKKOS_DEPRECATED + KOKKOS_INLINE_FUNCTION + memory_pool* + memory() const noexcept KOKKOS_DEPRECATED_TRAILING_ATTRIBUTE + { + if(m_queue != nullptr) return &(m_queue->get_memory_pool()); + else return nullptr; + } + #endif + + //---------------------------------------------------------------------------- + + template + KOKKOS_FUNCTION + static + Kokkos::BasicFuture + spawn( + Impl::TaskPolicyWithScheduler&& arg_policy, + typename runnable_task_base_type::function_type arg_function, + typename runnable_task_base_type::destroy_type arg_destroy, + FunctorType&& arg_functor + ) + { + return std::move(arg_policy.scheduler()).template _spawn_impl( + _get_task_ptr(std::move(arg_policy.predecessor())), + arg_policy.priority(), + arg_function, + arg_destroy, + std::forward(arg_functor) + ); + } + + template + KOKKOS_FUNCTION + Kokkos::BasicFuture + spawn( + Impl::TaskPolicyWithPredecessor&& arg_policy, + FunctorType&& arg_functor + ) + { + static_assert( + std::is_same::value, + "Can't create a task policy from a scheduler and a future from a different scheduler" + ); + + using task_type = runnable_task_type; + typename 
task_type::function_type const ptr = task_type::apply; + typename task_type::destroy_type const dtor = task_type::destroy; + + return _spawn_impl( + std::move(arg_policy).predecessor().m_task, + arg_policy.priority(), + ptr, dtor, + std::forward(arg_functor) + ); + } + + template + KOKKOS_FUNCTION + static void + respawn( + FunctorType* functor, + BasicFuture const& predecessor, + TaskPriority priority = TaskPriority::Regular + ) { + using task_type = typename task_queue_type::template runnable_task_type< + FunctorType, scheduler_type + >; + + auto& task = *static_cast(functor); + + KOKKOS_EXPECTS(!task.get_respawn_flag()); + + task.set_priority(priority); + task.set_predecessor(*predecessor.m_task); + task.set_respawn_flag(true); + } + + template + KOKKOS_FUNCTION + static void + respawn( + FunctorType* functor, + scheduler_type const&, + TaskPriority priority = TaskPriority::Regular + ) { + using task_type = typename task_queue_type::template runnable_task_type< + FunctorType, scheduler_type + >; + + auto& task = *static_cast(functor); + + KOKKOS_EXPECTS(!task.get_respawn_flag()); + + task.set_priority(priority); + KOKKOS_ASSERT(not task.has_predecessor()); + task.set_respawn_flag(true); + } + + + template + KOKKOS_FUNCTION + future_type + when_all(BasicFuture const predecessors[], int n_predecessors) { + + // TODO @tasking @generalization DSH propagate scheduling info + + using task_type = typename task_queue_type::aggregate_task_type; + + future_type rv; + + if(n_predecessors > 0) { + task_queue_type* queue_ptr = nullptr; + + // Loop over the predecessors to find the queue and increment the reference + // counts + for(int i_pred = 0; i_pred < n_predecessors; ++i_pred) { + + auto* predecessor_task_ptr = predecessors[i_pred].m_task; + + if(predecessor_task_ptr != nullptr) { + // TODO @tasking @cleanup DSH figure out when this is allowed to be nullptr (if at all anymore) + + // Increment reference count to track subsequent assignment. 
+ // TODO @tasking @optimization DSH figure out if this reference count increment is necessary + predecessor_task_ptr->increment_reference_count(); + + // TODO @tasking @cleanup DSH we should just set a boolean here instead to make this more readable + queue_ptr = m_queue; + } + + } // end loop over predecessors + + // This only represents a non-ready future if at least one of the predecessors + // has a task (and thus, a queue) + if(queue_ptr != nullptr) { + auto& q = *queue_ptr; + + auto* aggregate_task_ptr = q.template allocate_and_construct_with_vla_emulation< + task_type, task_base_type* + >( + /* n_vla_entries = */ n_predecessors, + /* aggregate_predecessor_count = */ n_predecessors, + /* queue_base = */ &q, + /* initial_reference_count = */ 2 + ); + + rv = future_type(aggregate_task_ptr); + + for(int i_pred = 0; i_pred < n_predecessors; ++i_pred) { + aggregate_task_ptr->vla_value_at(i_pred) = predecessors[i_pred].m_task; + } + + Kokkos::memory_fence(); // we're touching very questionable memory, so be sure to fence + + q.schedule_aggregate(std::move(*aggregate_task_ptr), team_scheduler_info()); + // the aggregate may be processed at any time, so don't touch it after this + } + } + + return rv; + } + + template + KOKKOS_FUNCTION + future_type + when_all(int n_calls, F&& func) + { + // TODO @tasking @generalization DSH propagate scheduling info? 
+ + // later this should be std::invoke_result_t + using generated_type = decltype(func(0)); + using task_type = typename task_queue_type::aggregate_task_type; + + static_assert( + is_future::value, + "when_all function must return a Kokkos future (an instance of Kokkos::BasicFuture)" + ); + static_assert( + std::is_base_of::value, + "when_all function must return a Kokkos::BasicFuture of a compatible scheduler type" + ); + + auto* aggregate_task = m_queue->template allocate_and_construct_with_vla_emulation< + task_type, task_base_type* + >( + /* n_vla_entries = */ n_calls, + /* aggregate_predecessor_count = */ n_calls, + /* queue_base = */ m_queue, + /* initial_reference_count = */ 2 + ); + + auto rv = future_type(aggregate_task); + + for(int i_call = 0; i_call < n_calls; ++i_call) { + + auto generated_future = func(i_call); + + if(generated_future.m_task != nullptr) { + generated_future.m_task->increment_reference_count(); + aggregate_task->vla_value_at(i_call) = generated_future.m_task; + + KOKKOS_ASSERT(m_queue == generated_future.m_task->ready_queue_base_ptr() + && "Queue mismatch in when_all" + ); + } + + } + + Kokkos::memory_fence(); + + m_queue->schedule_aggregate(std::move(*aggregate_task), team_scheduler_info()); + // This could complete at any moment, so don't touch anything after this + + return rv; + } + +}; + + +template +inline +void wait(SimpleTaskScheduler const& scheduler) +{ + using scheduler_type = SimpleTaskScheduler; + scheduler_type::specialization::execute(scheduler); +} + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_SIMPLETASKSCHEDULER_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_SingleTaskQueue.hpp b/lib/kokkos/core/src/impl/Kokkos_SingleTaskQueue.hpp new file mode 
100644 index 0000000000..d73028eb5b --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_SingleTaskQueue.hpp @@ -0,0 +1,207 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_SINGLETASKQUEUE_HPP +#define KOKKOS_IMPL_SINGLETASKQUEUE_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template < + class ExecSpace, + class MemorySpace, + class TaskQueueTraits, + class MemoryPool +> +class SingleTaskQueue + : public TaskQueueMemoryManager, + public TaskQueueCommonMixin> +{ +private: + + using base_t = TaskQueueMemoryManager; + using common_mixin_t = TaskQueueCommonMixin; + + struct EmptyTeamSchedulerInfo { }; + struct EmptyTaskSchedulingInfo { }; + +public: + + using task_queue_type = SingleTaskQueue; // mark as task_queue concept + using task_queue_traits = TaskQueueTraits; + using task_base_type = TaskNode; + using ready_queue_type = typename TaskQueueTraits::template ready_queue_type; + + using team_scheduler_info_type = EmptyTeamSchedulerInfo; + using task_scheduling_info_type = EmptyTaskSchedulingInfo; + + using runnable_task_base_type = RunnableTaskBase; + + template + // requires TaskScheduler && TaskFunctor + using runnable_task_type = RunnableTask< + task_queue_traits, Scheduler, typename Functor::value_type, Functor + >; + + using aggregate_task_type = AggregateTask; + + // Number of allowed priorities + static constexpr int NumQueue = 3; + +private: + + ready_queue_type m_ready_queues[NumQueue][2]; + +public: + + 
//---------------------------------------------------------------------------- + // {{{2 + + SingleTaskQueue() = delete; + SingleTaskQueue(SingleTaskQueue const&) = delete; + SingleTaskQueue(SingleTaskQueue&&) = delete; + SingleTaskQueue& operator=(SingleTaskQueue const&) = delete; + SingleTaskQueue& operator=(SingleTaskQueue&&) = delete; + + explicit + SingleTaskQueue( + typename base_t::execution_space const&, + typename base_t::memory_space const&, + typename base_t::memory_pool const& arg_memory_pool + ) + : base_t(arg_memory_pool) + { } + + ~SingleTaskQueue() { + for(int i_priority = 0; i_priority < NumQueue; ++i_priority) { + KOKKOS_EXPECTS(m_ready_queues[i_priority][TaskTeam].empty()); + KOKKOS_EXPECTS(m_ready_queues[i_priority][TaskSingle].empty()); + } + } + + // end Constructors, destructors, and assignment }}}2 + //---------------------------------------------------------------------------- + + KOKKOS_FUNCTION + void + schedule_runnable( + runnable_task_base_type&& task, + team_scheduler_info_type const& info + ) { + this->schedule_runnable_to_queue( + std::move(task), + m_ready_queues[int(task.get_priority())][int(task.get_task_type())], + info + ); + // Task may be enqueued and may be run at any point; don't touch it (hence + // the use of move semantics) + } + + KOKKOS_FUNCTION + OptionalRef + pop_ready_task( + team_scheduler_info_type const& info + ) + { + OptionalRef return_value; + // always loop in order of priority first, then prefer team tasks over single tasks + for(int i_priority = 0; i_priority < NumQueue; ++i_priority) { + + // Check for a team task with this priority + return_value = m_ready_queues[i_priority][TaskTeam].pop(); + if(return_value) return return_value; + + // Check for a single task with this priority + return_value = m_ready_queues[i_priority][TaskSingle].pop(); + if(return_value) return return_value; + + } + // if nothing was found, return a default-constructed (empty) OptionalRef + return return_value; + } + + 
KOKKOS_INLINE_FUNCTION + constexpr team_scheduler_info_type + initial_team_scheduler_info(int) const noexcept { return { }; } + +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_SINGLETASKQUEUE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskBase.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskBase.hpp new file mode 100644 index 0000000000..b0c06fb26e --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskBase.hpp @@ -0,0 +1,329 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_TASKBASE_HPP +#define KOKKOS_IMPL_TASKBASE_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include + +#include + +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +/** \brief Base class for task management, access, and execution. + * + * Inheritance structure to allow static_cast from the task root type + * and a task's FunctorType. + * + * // Enable a functor to access the base class + * // and provide memory for result value. + * TaskBase< Space , ResultType , FunctorType > + * : TaskBase< void , void , void > + * , FunctorType + * { ... }; + * Followed by memory allocated for result value. 
+ * + * States of a task: + * + * Constructing State, NOT IN a linked list + * m_wait == 0 + * m_next == 0 + * + * Scheduling transition : Constructing -> Waiting + * before: + * m_wait == 0 + * m_next == this task's initial dependence, 0 if none + * after: + * m_wait == EndTag + * m_next == EndTag + * + * Waiting State, IN a linked list + * m_apply != 0 + * m_queue != 0 + * m_ref_count > 0 + * m_wait == head of linked list of tasks waiting on this task + * m_next == next of linked list of tasks + * + * transition : Waiting -> Executing + * before: + * m_next == EndTag + * after: + * m_next == LockTag + * + * Executing State, NOT IN a linked list + * m_apply != 0 + * m_queue != 0 + * m_ref_count > 0 + * m_wait == head of linked list of tasks waiting on this task + * m_next == LockTag + * + * Respawn transition : Executing -> Executing-Respawn + * before: + * m_next == LockTag + * after: + * m_next == this task's updated dependence, 0 if none + * + * Executing-Respawn State, NOT IN a linked list + * m_apply != 0 + * m_queue != 0 + * m_ref_count > 0 + * m_wait == head of linked list of tasks waiting on this task + * m_next == this task's updated dependence, 0 if none + * + * transition : Executing -> Complete + * before: + * m_wait == head of linked list + * after: + * m_wait == LockTag + * + * Complete State, NOT IN a linked list + * m_wait == LockTag: cannot add dependence (<=> complete) + * m_next == LockTag: not a member of a wait queue + * + */ +class TaskBase +{ +public: + + enum : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2 }; + enum : uintptr_t { LockTag = ~uintptr_t(0) , EndTag = ~uintptr_t(1) }; + + template friend class Kokkos::BasicTaskScheduler ; + + using queue_type = TaskQueueBase; + + using function_type = void(*)( TaskBase * , void * ); + typedef void (* destroy_type) ( TaskBase * ); + + // sizeof(TaskBase) == 48 + + function_type m_apply = nullptr; ///< Apply function pointer + queue_type* m_queue = nullptr; ///< Pointer to the
scheduler + TaskBase* m_next = nullptr; ///< next in linked list of ready tasks + TaskBase* m_wait = nullptr; ///< Queue of tasks waiting on this + int32_t m_ref_count = 0; + int32_t m_alloc_size = 0; + int32_t m_dep_count ; ///< Aggregate's number of dependences + int16_t m_task_type ; ///< Type of task + int16_t m_priority ; ///< Priority of runnable task + + TaskBase( TaskBase && ) = delete ; + TaskBase( const TaskBase & ) = delete ; + TaskBase & operator = ( TaskBase && ) = delete ; + TaskBase & operator = ( const TaskBase & ) = delete ; + +#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND + KOKKOS_INLINE_FUNCTION ~TaskBase() {}; +#else + KOKKOS_INLINE_FUNCTION ~TaskBase() = default; +#endif + + KOKKOS_INLINE_FUNCTION constexpr + TaskBase() + : m_apply( nullptr ) + , m_queue( nullptr ) + , m_next( nullptr ) + , m_wait( nullptr ) + , m_ref_count( 0 ) + , m_alloc_size( 0 ) + , m_dep_count( 0 ) + , m_task_type( 0 ) + , m_priority( 0 ) + {} + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + TaskBase * volatile * aggregate_dependences() volatile + { return reinterpret_cast( this + 1 ); } + + KOKKOS_INLINE_FUNCTION + bool requested_respawn() + { + // This should only be called when a task has finished executing and is + // in the transition to either the complete or executing-respawn state. + TaskBase * const lock = reinterpret_cast< TaskBase * >( LockTag ); + return lock != m_next; + } + + KOKKOS_INLINE_FUNCTION + void add_dependence( TaskBase* dep ) + { + // Precondition: lock == m_next + + TaskBase * const lock = (TaskBase *) LockTag ; + + // Assign dependence to m_next. It will be processed in the subsequent + // call to schedule. Error if the dependence is reset. + if ( lock != Kokkos::atomic_exchange( & m_next, dep ) ) { + Kokkos::abort("TaskScheduler ERROR: resetting task dependence"); + } + + if ( 0 != dep ) { + // The future may be destroyed upon returning from this call + // so increment reference count to track this assignment. 
+ Kokkos::atomic_increment( &(dep->m_ref_count) ); + } + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + int32_t reference_count() const + { return *((int32_t volatile *)( & m_ref_count )); } + +}; + +static_assert( sizeof(TaskBase) == 48 + , "Verifying expected sizeof(TaskBase)" ); + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class Scheduler, typename ResultType , class FunctorType > +class Task + : public TaskBase, + public FunctorType +{ +public: + + Task() = delete ; + Task( Task && ) = delete ; + Task( const Task & ) = delete ; + Task & operator = ( Task && ) = delete ; + Task & operator = ( const Task & ) = delete ; + + + using root_type = TaskBase; + using functor_type = FunctorType ; + using result_type = ResultType ; + + using specialization = TaskQueueSpecialization ; + using member_type = typename specialization::member_type ; + + KOKKOS_INLINE_FUNCTION + void apply_functor( member_type * const member , void * ) + { this->functor_type::operator()( *member ); } + + template< typename T > + KOKKOS_INLINE_FUNCTION + void apply_functor( member_type * const member + , T * const result ) + { this->functor_type::operator()( *member , *result ); } + + KOKKOS_FUNCTION static + void destroy( root_type * root ) + { + TaskResult::destroy(root); + } + + KOKKOS_FUNCTION static + void apply( root_type * root , void * exec ) + { + Task* const task = static_cast< Task * >( root ); + member_type * const member = reinterpret_cast< member_type * >( exec ); + result_type * const result = TaskResult< result_type >::ptr( task ); + + // Task may be serial or team. + // If team then must synchronize before querying if respawn was requested. + // If team then only one thread calls destructor. 
+ + const bool only_one_thread = +#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA) + 0 == threadIdx.x && 0 == threadIdx.y ; +#else + 0 == member->team_rank(); +#endif + + task->apply_functor( member , result ); + + member->team_barrier(); + + if ( only_one_thread && !(task->requested_respawn()) ) { + // Did not respawn, destroy the functor to free memory. + task->functor_type::~functor_type(); + // Cannot destroy and deallocate the task until its dependences + // have been processed. + } + } + + // Constructor for runnable task + KOKKOS_INLINE_FUNCTION constexpr + Task( FunctorType && arg_functor ) + : root_type() , functor_type( std::move(arg_functor) ) + { } + + KOKKOS_INLINE_FUNCTION + ~Task() = delete; +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKBASE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskNode.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskNode.hpp new file mode 100644 index 0000000000..35f8853f1f --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskNode.hpp @@ -0,0 +1,758 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_TASKNODE_HPP +#define KOKKOS_IMPL_TASKNODE_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +enum TaskType : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2, TaskSpecial = -1 }; + +//============================================================================== + +/** Intrusive base class for things allocated with a Kokkos::MemoryPool + * + * @warning Memory pools assume that the address of this class is the same + * as the address of the most derived type that was allocated to + * have the given size. As a consequence, when interacting with + * multiple inheritance, this must always be the first base class + * of any derived class that uses it! + * @todo Consider inverting inheritance structure to avoid this problem? + * + * @tparam CountType type of integer used to store the allocation size + */ +template +class alignas(void*) PoolAllocatedObjectBase { +public: + + using pool_allocation_size_type = CountType; + +private: + + pool_allocation_size_type m_alloc_size; + +public: + + + KOKKOS_INLINE_FUNCTION + constexpr explicit PoolAllocatedObjectBase(pool_allocation_size_type allocation_size) + : m_alloc_size(allocation_size) + { } + + KOKKOS_INLINE_FUNCTION + CountType get_allocation_size() const noexcept { return m_alloc_size; } + +}; + +//============================================================================== + + +// TODO @tasking @cleanup DSH move this? 
+template +class ReferenceCountedBase { +public: + + using reference_count_size_type = CountType; + +private: + + reference_count_size_type m_ref_count = 0; + +public: + + KOKKOS_INLINE_FUNCTION + constexpr explicit + ReferenceCountedBase(reference_count_size_type initial_reference_count) + : m_ref_count(initial_reference_count) + { + // This can't be here because it breaks constexpr + // KOKKOS_EXPECTS(initial_reference_count > 0); + } + + /** Decrement the reference count, + * and return true iff this decrement caused + * the reference count to become zero + */ + KOKKOS_INLINE_FUNCTION + bool decrement_and_check_reference_count() + { + // TODO @tasking @memory_order DSH memory order + auto old_count = Kokkos::atomic_fetch_add(&m_ref_count, -1); + + KOKKOS_ASSERT(old_count > 0 && "reference count greater less than zero!"); + + return (old_count == 1); + } + + KOKKOS_INLINE_FUNCTION + void increment_reference_count() + { + Kokkos::atomic_increment(&m_ref_count); + } + +}; + +template +class AggregateTask; + +template +class RunnableTaskBase; + +//============================================================================== + +template +class TaskNode + : public PoolAllocatedObjectBase, // size 4, must be first! + public ReferenceCountedBase, // size 4 + public TaskQueueTraits::template intrusive_task_base_type> // size 8+ +{ +public: + + using priority_type = int16_t; + +private: + + using task_base_type = TaskNode; + using pool_allocated_base_type = PoolAllocatedObjectBase; + using reference_counted_base_type = ReferenceCountedBase; + using task_queue_traits = TaskQueueTraits; + using waiting_queue_type = + typename task_queue_traits::template waiting_queue_type; + + waiting_queue_type m_wait_queue; // size 8+ + + // TODO @tasking @cleanup DSH eliminate this, or make its purpose a bit more clear. 
It's only used in BasicFuture, and only for deallocation purposes + TaskQueueBase* m_ready_queue_base; + + TaskType m_task_type; // size 2 + priority_type m_priority; // size 2 + bool m_is_respawning = false; + +public: + + KOKKOS_INLINE_FUNCTION + constexpr + TaskNode( + TaskType task_type, + TaskPriority priority, + TaskQueueBase* queue_base, + reference_count_size_type initial_reference_count, + pool_allocation_size_type allocation_size + ) : pool_allocated_base_type( + /* allocation_size = */ allocation_size + ), + reference_counted_base_type( + /* initial_reference_count = */ initial_reference_count + ), + m_wait_queue(), + m_ready_queue_base(queue_base), + m_task_type(task_type), + m_priority(static_cast(priority)), + m_is_respawning(false) + { } + + TaskNode() = delete; + TaskNode(TaskNode const&) = delete; + TaskNode(TaskNode&&) = delete; + TaskNode& operator=(TaskNode const&) = delete; + TaskNode& operator=(TaskNode&&) = delete; + + KOKKOS_INLINE_FUNCTION + bool is_aggregate() const noexcept { return m_task_type == TaskType::Aggregate; } + + KOKKOS_INLINE_FUNCTION + bool is_runnable() const noexcept { return m_task_type != TaskType::Aggregate; } + + KOKKOS_INLINE_FUNCTION + bool is_runnable() const volatile noexcept { return m_task_type != TaskType::Aggregate; } + + KOKKOS_INLINE_FUNCTION + bool is_single_runnable() const noexcept { return m_task_type == TaskType::TaskSingle; } + + KOKKOS_INLINE_FUNCTION + bool is_team_runnable() const noexcept { return m_task_type == TaskType::TaskTeam; } + + KOKKOS_INLINE_FUNCTION + TaskType get_task_type() const noexcept { return m_task_type; } + + KOKKOS_INLINE_FUNCTION + RunnableTaskBase& + as_runnable_task() & { + KOKKOS_EXPECTS(this->is_runnable()); + return static_cast&>(*this); + } + + KOKKOS_INLINE_FUNCTION + RunnableTaskBase const& + as_runnable_task() const & { + KOKKOS_EXPECTS(this->is_runnable()); + return static_cast const&>(*this); + } + + KOKKOS_INLINE_FUNCTION + RunnableTaskBase volatile& + 
as_runnable_task() volatile & { + KOKKOS_EXPECTS(this->is_runnable()); + return static_cast volatile&>(*this); + } + + KOKKOS_INLINE_FUNCTION + RunnableTaskBase const volatile& + as_runnable_task() const volatile & { + KOKKOS_EXPECTS(this->is_runnable()); + return static_cast const volatile&>(*this); + } + + KOKKOS_INLINE_FUNCTION + RunnableTaskBase&& + as_runnable_task() && { + KOKKOS_EXPECTS(this->is_runnable()); + return static_cast&&>(*this); + } + + template + KOKKOS_INLINE_FUNCTION + AggregateTask& + as_aggregate() & { + KOKKOS_EXPECTS(this->is_aggregate()); + return static_cast&>(*this); + } + + template + KOKKOS_INLINE_FUNCTION + AggregateTask const& + as_aggregate() const & { + KOKKOS_EXPECTS(this->is_aggregate()); + return static_cast const&>(*this); + } + + template + KOKKOS_INLINE_FUNCTION + AggregateTask&& + as_aggregate() && { + KOKKOS_EXPECTS(this->is_aggregate()); + return static_cast&&>(*this); + } + + KOKKOS_INLINE_FUNCTION + bool try_add_waiting(task_base_type& depends_on_this) { + return m_wait_queue.try_push(depends_on_this); + } + + template + KOKKOS_INLINE_FUNCTION + void consume_wait_queue(Function&& f) { + KOKKOS_EXPECTS(not m_wait_queue.is_consumed()); + m_wait_queue.consume(std::forward(f)); + } + + KOKKOS_INLINE_FUNCTION + bool wait_queue_is_consumed() const noexcept { + // TODO @tasking @memory_order DSH memory order + return m_wait_queue.is_consumed(); + } + + KOKKOS_INLINE_FUNCTION + TaskQueueBase* + ready_queue_base_ptr() const noexcept { + return m_ready_queue_base; + } + + KOKKOS_INLINE_FUNCTION + void set_priority(TaskPriority priority) noexcept { + KOKKOS_EXPECTS(!this->is_enqueued()); + m_priority = (priority_type)priority; + } + + KOKKOS_INLINE_FUNCTION + void set_priority(TaskPriority priority) volatile noexcept { + KOKKOS_EXPECTS(!this->is_enqueued()); + m_priority = (priority_type)priority; + } + + KOKKOS_INLINE_FUNCTION + TaskPriority get_priority() const noexcept { + return (TaskPriority)m_priority; + } + + 
KOKKOS_INLINE_FUNCTION + bool get_respawn_flag() const { return m_is_respawning; } + + KOKKOS_INLINE_FUNCTION + void set_respawn_flag(bool value = true) { + m_is_respawning = value; + } + + KOKKOS_INLINE_FUNCTION + void set_respawn_flag(bool value = true) volatile { + m_is_respawning = value; + } + +}; + +//============================================================================== + +template +class SchedulingInfoStorage; + +//============================================================================== + +template +class SchedulingInfoStorage + : public BaseType, // must be first base class for allocation reasons!!! + private NoUniqueAddressMemberEmulation +{ + +private: + + using base_t = BaseType; + using task_scheduling_info_type = SchedulingInfo; + +public: + + using base_t::base_t; + + KOKKOS_INLINE_FUNCTION + task_scheduling_info_type& scheduling_info() & + { + return this->no_unique_address_data_member(); + } + + KOKKOS_INLINE_FUNCTION + task_scheduling_info_type const& scheduling_info() const & + { + return this->no_unique_address_data_member(); + } + + KOKKOS_INLINE_FUNCTION + task_scheduling_info_type&& scheduling_info() && + { + return std::move(*this).no_unique_address_data_member(); + } + +}; + + +//============================================================================== + +template +class alignas(16) AggregateTask final + : public SchedulingInfoStorage< + TaskNode, + SchedulingInfo + >, // must be first base class for allocation reasons!!! 
+ public ObjectWithVLAEmulation< + AggregateTask, + OwningRawPtr> + > +{ +private: + + using base_t = SchedulingInfoStorage< + TaskNode, + SchedulingInfo + >; + using vla_base_t = ObjectWithVLAEmulation< + AggregateTask, + OwningRawPtr> + >; + + using task_base_type = TaskNode; + +public: + + using aggregate_task_type = AggregateTask; // concept marker + + template + // requires std::is_constructible_v + KOKKOS_INLINE_FUNCTION + constexpr explicit + AggregateTask( + int32_t aggregate_predecessor_count, + Args&&... args + ) : base_t( + TaskType::Aggregate, + TaskPriority::Regular, // all aggregates are regular priority + std::forward(args)... + ), + vla_base_t(aggregate_predecessor_count) + { } + + KOKKOS_INLINE_FUNCTION + int32_t dependence_count() const { return this->n_vla_entries(); } + +}; + +//KOKKOS_IMPL_IS_CONCEPT(aggregate_task); + +//============================================================================== + + +template +class RunnableTaskBase + : public TaskNode // must be first base class for allocation reasons!!! +{ +private: + + using base_t = TaskNode; + +public: + + using task_base_type = TaskNode; + using function_type = void(*)( task_base_type * , void * ); + using destroy_type = void(*)( task_base_type * ); + using runnable_task_type = RunnableTaskBase; + +private: + + function_type m_apply; + task_base_type* m_predecessor = nullptr; + +public: + + template + // requires std::is_constructible_v + KOKKOS_INLINE_FUNCTION + constexpr explicit + RunnableTaskBase( + function_type apply_function_ptr, + Args&&... 
args + ) : base_t(std::forward(args)...), + m_apply(apply_function_ptr) + { } + + KOKKOS_INLINE_FUNCTION + bool has_predecessor() const { return m_predecessor != nullptr; } + + KOKKOS_INLINE_FUNCTION + void clear_predecessor() { m_predecessor = nullptr; } + + KOKKOS_INLINE_FUNCTION + void clear_predecessor() volatile { m_predecessor = nullptr; } + + template + KOKKOS_INLINE_FUNCTION + SchedulingInfo& + scheduling_info_as() + { + using info_storage_type = SchedulingInfoStorage; + + return static_cast(this)->scheduling_info(); + } + + template + KOKKOS_INLINE_FUNCTION + SchedulingInfo const& + scheduling_info_as() const + { + using info_storage_type = SchedulingInfoStorage; + + return static_cast(this)->scheduling_info(); + } + + + KOKKOS_INLINE_FUNCTION + task_base_type& get_predecessor() const { + KOKKOS_EXPECTS(m_predecessor != nullptr); + return *m_predecessor; + } + + KOKKOS_INLINE_FUNCTION + void set_predecessor(task_base_type& predecessor) + { + KOKKOS_EXPECTS(m_predecessor == nullptr); + // Increment the reference count so that predecessor doesn't go away + // before this task is enqueued. 
+ // (should be memory order acquire) + predecessor.increment_reference_count(); + m_predecessor = &predecessor; + } + + KOKKOS_INLINE_FUNCTION + void acquire_predecessor_from(runnable_task_type& other) + { + KOKKOS_EXPECTS(m_predecessor == nullptr || other.m_predecessor == m_predecessor); + // since we're transferring, no need to modify the reference count + m_predecessor = other.m_predecessor; + other.m_predecessor = nullptr; + } + + KOKKOS_INLINE_FUNCTION + void acquire_predecessor_from(runnable_task_type& other) volatile + { + KOKKOS_EXPECTS(m_predecessor == nullptr || other.m_predecessor == m_predecessor); + // since we're transferring, no need to modify the reference count + m_predecessor = other.m_predecessor; + other.m_predecessor = nullptr; + } + + template + KOKKOS_INLINE_FUNCTION + void run(TeamMember& member) { + (*m_apply)(this, &member); + } +}; + +//KOKKOS_IMPL_IS_CONCEPT(runnable_task); + +//============================================================================== + +template +class TaskResultStorage : public Base +{ +private: + + using base_t = Base; + + alignas(Base) ResultType m_value = ResultType{}; + + +public: + + using base_t::base_t; + + KOKKOS_INLINE_FUNCTION + ResultType* value_pointer() { + // Over-alignment makes this a non-standard-layout class, + // so alignas() doesn't work + //static_assert( + // offsetof(TaskResultStorage, m_value) == sizeof(Base), + // "TaskResultStorage must be POD for layout purposes" + //); + return &m_value; + } + + KOKKOS_INLINE_FUNCTION + ResultType& value_reference() { return m_value; } + +}; + + +// TODO @tasking @optimization DSH optimization for empty types (in addition to void) +template +class TaskResultStorage : public Base +{ +private: + + using base_t = Base; + +public: + + using base_t::base_t; + + KOKKOS_INLINE_FUNCTION + void* value_pointer() noexcept { return nullptr; } + + KOKKOS_INLINE_FUNCTION + void value_reference() noexcept { } + +}; + +
+//============================================================================== + +template < + class TaskQueueTraits, + class Scheduler, + class ResultType, + class FunctorType +> +class alignas(16) RunnableTask + : // using nesting of base classes to control layout; multiple empty base classes + // may not be ABI compatible with CUDA on Windows + public TaskResultStorage< + ResultType, + SchedulingInfoStorage< + RunnableTaskBase, + typename Scheduler::task_queue_type::task_scheduling_info_type + > + >, // must be first base class + public FunctorType +{ +private: + using base_t = TaskResultStorage< + ResultType, + SchedulingInfoStorage< + RunnableTaskBase, + typename Scheduler::task_queue_type::task_scheduling_info_type + > + >; + + using runnable_task_base_type = RunnableTaskBase; + using scheduler_type = Scheduler; + using scheduling_info_type = + typename scheduler_type::task_scheduling_info_type; + using scheduling_info_storage_base = base_t; + + using task_base_type = TaskNode; + using specialization = TaskQueueSpecialization; + using member_type = typename specialization::member_type; + using result_type = ResultType; + using functor_type = FunctorType; + +public: + + template + // requires std::is_constructible_v + KOKKOS_INLINE_FUNCTION + constexpr explicit + RunnableTask( + FunctorType&& functor, + Args&&... args + ) : base_t( + std::forward(args)... 
+ ), + functor_type(std::move(functor)) + { } + + KOKKOS_INLINE_FUNCTION + ~RunnableTask() = delete; + + KOKKOS_INLINE_FUNCTION + void update_scheduling_info( + member_type& member + ) { + // TODO @tasking @generalization DSH call a queue-specific hook here; for now, this info is already updated elsewhere + // this->scheduling_info() = member.scheduler().scheduling_info(); + } + + KOKKOS_INLINE_FUNCTION + void apply_functor(member_type* member, void*) + { + update_scheduling_info(*member); + this->functor_type::operator()(*member); + } + + template + KOKKOS_INLINE_FUNCTION + void apply_functor(member_type* member, T* val) + { + update_scheduling_info(*member); + //this->functor_type::operator()(*member, *val); + this->functor_type::operator()(*member, *val); + } + + KOKKOS_FUNCTION static + void destroy( task_base_type * root ) + { + //TaskResult::destroy(root); + } + + KOKKOS_FUNCTION static + void apply(task_base_type* self, void* member_as_void) + { + using task_type = Impl::RunnableTask*; + auto* const task = static_cast(self); + auto* const member = reinterpret_cast(member_as_void); + + // Now that we're over-aligning the result storage, this isn't a problem any more + //static_assert(std::is_standard_layout::value, + // "Tasks must be standard layout" + //); + //static_assert(std::is_pod::value, + // "Tasks must be PODs" + //); + + // Task may be serial or team. + // If team then must synchronize before querying if respawn was requested. + // If team then only one thread calls destructor. 
+ + const bool only_one_thread = +#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA) + 0 == threadIdx.x && 0 == threadIdx.y ; +#else + 0 == member->team_rank(); +#endif + + // Ensure that the respawn flag is set to zero + self->set_respawn_flag(false); + + //task->apply_functor(member, TaskResult::ptr(task)); + task->apply_functor(member, task->value_pointer()); + + member->team_barrier(); + + if ( only_one_thread && !(task->get_respawn_flag()) ) { + // Did not respawn, destroy the functor to free memory. + task->functor_type::~functor_type(); + // Cannot destroy and deallocate the task until its dependences + // have been processed. + } + } + +}; + +} /* namespace Impl */ + + +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKNODE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskPolicyData.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskPolicyData.hpp new file mode 100644 index 0000000000..85e665fffc --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskPolicyData.hpp @@ -0,0 +1,195 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_TASKPOLICYDATA_HPP +#define KOKKOS_IMPL_TASKPOLICYDATA_HPP + +//---------------------------------------------------------------------------- + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- + +template +struct TaskPolicyWithPredecessor +{ +private: + + DepFutureType m_predecessor; + Kokkos::TaskPriority m_priority; + +public: + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithPredecessor( + DepFutureType arg_predecessor, + Kokkos::TaskPriority arg_priority + ) : m_predecessor(std::move(arg_predecessor)), + m_priority(arg_priority) + { } + + TaskPolicyWithPredecessor() = delete; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithPredecessor(TaskPolicyWithPredecessor const&) = default; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithPredecessor(TaskPolicyWithPredecessor&&) = default; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithPredecessor& operator=(TaskPolicyWithPredecessor const&) = default; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithPredecessor& operator=(TaskPolicyWithPredecessor&&) = default; + + KOKKOS_INLINE_FUNCTION + ~TaskPolicyWithPredecessor() = default; + + KOKKOS_INLINE_FUNCTION + DepFutureType&& predecessor() && { + return std::move(m_predecessor); + } + + KOKKOS_INLINE_FUNCTION + constexpr TaskPriority priority() const { return m_priority; } + + KOKKOS_INLINE_FUNCTION + static constexpr int task_type() noexcept { return TaskEnum; } + +}; + +// TODO @tasking @cleanup DSH clean this up. 
Using nullptr_t here is too clever +template +struct TaskPolicyWithScheduler +{ +public: + + using predecessor_future_type = PredecessorFuture; + +private: + + Scheduler m_scheduler; + Kokkos::TaskPriority m_priority; + predecessor_future_type m_predecessor; + +public: + + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithScheduler( + Scheduler arg_scheduler, + Kokkos::TaskPriority arg_priority + ) : m_scheduler(std::move(arg_scheduler)), + m_priority(arg_priority) + { } + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithScheduler( + Scheduler arg_scheduler, + predecessor_future_type arg_predecessor, + Kokkos::TaskPriority arg_priority + ) : m_scheduler(std::move(arg_scheduler)), + m_priority(arg_priority), + m_predecessor(std::move(arg_predecessor)) + { } + + TaskPolicyWithScheduler() = delete; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithScheduler(TaskPolicyWithScheduler const&) = default; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithScheduler(TaskPolicyWithScheduler&&) = default; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithScheduler& operator=(TaskPolicyWithScheduler const&) = default; + + KOKKOS_INLINE_FUNCTION + TaskPolicyWithScheduler& operator=(TaskPolicyWithScheduler&&) = default; + + KOKKOS_INLINE_FUNCTION + ~TaskPolicyWithScheduler() = default; + + KOKKOS_INLINE_FUNCTION + Scheduler& scheduler() & { + return m_scheduler; + } + + KOKKOS_INLINE_FUNCTION + constexpr TaskPriority priority() const { return m_priority; } + + KOKKOS_INLINE_FUNCTION + predecessor_future_type& predecessor() & { + return m_predecessor; + } + + KOKKOS_INLINE_FUNCTION + static constexpr bool has_predecessor() noexcept + { + return not std::is_same::value; + } + + KOKKOS_INLINE_FUNCTION + static constexpr int task_type() noexcept { return TaskEnum; } + +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( 
KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKPOLICYDATA_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp index eacf0837fa..1adcfe4cc4 100644 --- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp @@ -49,27 +49,24 @@ #include #if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + #include #include #include -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class Space , typename ResultType , class FunctorType > -class TaskBase ; - -template< typename Space > -class TaskQueue ; - -template< typename Space > -class TaskQueueSpecialization ; - -} /* namespace Impl */ -} /* namespace Kokkos */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -77,240 +74,29 @@ class TaskQueueSpecialization ; namespace Kokkos { namespace Impl { -/** \brief Base class for task management, access, and execution. - * - * Inheritance structure to allow static_cast from the task root type - * and a task's FunctorType. - * - * // Enable a functor to access the base class - * // and provide memory for result value. - * TaskBase< Space , ResultType , FunctorType > - * : TaskBase< void , void , void > - * , FunctorType - * { ... }; - * Followed by memory allocated for result value. 
- * - * - * States of a task: - * - * Constructing State, NOT IN a linked list - * m_wait == 0 - * m_next == 0 - * - * Scheduling transition : Constructing -> Waiting - * before: - * m_wait == 0 - * m_next == this task's initial dependence, 0 if none - * after: - * m_wait == EndTag - * m_next == EndTag - * - * Waiting State, IN a linked list - * m_apply != 0 - * m_queue != 0 - * m_ref_count > 0 - * m_wait == head of linked list of tasks waiting on this task - * m_next == next of linked list of tasks - * - * transition : Waiting -> Executing - * before: - * m_next == EndTag - * after:: - * m_next == LockTag - * - * Executing State, NOT IN a linked list - * m_apply != 0 - * m_queue != 0 - * m_ref_count > 0 - * m_wait == head of linked list of tasks waiting on this task - * m_next == LockTag - * - * Respawn transition : Executing -> Executing-Respawn - * before: - * m_next == LockTag - * after: - * m_next == this task's updated dependence, 0 if none - * - * Executing-Respawn State, NOT IN a linked list - * m_apply != 0 - * m_queue != 0 - * m_ref_count > 0 - * m_wait == head of linked list of tasks waiting on this task - * m_next == this task's updated dependence, 0 if none - * - * transition : Executing -> Complete - * before: - * m_wait == head of linked list - * after: - * m_wait == LockTag - * - * Complete State, NOT IN a linked list - * m_wait == LockTag: cannot add dependence (<=> complete) - * m_next == LockTag: not a member of a wait queue - * - */ -template<> -class TaskBase< void , void , void > -{ -public: - - enum : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2 }; - enum : uintptr_t { LockTag = ~uintptr_t(0) , EndTag = ~uintptr_t(1) }; - - template< typename > friend class Kokkos::TaskScheduler ; - - typedef TaskQueue< void > queue_type ; - - typedef void (* function_type) ( TaskBase * , void * ); - - // sizeof(TaskBase) == 48 - - function_type m_apply ; ///< Apply function pointer - queue_type * m_queue ; ///< Pointer to queue - TaskBase * m_wait 
; ///< Linked list of tasks waiting on this - TaskBase * m_next ; ///< Waiting linked-list next - int32_t m_ref_count ; ///< Reference count - int32_t m_alloc_size ; ///< Allocation size - int32_t m_dep_count ; ///< Aggregate's number of dependences - int16_t m_task_type ; ///< Type of task - int16_t m_priority ; ///< Priority of runnable task - - TaskBase( TaskBase && ) = delete ; - TaskBase( const TaskBase & ) = delete ; - TaskBase & operator = ( TaskBase && ) = delete ; - TaskBase & operator = ( const TaskBase & ) = delete ; - -#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND - KOKKOS_INLINE_FUNCTION ~TaskBase() {}; -#else - KOKKOS_INLINE_FUNCTION ~TaskBase() = default; -#endif - - KOKKOS_INLINE_FUNCTION constexpr - TaskBase() - : m_apply( 0 ) - , m_queue( 0 ) - , m_wait( 0 ) - , m_next( 0 ) - , m_ref_count( 0 ) - , m_alloc_size( 0 ) - , m_dep_count( 0 ) - , m_task_type( 0 ) - , m_priority( 0 ) - {} - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - TaskBase * volatile * aggregate_dependences() volatile - { return reinterpret_cast( this + 1 ); } - - KOKKOS_INLINE_FUNCTION - bool requested_respawn() - { - // This should only be called when a task has finished executing and is - // in the transition to either the complete or executing-respawn state. - TaskBase * const lock = reinterpret_cast< TaskBase * >( LockTag ); - return lock != m_next; - } - - KOKKOS_INLINE_FUNCTION - void add_dependence( TaskBase* dep ) - { - // Precondition: lock == m_next - - TaskBase * const lock = (TaskBase *) LockTag ; - - // Assign dependence to m_next. It will be processed in the subsequent - // call to schedule. Error if the dependence is reset. - if ( lock != Kokkos::atomic_exchange( & m_next, dep ) ) { - Kokkos::abort("TaskScheduler ERROR: resetting task dependence"); - } - - if ( 0 != dep ) { - // The future may be destroyed upon returning from this call - // so increment reference count to track this assignment. 
- Kokkos::atomic_increment( &(dep->m_ref_count) ); - } - } - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - int32_t reference_count() const - { return *((int32_t volatile *)( & m_ref_count )); } - -}; - -static_assert( sizeof(TaskBase) == 48 - , "Verifying expected sizeof(TaskBase)" ); - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -template< typename ResultType > -struct TaskResult { - - enum : int32_t { size = sizeof(ResultType) }; - - using reference_type = ResultType & ; - - KOKKOS_INLINE_FUNCTION static - ResultType * ptr( TaskBase * task ) - { - return reinterpret_cast< ResultType * > - ( reinterpret_cast< char * >(task) + task->m_alloc_size - sizeof(ResultType) ); - } - - KOKKOS_INLINE_FUNCTION static - reference_type get( TaskBase * task ) - { return *ptr( task ); } -}; - -template<> -struct TaskResult< void > { - - enum : int32_t { size = 0 }; - - using reference_type = void ; - - KOKKOS_INLINE_FUNCTION static - void * ptr( TaskBase * ) { return (void*) 0 ; } - - KOKKOS_INLINE_FUNCTION static - reference_type get( TaskBase * ) {} -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template<> -class TaskQueue< void > {}; /** \brief Manage task allocation, deallocation, and scheduling. * * Task execution is deferred to the TaskQueueSpecialization. * All other aspects of task management have shared implementation. 
*/ -template< typename ExecSpace > -class TaskQueue : public TaskQueue { -private: +template< typename ExecSpace, typename MemorySpace > +class TaskQueue : public TaskQueueBase { +protected: - friend class TaskQueueSpecialization< ExecSpace > ; - friend class Kokkos::TaskScheduler< ExecSpace > ; + template + friend struct TaskQueueSpecialization; + template + friend class TaskQueueSpecializationConstrained; + template + friend class Kokkos::BasicTaskScheduler; - using execution_space = ExecSpace ; - using specialization = TaskQueueSpecialization< execution_space > ; - using memory_space = typename specialization::memory_space ; - using device_type = Kokkos::Device< execution_space , memory_space > ; - using memory_pool = Kokkos::MemoryPool< device_type > ; - using task_root_type = Kokkos::Impl::TaskBase ; + using execution_space = ExecSpace; + using memory_space = MemorySpace; + using device_type = Kokkos::Device< execution_space , memory_space > ; + using memory_pool = Kokkos::MemoryPool< device_type > ; + using task_root_type = Kokkos::Impl::TaskBase; + using team_queue_type = TaskQueue; struct Destroy { TaskQueue * m_queue ; @@ -325,8 +111,8 @@ private: memory_pool m_memory ; task_root_type * volatile m_ready[ NumQueue ][ 2 ]; - long m_accum_alloc ; // Accumulated number of allocations - int m_count_alloc ; // Current number of allocations + //long m_accum_alloc ; // Accumulated number of allocations + int m_count_alloc = 0 ; // Current number of allocations int m_max_alloc ; // Maximum number of allocations int m_ready_count ; // Number of ready or executing @@ -347,8 +133,8 @@ private: // task->m_next is the dependence or zero // Postcondition: // task->m_next is linked list membership - KOKKOS_FUNCTION void schedule_runnable( task_root_type * const ); - KOKKOS_FUNCTION void schedule_aggregate( task_root_type * const ); + KOKKOS_FUNCTION void schedule_runnable(task_root_type*); + KOKKOS_FUNCTION void schedule_aggregate(task_root_type*); // Reschedule a task // 
Precondition: @@ -381,23 +167,29 @@ private: KOKKOS_FUNCTION static void decrement( task_root_type * task ); + public: - // If and only if the execution space is a single thread - // then execute ready tasks. KOKKOS_INLINE_FUNCTION - void iff_single_thread_recursive_execute() - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - specialization::iff_single_thread_recursive_execute( this ); -#endif - } + int allocation_count() const noexcept { return m_count_alloc; } - void execute() { specialization::execute( this ); } + + KOKKOS_INLINE_FUNCTION + void initialize_team_queues(int pool_size) const noexcept { } + + KOKKOS_INLINE_FUNCTION + task_root_type* attempt_to_steal_task() const noexcept { return nullptr; } + + KOKKOS_INLINE_FUNCTION + team_queue_type& get_team_queue(int team_rank) { return *this; } + + //void execute() { specialization::execute( this ); } template< typename FunctorType > void proc_set_apply( typename task_root_type::function_type * ptr ) { + using specialization = + TaskQueueSpecialization>; specialization::template proc_set_apply< FunctorType >( ptr ); } @@ -451,9 +243,7 @@ public: { using value_type = typename FunctorType::value_type ; - using task_type = Impl::TaskBase< execution_space - , value_type - , FunctorType > ; + using task_type = Impl::Task ; enum : size_t { align = ( 1 << 4 ) , align_mask = align - 1 }; enum : size_t { task_size = sizeof(task_type) }; @@ -480,86 +270,6 @@ public: //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -namespace Kokkos { -namespace Impl { - -template< class ExecSpace , typename ResultType , class FunctorType > -class TaskBase - : public TaskBase< void , void , void > - , public FunctorType -{ -private: - - TaskBase() = delete ; - TaskBase( TaskBase && ) = delete ; - TaskBase( const TaskBase & ) = delete ; - TaskBase & operator = ( TaskBase && ) = delete ; - TaskBase & operator = ( const 
TaskBase & ) = delete ; - -public: - - using root_type = TaskBase< void , void , void > ; - using functor_type = FunctorType ; - using result_type = ResultType ; - - using specialization = TaskQueueSpecialization< ExecSpace > ; - using member_type = typename specialization::member_type ; - - KOKKOS_INLINE_FUNCTION - void apply_functor( member_type * const member , void * ) - { functor_type::operator()( *member ); } - - template< typename T > - KOKKOS_INLINE_FUNCTION - void apply_functor( member_type * const member - , T * const result ) - { functor_type::operator()( *member , *result ); } - - KOKKOS_FUNCTION static - void apply( root_type * root , void * exec ) - { - TaskBase * const task = static_cast< TaskBase * >( root ); - member_type * const member = reinterpret_cast< member_type * >( exec ); - result_type * const result = TaskResult< result_type >::ptr( task ); - - // Task may be serial or team. - // If team then must synchronize before querying if respawn was requested. - // If team then only one thread calls destructor. - - const bool only_one_thread = -#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA) - 0 == threadIdx.x && 0 == threadIdx.y ; -#else - 0 == member->team_rank(); -#endif - - task->apply_functor( member , result ); - - member->team_barrier(); - - if ( only_one_thread && !(task->requested_respawn()) ) { - // Did not respawn, destroy the functor to free memory. - static_cast(task)->~functor_type(); - // Cannot destroy and deallocate the task until its dependences - // have been processed. 
- } - } - - // Constructor for runnable task - KOKKOS_INLINE_FUNCTION constexpr - TaskBase( FunctorType && arg_functor ) - : root_type() , functor_type( arg_functor ) {} - - KOKKOS_INLINE_FUNCTION - ~TaskBase() {} -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_HPP */ diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueueCommon.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueueCommon.hpp new file mode 100644 index 0000000000..b0685506d4 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueueCommon.hpp @@ -0,0 +1,569 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_TASKQUEUECOMMON_HPP +#define KOKKOS_IMPL_TASKQUEUECOMMON_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +/// @brief CRTP Base class implementing the ready count parts common to most task queues +template +class TaskQueueCommonMixin +{ +private: + + int32_t m_ready_count = 0; + + // CRTP boilerplate + KOKKOS_INLINE_FUNCTION + Derived& _self() { return *static_cast(this); } + +public: + + //---------------------------------------------------------------------------- + // {{{2 + + TaskQueueCommonMixin() + : m_ready_count(0) + { + // TODO @tasking @memory_order DSH figure out if I need this store to be atomic + } + + ~TaskQueueCommonMixin() { + 
KOKKOS_EXPECTS((Kokkos::memory_fence(), m_ready_count < 1)); + KOKKOS_EXPECTS(m_ready_count == 0); + } + + // end Constructors, destructor, and assignment }}}2 + //---------------------------------------------------------------------------- + + + //---------------------------------------------------------------------------- + // {{{2 + +private: + + // This would be more readable with a lambda, but that comes with + // all the baggage associated with a lambda (compilation times, bugs with + // nvcc, etc.), so we'll use a simple little helper functor here. + template + struct _schedule_waiting_tasks_operation { + TaskNode const& m_predecessor; + Derived& m_queue; + TeamSchedulerInfo const& m_info; + KOKKOS_INLINE_FUNCTION + void operator()(TaskNode&& task) const noexcept + // requires Same + { + using task_scheduling_info_type = typename Derived::task_scheduling_info_type; + if(task.is_runnable()) // KOKKOS_LIKELY + { + // TODO @tasking @optimization DSH check this outside of the loop ? 
+ if(m_predecessor.is_runnable()) { + m_queue.update_scheduling_info_from_completed_predecessor( + /* ready_task = */ task.as_runnable_task(), + /* predecessor = */ m_predecessor.as_runnable_task() + ); + } + else { + KOKKOS_ASSERT(m_predecessor.is_aggregate()); + m_queue.update_scheduling_info_from_completed_predecessor( + /* ready_task = */ task.as_runnable_task(), + /* predecessor = */ m_predecessor.template as_aggregate() + ); + } + m_queue.schedule_runnable( + std::move(task).as_runnable_task(), + m_info + ); + } + else { + // The scheduling info update happens inside of schedule_aggregate + m_queue.schedule_aggregate( + std::move(task).template as_aggregate(), + m_info + ); + } + } + }; + +protected: + + template + KOKKOS_FUNCTION + void _complete_finished_task( + TaskNode&& task, + TeamSchedulerInfo const& info + ) { + task.consume_wait_queue( + _schedule_waiting_tasks_operation{ + task, + _self(), + info + } + ); + bool should_delete = task.decrement_and_check_reference_count(); + if(should_delete) { + _self().deallocate(std::move(task)); + } + } + + KOKKOS_INLINE_FUNCTION + void _increment_ready_count() { + // TODO @tasking @memory_order DSH memory order + Kokkos::atomic_increment(&this->m_ready_count); + } + + KOKKOS_INLINE_FUNCTION + void _decrement_ready_count() { + // TODO @tasking @memory_order DSH memory order + Kokkos::atomic_decrement(&this->m_ready_count); + Kokkos::memory_fence(); + } + +public: + + KOKKOS_INLINE_FUNCTION + bool is_done() const noexcept { + // TODO @tasking @memory_order DSH Memory order, instead of volatile + return (*(volatile int*)(&m_ready_count)) == 0; + } + + KOKKOS_INLINE_FUNCTION + int32_t ready_count() const noexcept { + // TODO @tasking @memory_order DSH Memory order, instead of volatile + return (*(volatile int*)(&m_ready_count)); + } + + template + KOKKOS_FUNCTION + void + complete( + RunnableTaskBase&& task, + TeamSchedulerInfo const& info + ) + { + if(task.get_respawn_flag()) { + 
_self().schedule_runnable(std::move(task), info); + } + else { + _complete_finished_task(std::move(task), info); + } + // A runnable task was popped from a ready queue and finished executing. + // If respawned into a ready queue then the ready count was incremented + // so decrement whether respawned or not. If finished, all of the + // tasks waiting on this have been enqueued (either in the ready queue + // or the next waiting queue, in the case of an aggregate), and the + // ready count has been incremented for each of those, preventing + // quiescence. Thus, it's safe to decrement the ready count here. + // TODO @tasking @memory_order DSH memory order? (probably release) + _decrement_ready_count(); + } + + template + KOKKOS_FUNCTION + void + complete( + AggregateTask&& task, + TeamSchedulerInfo const& info + ) { + // TODO @tasking DSH old code has an ifndef __HCC_ACCELERATOR__ here; figure out why + _complete_finished_task(std::move(task), info); + } + + // end Task and queue completion }}}2 + //---------------------------------------------------------------------------- + + + //---------------------------------------------------------------------------- + // {{{2 + +public: + + // This isn't actually generic; the template parameters are just to keep + // Derived from having to be complete + template + KOKKOS_INLINE_FUNCTION + void + schedule_runnable_to_queue( + RunnableTaskBase&& task, + ReadyQueueType& ready_queue, + TeamSchedulerInfo const& info + ) + { + bool task_is_ready = true; + bool scheduling_info_updated = false; + + // do this before enqueueing and potentially losing exclusive access to task + bool task_is_respawning = task.get_respawn_flag(); + + // clear the respawn flag, since we're handling the respawn (if any) here. + // We must make sure this is written through the cache, since the next + // thread to access it might be a Cuda thread from a different thread block. 
+ ((RunnableTaskBase volatile&)task).set_respawn_flag(false); + + if(task.has_predecessor()) { + // save the predecessor into a local variable, then clear it from the + // task before adding it to the wait queue of the predecessor + // (We have exclusive access to the task's predecessor, so we don't need + // to do this atomically) + // TODO @tasking @internal_documentation DSH document that we expect exclusive access to `task` in this function + auto& predecessor = task.get_predecessor(); + // This needs a load/store fence here, technically + // making this a release store would also do this + ((RunnableTaskBase volatile&)task).clear_predecessor(); + + // TODO @tasking @memory_order DSH remove this fence in favor of memory orders + Kokkos::memory_fence(); // for now + + // Try to add the task to the predecessor's waiting queue. If it fails, + // the predecessor is already done + bool predecessor_not_ready = predecessor.try_add_waiting(task); + + // NOTE: if the predecessor was not ready and the task was enqueued, + // we've lost exclusive access and should not touch task again + + // If the predecessor is not done, then task is not ready + task_is_ready = not predecessor_not_ready; + + if(task_is_ready and predecessor.is_runnable()) { + // this is our last chance to update the scheduling info before + // predecessor is potentially deleted + _self().update_scheduling_info_from_completed_predecessor( + /* ready_task = */ task, + /* predecessor = */ predecessor.as_runnable_task() + ); + scheduling_info_updated = true; + } + + if(task_is_respawning) { + // Reference count for predecessor was incremented when + // respawn called set_dependency() + // so that if predecessor completed prior to the + // above try_add_waiting(), predecessor would not be destroyed. + // predecessor reference count can now be decremented, + // which may deallocate it. 
+ bool should_delete = predecessor.decrement_and_check_reference_count(); + if(should_delete) { + // TODO @tasking @cleanup DSH better encapsulation of this! + _self().deallocate(std::move(predecessor)); + } + } + // Note! predecessor may be destroyed at this point, so don't add anything + // here + } + + if(scheduling_info_updated) { + // We need to go back to the queue itself and see if it wants to schedule + // somewhere else + _self().schedule_runnable(std::move(task), info); + } + // Put it in the appropriate ready queue if it's ready + else if(task_is_ready) { + // Increment the ready count + _self()._increment_ready_count(); + // and enqueue the task + // (can't move because the task isn't expired unless the push succeeds) + bool push_success = ready_queue.push(task); + if(not push_success) { + _self().handle_failed_ready_queue_insertion( + std::move(task), ready_queue, info + ); + } + } + + // Task may be enqueued and may be run at any point; don't touch it (hence + // the use of move semantics) + } + + template + KOKKOS_INLINE_FUNCTION + void + handle_failed_ready_queue_insertion( + RunnableTaskBase&& task, + ReadyQueueType& ready_queue, + TeamSchedulerInfo const& info + ) { + Kokkos::abort("Unhandled failure of ready task queue insertion!\n"); + } + + // This isn't actually generic; the template parameters are just to keep + // Derived from having to be complete + template + KOKKOS_FUNCTION + void + schedule_aggregate( + AggregateTask&& aggregate, + TeamSchedulerInfo const& info + ) + { + // Because the aggregate is being scheduled, it should not be in any queue + KOKKOS_EXPECTS(not aggregate.is_enqueued()); + + using task_scheduling_info_type = typename Derived::task_scheduling_info_type; + using team_scheduler_info_type = typename Derived::team_scheduler_info_type; + static_assert( + std::is_same::value, + "SchedulingInfo type mismatch!" 
+ ); + + bool incomplete_dependence_found = false; + + for(auto*& predecessor_ptr_ref : aggregate) { + + // if a previous scheduling operation hasn't already set the predecessor + // to nullptr, try to enqueue the aggregate into the predecessor's waiting + // queue + if(predecessor_ptr_ref != nullptr) { + + // Swap the pointer onto the stack and set the one in the aggregate VLA + // to nullptr before we try to add it to the waiting queue so that some + // other thread doesn't also get to here and find the pointer to be + // not null (since as soon as we try and schedule the aggregate, we + // potentially lose exclusive access to it if that enqueueing operation + // succeeds). The swap doesn't need to happen atomically since we have + // exclusive access to aggregate until an insertion succeeds + auto* predecessor_ptr = std::move(predecessor_ptr_ref); + + // TODO @tasking @memory_order DSH I think this needs to be a store release so that it doesn't get reordered after the queue insertion + predecessor_ptr_ref = nullptr; + + // TODO @tasking @memory_order DSH remove this fence in favor of memory orders + Kokkos::memory_fence(); + + // If adding the aggregate to the waiting queue succeeds, the predecessor is not + // complete + bool pred_not_ready = predecessor_ptr->try_add_waiting(aggregate); + + // NOTE! 
At this point it is unsafe to access aggregate (unless the + // enqueueing failed, so we can't use move semantics to expire it) + + // we found an incomplete dependence, so we can't make the aggregate's successors + // ready yet + incomplete_dependence_found = pred_not_ready; + + if(not pred_not_ready) { + // A predecessor was done, and we didn't enqueue the aggregate + // Update the aggregate's scheduling info (we still have exclusive + // access to it here) + if(predecessor_ptr->is_runnable()) { + _self().update_scheduling_info_from_completed_predecessor( + aggregate, predecessor_ptr->as_runnable_task() + ); + } + else { + KOKKOS_ASSERT(predecessor_ptr->is_aggregate()); + _self().update_scheduling_info_from_completed_predecessor( + aggregate, (*predecessor_ptr).template as_aggregate() + ); + } + } + + // the reference count for the predecessor was incremented when we put + // it into the predecessor list, so decrement it here + bool should_delete = predecessor_ptr->decrement_and_check_reference_count(); + if(should_delete) { + // TODO @tasking @cleanup DSH better encapsulation of this! + _self().deallocate(std::move(*predecessor_ptr)); + } + + // Stop the loop if we found an incomplete dependence + if(incomplete_dependence_found) break; + } + } + + // NOTE: it's not safe to access aggregate any more if an incomplete dependence + // was found, because some other thread could have already popped it off + // of another waiting queue + + if(not incomplete_dependence_found) { + // all of the predecessors were completed, so we can complete `aggregate` + _self().complete(std::move(aggregate), info); + } + // Note!! aggregate may have been deleted at this point, so don't add anything here! 
+ } + + // Provide a sensible default that can be overridden + template + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + RunnableTaskBase& ready_task, + RunnableTaskBase const& predecessor + ) const + { + // by default, tell a ready task to use the scheduling info of its most + // recent predecessor + using task_scheduling_info_type = typename Derived::task_scheduling_info_type; + ready_task.template scheduling_info_as() = + predecessor.template scheduling_info_as(); + } + + // Provide a sensible default that can be overridden + template + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + AggregateTask& aggregate, + RunnableTaskBase const& predecessor + ) const + { + // by default, tell a ready task to use the scheduling info of its most + // recent predecessor + using task_scheduling_info_type = typename Derived::task_scheduling_info_type; + aggregate.scheduling_info() = + predecessor.template scheduling_info_as(); + } + + // Provide a sensible default that can be overridden + template + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + AggregateTask& aggregate, + AggregateTask const& predecessor + ) const + { + // by default, tell a ready task to use the scheduling info of its most + // recent predecessor + aggregate.scheduling_info() = predecessor.scheduling_info(); + } + + // Provide a sensible default that can be overridden + template + KOKKOS_INLINE_FUNCTION + void update_scheduling_info_from_completed_predecessor( + RunnableTaskBase& ready_task, + AggregateTask const& predecessor + ) const + { + // by default, tell a ready task to use the scheduling info of its most + // recent predecessor + using task_scheduling_info_type = typename Derived::task_scheduling_info_type; + ready_task.template scheduling_info_as() = + predecessor.scheduling_info(); + } + + template + KOKKOS_INLINE_FUNCTION + void initialize_scheduling_info_from_predecessor( + TaskNode& task, + 
TaskNode& predecessor + ) const + { + /* do nothing by default */ + } + + template + KOKKOS_INLINE_FUNCTION + void initialize_scheduling_info_from_team_scheduler_info( + TaskNode& task, + TeamSchedulerInfo const& info + ) const + { + /* do nothing by default */ + } + + template < + class ExecutionSpace, + class MemorySpace, + class MemoryPool + > + static /* KOKKOS_CONSTEXPR_14 */ size_t + task_queue_allocation_size( + ExecutionSpace const&, + MemorySpace const&, + MemoryPool const& + ) + // requires Same + // && Same + // && Same + { + static_assert( + std::is_same::value + && std::is_same::value + && std::is_same::value, + "Type mismatch in task_queue_allocation_size customization point" + ); + + return sizeof(Derived); + } + + // end Scheduling }}}2 + //---------------------------------------------------------------------------- + +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKQUEUECOMMON_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueueMemoryManager.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueueMemoryManager.hpp new file mode 100644 index 0000000000..c3ed1d6c71 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueueMemoryManager.hpp @@ -0,0 +1,251 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_TASKQUEUEMEMORYMANAGER_HPP +#define KOKKOS_IMPL_TASKQUEUEMEMORYMANAGER_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template < + class ExecSpace, + class MemorySpace, + class MemoryPool = Kokkos::MemoryPool> +> +class TaskQueueMemoryManager + : public TaskQueueBase +{ +public: + + using execution_space = ExecSpace; + using memory_space = MemorySpace; + using device_type = Kokkos::Device; + using memory_pool = MemoryPool; + using allocation_size_type = size_t; + +private: + + memory_pool m_pool; + // TODO @tasking @generalization DSH re-enable this with a flag in the type + //long m_accum_alloc = 0; + int m_count_alloc = 0; + int m_max_alloc = 0; + + struct _allocation_result { + bool success; + void* pointer; + }; + + KOKKOS_INLINE_FUNCTION + _allocation_result + _do_pool_allocate(allocation_size_type requested_size) { + // KOKKOS_EXPECTS(requested_size >= 0); generates a warning when allocation_size_type is unsigned + if(requested_size == 0 ) { + return { true, nullptr }; + } + else { + void* data = m_pool.allocate(static_cast(requested_size)); + + //Kokkos::atomic_increment(&m_accum_alloc); // memory_order_relaxed + Kokkos::atomic_increment(&m_count_alloc); // memory_order_relaxed + // TODO @tasking @minor DSH make this thread safe? (otherwise, it's just an approximation, which is probably fine...) 
+ if(m_max_alloc < m_count_alloc) m_max_alloc = m_count_alloc; + + return { data != nullptr, data }; + } + } + + template + KOKKOS_INLINE_FUNCTION + T* + _do_contruct(void* allocated, allocation_size_type allocated_size, Args&&... args) { + + static_assert( + std::is_base_of, T>::value, + "TaskQueueMemoryManager can only allocate objects with PoolAllocatedObjectBase base class" + ); + + // TODO @tasking DSH figure out why this isn't working + //static_assert( + // std::is_constructible::value, + // "TaskQueueMemoryManager can't construct object of the requested type from the " + // " allocation size and the given arguments" + //); + + + auto rv = new (allocated) T( + std::forward(args)..., + allocated_size + ); + + // It feels like there should be a way to check this at compile-time + KOKKOS_ASSERT( + (intptr_t)(rv) == (intptr_t)(static_cast*>(rv)) + && "PoolAllocatedObjectBase must be the first base class of the allocated type" + ); + + return rv; + + } + + +public: + + explicit + TaskQueueMemoryManager(memory_pool const& pool) + : m_pool(pool) + { } + + + template + KOKKOS_FUNCTION + T* + allocate_and_construct(Args&&... args) + // requires + // std::is_base_of_v, T> + // && std::is_constructible_v + { + constexpr auto allocation_size = sizeof(T); + + + auto result = _do_pool_allocate(allocation_size); + + KOKKOS_ASSERT(result.success && "Memory allocation failure"); + + auto rv = _do_contruct(result.pointer, allocation_size, std::forward(args)...); + + KOKKOS_ENSURES(intptr_t(rv) % alignof(T) == 0 && "alignment not preserved!"); + + return rv; + } + + template + KOKKOS_INLINE_FUNCTION + T* + allocate_and_construct_with_vla_emulation( + allocation_size_type n_vla_entries, + Args&&... 
args + ) + // requires + // std::is_base_of_v, T> + // && std::is_base_of, T>::value + // && std::is_constructible_v + { + + + static_assert( + std::is_base_of, T>::value, + "Can't append emulated variable length array of type with greater alignment than" + " the type to which the VLA is being appended" + ); + + using vla_emulation_base = ObjectWithVLAEmulation; + + auto const allocation_size = vla_emulation_base::required_allocation_size(n_vla_entries); + auto result = _do_pool_allocate(allocation_size); + + KOKKOS_ASSERT(result.success && "Memory allocation failure"); + + auto rv = _do_contruct(result.pointer, allocation_size, std::forward(args)...); + + KOKKOS_ENSURES(intptr_t(rv) % alignof(T) == 0); + + return rv; + } + + template + KOKKOS_INLINE_FUNCTION + void deallocate(PoolAllocatedObjectBase&& obj) + { + m_pool.deallocate((void*)&obj, 1); + Kokkos::atomic_decrement(&m_count_alloc); // memory_order_relaxed + } + + KOKKOS_INLINE_FUNCTION + memory_pool& get_memory_pool() { return m_pool; } + KOKKOS_INLINE_FUNCTION + memory_pool const& get_memory_pool() const { return m_pool; } + + KOKKOS_INLINE_FUNCTION + int allocation_count() const noexcept { return m_count_alloc; } +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//////////////////////////////////////////////////////////////////////////////// +// END OLD CODE +//////////////////////////////////////////////////////////////////////////////// + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKQUEUEMEMORYMANAGER_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple.hpp new file mode 100644 index 0000000000..17c357ff31 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple.hpp @@ -0,0 +1,286 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_HPP +#define KOKKOS_IMPL_TASKQUEUEMULTIPLE_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + + +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< typename ExecSpace, typename MemorySpace = typename ExecSpace::memory_space > +class LeagueQueueCollection; + +template +class TaskQueueMultiple : public TaskQueue { +private: + + using base_t = TaskQueue; + using queue_collection_t = LeagueQueueCollection; + + int m_league_rank = static_cast(KOKKOS_INVALID_INDEX); + + // This pointer is owning only if m_league_rank == 0 + queue_collection_t* m_other_queues = nullptr; + + +public: + + struct Destroy { + TaskQueueMultiple* m_queue ; + void destroy_shared_allocation(); + }; + + + using team_queue_type = TaskQueueMultiple; + + TaskQueueMultiple( + int arg_league_rank, + queue_collection_t* arg_other_queues, + typename base_t::memory_pool const& arg_memory_pool + ) + : base_t(arg_memory_pool), + m_league_rank(arg_league_rank), + m_other_queues(arg_other_queues) + { } + + explicit TaskQueueMultiple( + typename base_t::memory_pool const& arg_memory_pool + ) + : base_t(arg_memory_pool), + m_league_rank(0) + { + void* other_queues_buffer = typename base_t::memory_space{}.allocate(sizeof(queue_collection_t)); + m_other_queues = new(other_queues_buffer) queue_collection_t(this); + } + + ~TaskQueueMultiple() { + if(m_league_rank == 0 && m_other_queues != nullptr) { + m_other_queues->~queue_collection_t(); + typename 
base_t::memory_space{}.deallocate(m_other_queues, sizeof(queue_collection_t)); + } + // rest of destruction is handled in the base class + } + + //---------------------------------------- + + void initialize_team_queues(int arg_league_size) const noexcept { + m_other_queues->initialize_team_queues(arg_league_size, this->m_memory); + } + + KOKKOS_INLINE_FUNCTION + team_queue_type& get_team_queue(int arg_league_rank) noexcept { + if(arg_league_rank == m_league_rank) return *this; + else return m_other_queues->get_team_queue(arg_league_rank); + } + + KOKKOS_INLINE_FUNCTION + typename base_t::task_root_type* + attempt_to_steal_task() noexcept { + TaskBase* rv = nullptr; + auto* const end_tag = reinterpret_cast(TaskBase::EndTag); + + if (m_other_queues == nullptr) { + Kokkos::abort("attempted to steal task before queues were initialized!"); + } + + // Loop by priority and then type, and then team + for ( int i = 0 ; i < base_t::NumQueue; ++i ) { + for ( int j = 0 ; j < 2; ++j ) { + // for now, always start by trying to steal from team zero + for(int iteam = 0; iteam < m_other_queues->size(); ++iteam) { + if(iteam == m_league_rank) continue; + auto& steal_from = get_team_queue(iteam); + if( *((volatile int *) & steal_from.m_ready_count) > 0 ) { + // we've found at least one queue that's not done, so even if we can't + // pop something off of it we shouldn't return a nullptr indicating + // completion. rv will be end_tag when the pop fails + rv = base_t::pop_ready_task(&steal_from.m_ready[i][j]); + if(rv != end_tag) { + // task stolen. + // first increment our ready count, then decrement the ready count + // on the other queue: + Kokkos::atomic_increment(&this->m_ready_count); + Kokkos::atomic_decrement(&steal_from.m_ready_count); + return rv; + } + } + } + } + } + + // at this point, rv will only be nullptr if *all* of the queues had an + // m_ready_count of 0. This indicates quiescence. 
If at least some of them + // had non-zero, there would have been at least one pop_ready_task that + // was called and returned end_tag if it couldn't pop a task + return rv; + } + + +}; + +template +class LeagueQueueCollection { +private: + + using execution_space = ExecSpace; + using memory_space = MemorySpace; + using device_type = Kokkos::Device; + using memory_pool = Kokkos::MemoryPool; + using team_queue_type = TaskQueueMultiple; + using team_scheduler_type = BasicTaskScheduler; + using specialization = TaskQueueSpecialization; + + enum : long { max_num_queues = 6 }; //specialization::max_league_size }; + + // this is a non-owning pointer + team_queue_type* m_rank_zero_queue = nullptr; + // This really needs to be an optional> + union optional_queue { + KOKKOS_INLINE_FUNCTION + optional_queue() : uninitialized(0) { } + KOKKOS_INLINE_FUNCTION + ~optional_queue() { uninitialized = 0; } + char uninitialized; + team_queue_type initialized; + } m_queues[max_num_queues]; + int m_size = static_cast(KOKKOS_INVALID_INDEX); + +public: + + LeagueQueueCollection() = delete; + LeagueQueueCollection(LeagueQueueCollection const&) = delete; + LeagueQueueCollection(LeagueQueueCollection&&) = delete; + LeagueQueueCollection& operator=(LeagueQueueCollection const&) = delete; + LeagueQueueCollection& operator=(LeagueQueueCollection&&) = delete; + + ~LeagueQueueCollection() { + // destroy only the initialized queues that we own + for(int iteam = 0; iteam < m_size - 1; ++iteam) { + m_queues[iteam].initialized.~team_queue_type(); + m_queues[iteam].uninitialized = 0; + } + } + + KOKKOS_INLINE_FUNCTION + explicit LeagueQueueCollection( + team_queue_type* arg_rank_zero_queue + ) : m_rank_zero_queue(arg_rank_zero_queue), + m_size(1) + { } + + void initialize_team_queues( + int arg_count, memory_pool const& arg_memory_pool + ) noexcept + { + arg_count = std::min((int)max_num_queues, arg_count); + //assert(arg_count <= max_num_queues); + if(arg_count > m_size) { + for(int i = m_size; i < 
arg_count; ++i) { + new(&m_queues[i-1].initialized) team_queue_type(i, this, arg_memory_pool); + } + m_size = arg_count; + } + } + + KOKKOS_INLINE_FUNCTION + constexpr int size() const noexcept { return m_size; } + + KOKKOS_INLINE_FUNCTION + constexpr bool initialized() const noexcept { return m_size != int(KOKKOS_INVALID_INDEX); } + + KOKKOS_INLINE_FUNCTION + team_queue_type& get_team_queue(int iteam) { + iteam %= max_num_queues; + #if !defined(__HCC_ACCELERATOR__) && !defined(__CUDA_ARCH__) + assert(initialized()); + assert(iteam < m_size); + assert(iteam >= 0); + #endif + if(iteam == 0) return *m_rank_zero_queue; + else return m_queues[iteam-1].initialized; + } + +}; + + +} /* namespace Impl */ +} /* namespace Kokkos */ + + + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#include + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple_impl.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple_impl.hpp new file mode 100644 index 0000000000..81bcc96831 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueueMultiple_impl.hpp @@ -0,0 +1,72 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_IMPL_HPP +#define KOKKOS_IMPL_TASKQUEUEMULTIPLE_IMPL_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include + +#define KOKKOS_IMPL_DEBUG_TASKDAG_SCHEDULING_MULTIPLE 0 + +namespace Kokkos { +namespace Impl { + +template +void TaskQueueMultiple::Destroy::destroy_shared_allocation() { +// KOKKOS WORKAROUND for CUDA 10.1 with GCC 7.3.0 +#if(KOKKOS_COMPILER_CUDA_VERSION==101) && defined(KOKKOS_COMPILER_NVCC) && (KOKKOS_COMPILER_GNU>=730) + (*m_queue).get_team_queue(0).~TaskQueueMultiple(); +#else + m_queue->get_team_queue(0).~TaskQueueMultiple(); +#endif +} + +} /* namespace Impl */ +} /* namespace Kokkos */ + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKQUEUEMULTIPLE_IMPL_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp index 5bcf672ff6..b5f8db0085 100644 --- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp @@ -41,6 +41,8 @@ //@HEADER */ +#ifndef KOKKOS_IMPL_TASKQUEUE_IMPL_HPP +#define KOKKOS_IMPL_TASKQUEUE_IMPL_HPP #include #if defined( KOKKOS_ENABLE_TASKDAG ) @@ -51,22 +53,22 @@ namespace Impl { //---------------------------------------------------------------------------- -template< typename ExecSpace > -void TaskQueue< ExecSpace >::Destroy::destroy_shared_allocation() +template< typename ExecSpace, typename MemorySpace > +void TaskQueue< ExecSpace, MemorySpace >::Destroy::destroy_shared_allocation() { m_queue->~TaskQueue(); } //---------------------------------------------------------------------------- -template< typename ExecSpace > -TaskQueue< ExecSpace >::TaskQueue - ( typename TaskQueue< ExecSpace >::memory_pool const & arg_memory_pool ) +template< typename ExecSpace, typename MemorySpace> +TaskQueue< ExecSpace, 
MemorySpace>::TaskQueue + ( typename TaskQueue< ExecSpace, MemorySpace>::memory_pool const & arg_memory_pool ) : m_memory( arg_memory_pool ) , m_ready() - , m_accum_alloc(0) - , m_count_alloc(0) - , m_max_alloc(0) + //, m_accum_alloc(0) + //, m_count_alloc(0) + //, m_max_alloc(0) , m_ready_count(0) { for ( int i = 0 ; i < NumQueue ; ++i ) { @@ -77,8 +79,8 @@ TaskQueue< ExecSpace >::TaskQueue //---------------------------------------------------------------------------- -template< typename ExecSpace > -TaskQueue< ExecSpace >::~TaskQueue() +template< typename ExecSpace, typename MemorySpace> +TaskQueue< ExecSpace, MemorySpace>::~TaskQueue() { // Verify that queues are empty and ready count is zero @@ -97,10 +99,10 @@ TaskQueue< ExecSpace >::~TaskQueue() //---------------------------------------------------------------------------- -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -void TaskQueue< ExecSpace >::decrement - ( TaskQueue< ExecSpace >::task_root_type * task ) +void TaskQueue< ExecSpace, MemorySpace>::decrement + ( TaskQueue< ExecSpace, MemorySpace>::task_root_type * task ) { task_root_type volatile & t = *task ; @@ -121,8 +123,13 @@ void TaskQueue< ExecSpace >::decrement ( t.m_next == (task_root_type *) task_root_type::LockTag ) ) { // Reference count is zero and task is complete, deallocate. - TaskQueue< ExecSpace > * const queue = - static_cast< TaskQueue< ExecSpace > * >( t.m_queue ); + //TaskQueue< ExecSpace, MemorySpace> * const queue = + // static_cast( t.m_scheduler )->m_queue; + auto* const volatile queue = static_cast(t.m_queue); + + // TODO @tasking @minor DSH this should call the destructor for a non-trivially destructible type (possibly just ignore this in the old version, though?) 
+ // (Can't just do this; it needs to be queued since it's device code + // if(task->m_destroy) task->m_destroy(task); queue->deallocate( task , t.m_alloc_size ); } @@ -133,32 +140,32 @@ void TaskQueue< ExecSpace >::decrement //---------------------------------------------------------------------------- -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -size_t TaskQueue< ExecSpace >::allocate_block_size( size_t n ) +size_t TaskQueue< ExecSpace, MemorySpace>::allocate_block_size( size_t n ) { return m_memory.allocate_block_size( n ); } -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -void * TaskQueue< ExecSpace >::allocate( size_t n ) +void * TaskQueue< ExecSpace, MemorySpace>::allocate( size_t n ) { void * const p = m_memory.allocate(n); if ( p ) { - Kokkos::atomic_increment( & m_accum_alloc ); + //Kokkos::atomic_increment( & m_accum_alloc ); Kokkos::atomic_increment( & m_count_alloc ); - if ( m_max_alloc < m_count_alloc ) m_max_alloc = m_count_alloc ; + //if ( m_max_alloc < m_count_alloc ) m_max_alloc = m_count_alloc ; } return p ; } -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -void TaskQueue< ExecSpace >::deallocate( void * p , size_t n ) +void TaskQueue< ExecSpace, MemorySpace>::deallocate( void * p , size_t n ) { m_memory.deallocate( p , n ); Kokkos::atomic_decrement( & m_count_alloc ); @@ -166,11 +173,11 @@ void TaskQueue< ExecSpace >::deallocate( void * p , size_t n ) //---------------------------------------------------------------------------- -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -bool TaskQueue< ExecSpace >::push_task - ( TaskQueue< ExecSpace >::task_root_type * volatile * const queue - , TaskQueue< ExecSpace >::task_root_type * const task +bool TaskQueue< ExecSpace, MemorySpace>::push_task + ( TaskQueue< ExecSpace, 
MemorySpace>::task_root_type * volatile * const queue + , TaskQueue< ExecSpace, MemorySpace>::task_root_type * const task ) { // Push task into a concurrently pushed and popped queue. @@ -200,20 +207,29 @@ bool TaskQueue< ExecSpace >::push_task Kokkos::abort("TaskQueue::push_task ERROR: already a member of another queue" ); } - task_root_type * y = *queue ; + // store the head of the queue + task_root_type * old_head = *queue ; - while ( lock != y ) { + while ( old_head != lock ) { - next = y ; + // set task->next to the head of the queue + next = old_head; // Do not proceed until 'next' has been stored. Kokkos::memory_fence(); - task_root_type * const x = y ; + // store the old head + task_root_type * const old_head_tmp = old_head; - y = Kokkos::atomic_compare_exchange(queue,y,task); + // attempt to swap task with the old head of the queue + // as if this were done atomically: + // if(*queue == old_head) { + // *queue = task; + // } + // old_head = *queue; + old_head = Kokkos::atomic_compare_exchange(queue, old_head, task); - if ( x == y ) return true ; + if(old_head_tmp == old_head) return true; } // Failed, replace 'task->m_next' value since 'task' remains @@ -229,11 +245,11 @@ bool TaskQueue< ExecSpace >::push_task //---------------------------------------------------------------------------- -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -typename TaskQueue< ExecSpace >::task_root_type * -TaskQueue< ExecSpace >::pop_ready_task - ( TaskQueue< ExecSpace >::task_root_type * volatile * const queue ) +typename TaskQueue< ExecSpace, MemorySpace>::task_root_type * +TaskQueue< ExecSpace, MemorySpace>::pop_ready_task + ( TaskQueue< ExecSpace, MemorySpace>::task_root_type * volatile * const queue ) { // Pop task from a concurrently pushed and popped ready task queue. // The queue is a linked list where 'task->m_next' form the links. 
@@ -280,6 +296,10 @@ TaskQueue< ExecSpace >::pop_ready_task task_root_type * volatile & next = task->m_next ; + // This algorithm is not lockfree because an adversarial scheduler could + // context switch this thread at this point and the rest of the threads + // calling this method would never make forward progress + *queue = next ; next = lock ; Kokkos::memory_fence(); @@ -304,10 +324,10 @@ TaskQueue< ExecSpace >::pop_ready_task //---------------------------------------------------------------------------- -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -void TaskQueue< ExecSpace >::schedule_runnable - ( TaskQueue< ExecSpace >::task_root_type * const task ) +void TaskQueue< ExecSpace, MemorySpace>::schedule_runnable + ( TaskQueue< ExecSpace, MemorySpace>::task_root_type * const task ) { // Schedule a runnable task upon construction / spawn // and upon completion of other tasks that 'task' is waiting on. @@ -389,6 +409,8 @@ void TaskQueue< ExecSpace >::schedule_runnable Kokkos::memory_fence(); + // If we don't have a dependency, or if pushing onto the wait queue of that dependency + // failed (since the only time that queue should be locked is when the task is transitioning to complete??!?) const bool is_ready = ( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) ); @@ -431,10 +453,10 @@ void TaskQueue< ExecSpace >::schedule_runnable // from a queue and processed it as appropriate. } -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -void TaskQueue< ExecSpace >::schedule_aggregate - ( TaskQueue< ExecSpace >::task_root_type * const task ) +void TaskQueue< ExecSpace, MemorySpace>::schedule_aggregate + ( TaskQueue< ExecSpace, MemorySpace>::task_root_type * const task ) { // Schedule an aggregate task upon construction // and upon completion of other tasks that 'task' is waiting on. 
@@ -556,9 +578,9 @@ void TaskQueue< ExecSpace >::schedule_aggregate //---------------------------------------------------------------------------- -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -void TaskQueue< ExecSpace >::reschedule( task_root_type * task ) +void TaskQueue< ExecSpace, MemorySpace>::reschedule( task_root_type * task ) { // Precondition: // task is in Executing state @@ -578,10 +600,10 @@ void TaskQueue< ExecSpace >::reschedule( task_root_type * task ) //---------------------------------------------------------------------------- -template< typename ExecSpace > +template< typename ExecSpace, typename MemorySpace> KOKKOS_FUNCTION -void TaskQueue< ExecSpace >::complete - ( TaskQueue< ExecSpace >::task_root_type * task ) +void TaskQueue< ExecSpace, MemorySpace>::complete + ( TaskQueue< ExecSpace, MemorySpace>::task_root_type * task ) { // Complete a runnable task that has finished executing // or a when_all task when all of its dependeneces are complete. @@ -679,4 +701,5 @@ void TaskQueue< ExecSpace >::complete } /* namespace Kokkos */ #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKQUEUE_IMPL_HPP */ diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskResult.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskResult.hpp new file mode 100644 index 0000000000..d45ebff00b --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskResult.hpp @@ -0,0 +1,151 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// Experimental unified task-data parallel manycore LDRD + +#ifndef KOKKOS_IMPL_TASKRESULT_HPP +#define KOKKOS_IMPL_TASKRESULT_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include + +#include +#include + +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< typename ResultType > +struct TaskResult { + + enum : int32_t { size = sizeof(ResultType) }; + + using reference_type = ResultType & ; + + template + KOKKOS_INLINE_FUNCTION static + ResultType * ptr( PoolAllocatedObjectBase* task ) + { + return reinterpret_cast< ResultType * > + ( reinterpret_cast< char * >(task) + task->get_allocation_size() - sizeof(ResultType) ); + } + + KOKKOS_INLINE_FUNCTION static + ResultType * ptr( TaskBase* task ) + { + return reinterpret_cast< ResultType * > + ( reinterpret_cast< char * >(task) + task->m_alloc_size - sizeof(ResultType) ); + } + + KOKKOS_INLINE_FUNCTION static + reference_type get( TaskBase* task ) + { return *ptr( task ); } + + template + KOKKOS_INLINE_FUNCTION static + reference_type get( TaskNode* task ) + { return *ptr( task ); } + + KOKKOS_INLINE_FUNCTION static + void destroy( TaskBase* task ) + { get(task).~ResultType(); } + + + //template + //KOKKOS_INLINE_FUNCTION static + //void destroy( TaskNode* task ) + //{ get(task).~ResultType(); } +}; + +template<> +struct TaskResult< void > { + + enum : int32_t { size = 0 }; + + using reference_type = void ; + + template + KOKKOS_INLINE_FUNCTION static + void* ptr( TaskNode* task ) + { return nullptr; } + + KOKKOS_INLINE_FUNCTION static + void * ptr( TaskBase* ) { return (void*) nullptr ; } + + template + KOKKOS_INLINE_FUNCTION static + reference_type get( TaskNode* task ) + { /* Should 
never be called */ } + + KOKKOS_INLINE_FUNCTION static + reference_type get( TaskBase* ) {} + + KOKKOS_INLINE_FUNCTION static + void destroy( TaskBase* task ) + { } + + //template + //KOKKOS_INLINE_FUNCTION static + //void destroy( TaskNode* task ) + //{ } +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_TASKRESULT_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskTeamMember.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskTeamMember.hpp new file mode 100644 index 0000000000..4bf3f4fa94 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_TaskTeamMember.hpp @@ -0,0 +1,135 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TASKTEAMMEMBER_HPP +#define KOKKOS_TASKTEAMMEMBER_HPP + +//---------------------------------------------------------------------------- + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + +#include +#include +//---------------------------------------------------------------------------- + +#include +#include + +#include +#include +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template +class TaskTeamMemberAdapter : public TeamMember { +private: + + Scheduler m_scheduler; + +public: + + //---------------------------------------- + + // Forward everything but the Scheduler to the constructor of the TeamMember + // type that we're adapting + template + KOKKOS_INLINE_FUNCTION + explicit TaskTeamMemberAdapter( + typename std::enable_if< + std::is_constructible::value, + Scheduler + >::type arg_scheduler, + Args&&... 
args + ) // TODO @tasking @minor DSH noexcept specification + : TeamMember(std::forward(args)...), + m_scheduler(std::move(arg_scheduler).get_team_scheduler(this->league_rank())) + { } + + // (rule of 6 constructors) + + KOKKOS_INLINE_FUNCTION + TaskTeamMemberAdapter() = default; + + KOKKOS_INLINE_FUNCTION + TaskTeamMemberAdapter(TaskTeamMemberAdapter const&) = default; + + KOKKOS_INLINE_FUNCTION + TaskTeamMemberAdapter(TaskTeamMemberAdapter&&) = default; + + KOKKOS_INLINE_FUNCTION + TaskTeamMemberAdapter& operator=(TaskTeamMemberAdapter const&) = default; + + KOKKOS_INLINE_FUNCTION + TaskTeamMemberAdapter& operator=(TaskTeamMemberAdapter&&) = default; + + KOKKOS_INLINE_FUNCTION ~TaskTeamMemberAdapter() = default; + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + Scheduler const& scheduler() const noexcept { return m_scheduler; } + + KOKKOS_INLINE_FUNCTION + Scheduler& scheduler() noexcept { return m_scheduler; } + + //---------------------------------------- + +}; + +} // end namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_TASKTEAMMEMBER_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Traits.hpp b/lib/kokkos/core/src/impl/Kokkos_Traits.hpp index 475a696719..a5af82838f 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Traits.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Traits.hpp @@ -483,6 +483,54 @@ struct is_integral_constant< integral_constant > : public true_ enum { integral_value = v }; }; +//---------------------------------------------------------------------------- + +template +class TypeList; + +//---------------------------------------------------------------------------- + +template +struct ReverseTypeList; + +template +struct ReverseTypeList> { + template + struct impl { + using type = typename 
ReverseTypeList>::template impl::type; + }; + using type = typename impl<>::type; +}; + +template <> +struct ReverseTypeList> { + template + struct impl { + using type = TypeList; + }; + using type = TypeList<>; +}; + +//---------------------------------------------------------------------------- + +template +struct make_all_extents_into_pointers +{ + using type = T; +}; + +template +struct make_all_extents_into_pointers +{ + using type = typename make_all_extents_into_pointers::type*; +}; + +template +struct make_all_extents_into_pointers +{ + using type = typename make_all_extents_into_pointers::type*; +}; + } // namespace Impl } // namespace Kokkos diff --git a/lib/kokkos/core/src/impl/Kokkos_VLAEmulation.hpp b/lib/kokkos/core/src/impl/Kokkos_VLAEmulation.hpp new file mode 100644 index 0000000000..48e1851e60 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_VLAEmulation.hpp @@ -0,0 +1,295 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_VLAEMULATION_HPP +#define KOKKOS_IMPL_VLAEMULATION_HPP + +#include +#if defined( KOKKOS_ENABLE_TASKDAG ) + + +#include + +#include // KOKKOS_EXPECTS + +#include // std::is_abstract<>, ... 
+ +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template < + class Derived, + class VLAValueType, + class EntryCountType = int32_t +> +struct ObjectWithVLAEmulation; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +/** @brief Attorney to enable private CRTP inheritance from ObjectWithVLAEmulation + */ +struct VLAEmulationAccess { +private: + + template + friend struct ObjectWithVLAEmulation; + + template + KOKKOS_FORCEINLINE_FUNCTION + static constexpr Derived* + _cast_to_derived(ObjectWithVLAEmulation* base) noexcept + { + return static_cast(base); + } + + template + KOKKOS_FORCEINLINE_FUNCTION + static constexpr Derived const* + _cast_to_derived(ObjectWithVLAEmulation const* base) noexcept + { + return static_cast(base); + } + +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +/** \brief A CRTP base class for a type that includes a variable-length array by allocation + * + * The storage for the derived type must be allocated manually and the objects + * (both derived type and VLA objects) must be constructed with placement new. + * Obviously, this can't be done for objects on the stack. + * + * Note: Though most uses of this currently delete the copy and move constructor + * in the `Derived` type, this type is intended to have value semantics. 
+ * + * \todo @documentation elaborate on implications of value semantics for this class template + * + */ +template < + class Derived, + class VLAValueType, + class EntryCountType /* = int32_t */ +> +struct ObjectWithVLAEmulation { +public: + + using object_type = Derived; + using vla_value_type = VLAValueType; + using vla_entry_count_type = EntryCountType; + + using iterator = VLAValueType*; + using const_iterator = typename std::add_const::type*; + + + // TODO @tasking @minor DSH require that Derived be marked final? (note that std::is_final is C++14) + // TODO @tasking @minor DSH delete non-placement operator new for Derived type? + +private: + + vla_entry_count_type m_num_entries; + + // CRTP boilerplate + + KOKKOS_FORCEINLINE_FUNCTION + /* KOKKOS_CONSTEXPR_14 */ + Derived* _this() noexcept { return VLAEmulationAccess::_cast_to_derived(this); } + + KOKKOS_FORCEINLINE_FUNCTION + /* KOKKOS_CONSTEXPR_14 */ + Derived const* _this() const noexcept { return VLAEmulationAccess::_cast_to_derived(this); } + + // Note: can't be constexpr because of reinterpret_cast + KOKKOS_FORCEINLINE_FUNCTION + /* KOKKOS_CONSTEXPR_14 */ + vla_value_type* _vla_pointer() noexcept { + // The data starts right after the aligned storage of Derived + return reinterpret_cast(_this() + 1); + } + + // Note: can't be constexpr because of reinterpret_cast + KOKKOS_FORCEINLINE_FUNCTION + /* KOKKOS_CONSTEXPR_14 */ + vla_value_type const* _vla_pointer() const noexcept { + // The data starts right after the aligned storage of Derived + return reinterpret_cast(_this() + 1); + } + +public: + + KOKKOS_INLINE_FUNCTION + static /* KOKKOS_CONSTEXPR_14 */ size_t + required_allocation_size(vla_entry_count_type num_vla_entries) { + KOKKOS_EXPECTS(num_vla_entries >= 0); + return sizeof(Derived) + num_vla_entries * sizeof(VLAValueType); + } + + //---------------------------------------------------------------------------- + // {{{2 + + // TODO @tasking @optimization DSH specialization for trivially 
constructible VLAValueType? + // TODO @tasking @minor DSH SFINAE-out this constructor for non-default constructible vla_value_types + KOKKOS_INLINE_FUNCTION + explicit + ObjectWithVLAEmulation(vla_entry_count_type num_entries) + noexcept(noexcept(vla_value_type())) + : m_num_entries(num_entries) + { + // Note: We can't do this at class scope because it unnecessarily requires + // object_type to be a complete type + static_assert( + alignof(object_type) >= alignof(vla_value_type), + "Can't append emulated variable length array of type with greater alignment than" + " the type to which the VLA is being appended" + ); + + // Note: We can't do this at class scope because it unnecessarily requires + // vla_value_type to be a complete type + static_assert( + not std::is_abstract::value, + "Can't use abstract type with VLA emulation" + ); + + KOKKOS_EXPECTS(num_entries >= 0); + for(vla_entry_count_type i = 0; i < m_num_entries; ++i) { + new (_vla_pointer() + i) vla_value_type(); + } + } + + KOKKOS_INLINE_FUNCTION + ~ObjectWithVLAEmulation() + noexcept(noexcept(std::declval().~vla_value_type())) + { + for(auto&& value : *this) { value.~vla_value_type(); } + } + + // TODO @tasking @new_feature DSH constrained analogs for move and copy ctors and assignment ops + // TODO @tasking @new_feature DSH forwarding in_place constructor + // TODO @tasking @new_feature DSH initializer_list constructor? 
+ + // end Constructors, destructor, and assignment }}}2 + //---------------------------------------------------------------------------- + + + KOKKOS_INLINE_FUNCTION + constexpr EntryCountType n_vla_entries() const noexcept { return m_num_entries; } + + + //---------------------------------------------------------------------------- + // {{{2 + + KOKKOS_INLINE_FUNCTION + object_type& object() & { return static_cast(*this); } + + KOKKOS_INLINE_FUNCTION + object_type const& object() const & { return static_cast(*this); } + + KOKKOS_INLINE_FUNCTION + object_type&& object() && { return static_cast(*this); } + + + KOKKOS_INLINE_FUNCTION + vla_value_type& vla_value_at(vla_entry_count_type n) & + { + KOKKOS_EXPECTS(n < n_vla_entries()); + return _vla_pointer()[n]; + } + + KOKKOS_INLINE_FUNCTION + vla_value_type const& vla_value_at(vla_entry_count_type n) const & + { + KOKKOS_EXPECTS(n < n_vla_entries()); + return _vla_pointer()[n]; + } + + KOKKOS_INLINE_FUNCTION + vla_value_type& vla_value_at(vla_entry_count_type n) && + { + KOKKOS_EXPECTS(n < n_vla_entries()); + return _vla_pointer()[n]; + } + + // end Accessing the object and the VLA values }}}2 + //---------------------------------------------------------------------------- + + + //---------------------------------------------------------------------------- + // {{{2 + + KOKKOS_INLINE_FUNCTION + iterator begin() noexcept { return _vla_pointer(); } + + KOKKOS_INLINE_FUNCTION + const_iterator begin() const noexcept { return _vla_pointer(); } + + KOKKOS_INLINE_FUNCTION + const_iterator cbegin() noexcept { return _vla_pointer(); } + + KOKKOS_INLINE_FUNCTION + iterator end() noexcept { return _vla_pointer() + m_num_entries; } + + KOKKOS_INLINE_FUNCTION + const_iterator end() const noexcept { return _vla_pointer() + m_num_entries; } + + KOKKOS_INLINE_FUNCTION + const_iterator cend() noexcept { return _vla_pointer() + m_num_entries; } + + // end Iterators }}}2 + 
//---------------------------------------------------------------------------- + +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ +#endif /* #ifndef KOKKOS_IMPL_VLAEMULATION_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp index e1539d10b0..07774da279 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp @@ -367,6 +367,8 @@ public: // Can only convert to View::array_type + enum { is_assignable_data_type = std::is_same< typename DstTraits::data_type , typename SrcTraits::scalar_array_type >::value && + (DstTraits::rank==SrcTraits::rank+1)}; enum { is_assignable = std::is_same< typename DstTraits::data_type , typename SrcTraits::scalar_array_type >::value && std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value }; diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp index 773f336281..b2d8dea20a 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -275,7 +276,7 @@ struct ALL_t { constexpr const ALL_t & operator()() const { return *this ; } KOKKOS_INLINE_FUNCTION - constexpr bool operator == ( const ALL_t & right) const { return true;} + constexpr bool operator == ( const ALL_t & ) const { return true;} }; }} // namespace Kokkos::Impl @@ -1548,7 +1549,7 @@ struct ViewOffset< Dimension , Kokkos::LayoutRight template< class DimRHS > KOKKOS_INLINE_FUNCTION constexpr ViewOffset - ( const ViewOffset< DimRHS , Kokkos::LayoutRight , void > & rhs + ( const ViewOffset< DimRHS , Kokkos::LayoutRight , 
void > & , const SubviewExtents< DimRHS::rank , dimension_type::rank > & sub ) : m_dim( sub.range_extent(0) , 0, 0, 0, 0, 0, 0, 0 ) @@ -2319,7 +2320,7 @@ struct ViewDataHandle< Traits , && std::is_same< typename Traits::specialize , void >::value && - Traits::memory_traits::Atomic + Traits::memory_traits::is_atomic )>::type > { typedef typename Traits::value_type value_type ; @@ -2348,16 +2349,16 @@ struct ViewDataHandle< Traits , typename std::enable_if<( std::is_same< typename Traits::specialize , void >::value && - (!Traits::memory_traits::Aligned) + (!Traits::memory_traits::is_aligned) && - Traits::memory_traits::Restrict + Traits::memory_traits::is_restrict #ifdef KOKKOS_ENABLE_CUDA && (!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value || std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )) #endif && - (!Traits::memory_traits::Atomic) + (!Traits::memory_traits::is_atomic) )>::type > { typedef typename Traits::value_type value_type ; @@ -2366,17 +2367,17 @@ struct ViewDataHandle< Traits , typedef Kokkos::Impl::SharedAllocationTracker track_type ; KOKKOS_INLINE_FUNCTION - static handle_type assign( value_type * arg_data_ptr + static value_type* assign( value_type * arg_data_ptr , track_type const & /*arg_tracker*/ ) { - return handle_type( arg_data_ptr ); + return (value_type*)( arg_data_ptr ); } KOKKOS_INLINE_FUNCTION - static handle_type assign( handle_type const arg_data_ptr + static value_type* assign( handle_type const arg_data_ptr , size_t offset ) { - return handle_type( arg_data_ptr + offset ); + return (value_type*)( arg_data_ptr + offset ); } }; @@ -2385,16 +2386,16 @@ struct ViewDataHandle< Traits , typename std::enable_if<( std::is_same< typename Traits::specialize , void >::value && - Traits::memory_traits::Aligned + Traits::memory_traits::is_aligned && - (!Traits::memory_traits::Restrict) + (!Traits::memory_traits::is_restrict) #ifdef KOKKOS_ENABLE_CUDA && (!( std::is_same< typename 
Traits::memory_space,Kokkos::CudaSpace>::value || std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )) #endif && - (!Traits::memory_traits::Atomic) + (!Traits::memory_traits::is_atomic) )>::type > { typedef typename Traits::value_type value_type ; @@ -2428,16 +2429,16 @@ struct ViewDataHandle< Traits , typename std::enable_if<( std::is_same< typename Traits::specialize , void >::value && - Traits::memory_traits::Aligned + Traits::memory_traits::is_aligned && - Traits::memory_traits::Restrict + Traits::memory_traits::is_restrict #ifdef KOKKOS_ENABLE_CUDA && (!( std::is_same< typename Traits::memory_space,Kokkos::CudaSpace>::value || std::is_same< typename Traits::memory_space,Kokkos::CudaUVMSpace>::value )) #endif && - (!Traits::memory_traits::Atomic) + (!Traits::memory_traits::is_atomic) )>::type > { typedef typename Traits::value_type value_type ; @@ -2446,23 +2447,23 @@ struct ViewDataHandle< Traits , typedef Kokkos::Impl::SharedAllocationTracker track_type ; KOKKOS_INLINE_FUNCTION - static handle_type assign( value_type * arg_data_ptr + static value_type* assign( value_type * arg_data_ptr , track_type const & /*arg_tracker*/ ) { if ( reinterpret_cast(arg_data_ptr) % Impl::MEMORY_ALIGNMENT ) { Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute"); } - return handle_type( arg_data_ptr ); + return (value_type*)( arg_data_ptr ); } KOKKOS_INLINE_FUNCTION - static handle_type assign( handle_type const arg_data_ptr + static value_type* assign( handle_type const arg_data_ptr , size_t offset ) { if ( reinterpret_cast(arg_data_ptr+offset) % Impl::MEMORY_ALIGNMENT ) { Kokkos::abort("Assigning NonAligned View or Pointer to Kokkos::View with Aligned attribute"); } - return handle_type( arg_data_ptr + offset ); + return (value_type*)( arg_data_ptr + offset ); } }; }} // namespace Kokkos::Impl @@ -2955,7 +2956,8 @@ private: }; public: - + enum { is_assignable_data_type = is_assignable_value_type && + 
is_assignable_dimension }; enum { is_assignable = is_assignable_space && is_assignable_value_type && is_assignable_dimension && @@ -3052,7 +3054,8 @@ private: , typename SrcTraits::dimension >::value }; public: - + enum { is_assignable_data_type = is_assignable_value_type && + is_assignable_dimension }; enum { is_assignable = is_assignable_space && is_assignable_value_type && is_assignable_dimension }; @@ -3062,7 +3065,7 @@ public: typedef ViewMapping< SrcTraits , void > SrcType ; KOKKOS_INLINE_FUNCTION - static bool assignable_layout_check(DstType & dst, const SrcType & src) //Runtime check + static bool assignable_layout_check(DstType &, const SrcType & src) //Runtime check { size_t strides[9]; bool assignable = true; @@ -3134,6 +3137,73 @@ public: // Subview mapping. // Deduce destination view type from source view traits and subview arguments +template +struct SubViewDataTypeImpl; + +/* base case */ +template +struct SubViewDataTypeImpl< + void, + ValueType, + Experimental::Extents<> +> +{ using type = ValueType; }; + +/* for integral args, subview doesn't have that dimension */ +template +struct SubViewDataTypeImpl< + typename std::enable_if::type>::value>::type, + ValueType, + Experimental::Extents, + Integral, Args... +> : SubViewDataTypeImpl< + void, ValueType, + Experimental::Extents, + Args... + > +{ }; + + +/* for ALL slice, subview has the same dimension */ +template +struct SubViewDataTypeImpl< + void, + ValueType, + Experimental::Extents, + ALL_t, Args... +> : SubViewDataTypeImpl< + void, typename ApplyExtent::type, + Experimental::Extents, + Args... + > +{ }; + + +/* for pair-style slice, subview has dynamic dimension, since pair doesn't give static sizes */ +/* Since we don't allow interleaving of dynamic and static extents, make all of the dimensions to the left dynamic */ +template +struct SubViewDataTypeImpl< + typename std::enable_if::value>::type, + ValueType, + Experimental::Extents, + PairLike, Args... 
+> : SubViewDataTypeImpl< + void, typename make_all_extents_into_pointers::type*, + Experimental::Extents, + Args... + > +{ }; + + +template +struct SubViewDataType + : SubViewDataTypeImpl< + void, ValueType, Exts, Args... + > +{ }; + +//---------------------------------------------------------------------------- + template< class SrcTraits , class ... Args > struct ViewMapping < typename std::enable_if<( @@ -3201,17 +3271,25 @@ private: typedef typename SrcTraits::value_type value_type ; - typedef typename std::conditional< rank == 0 , value_type , - typename std::conditional< rank == 1 , value_type * , - typename std::conditional< rank == 2 , value_type ** , - typename std::conditional< rank == 3 , value_type *** , - typename std::conditional< rank == 4 , value_type **** , - typename std::conditional< rank == 5 , value_type ***** , - typename std::conditional< rank == 6 , value_type ****** , - typename std::conditional< rank == 7 , value_type ******* , - value_type ******** - >::type >::type >::type >::type >::type >::type >::type >::type - data_type ; + using data_type = + typename SubViewDataType< + value_type, + typename Kokkos::Impl::ParseViewExtents< + typename SrcTraits::data_type + >::type, + Args... 
+ >::type; + //typedef typename std::conditional< rank == 0 , value_type , + // typename std::conditional< rank == 1 , value_type * , + // typename std::conditional< rank == 2 , value_type ** , + // typename std::conditional< rank == 3 , value_type *** , + // typename std::conditional< rank == 4 , value_type **** , + // typename std::conditional< rank == 5 , value_type ***** , + // typename std::conditional< rank == 6 , value_type ****** , + // typename std::conditional< rank == 7 , value_type ******* , + // value_type ******** + // >::type >::type >::type >::type >::type >::type >::type >::type + // data_type ; public: diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp index 716b9ceca5..a8645db451 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp @@ -50,6 +50,9 @@ namespace Kokkos { namespace Impl { +// =========================================================================== +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + // View mapping for rank two tiled array template< class L > @@ -208,11 +211,17 @@ struct ViewMapping } }; +#endif // KOKKOS_ENABLE_DEPRECATED_CODE +// =============================================================================== + } /* namespace Impl */ } /* namespace Kokkos */ namespace Kokkos { +// ============================================================================== +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + template< typename T , unsigned N0 , unsigned N1 , class ... P > KOKKOS_INLINE_FUNCTION Kokkos::View< T[N0][N1] , LayoutLeft , P... 
> @@ -229,6 +238,9 @@ tile_subview( const Kokkos::View,P...> & ( src , SrcLayout() , i_tile0 , i_tile1 ); } +#endif // KOKKOS_ENABLE_DEPRECATED_CODE +// =============================================================================== + } /* namespace Kokkos */ //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/unit_test/CMakeLists.txt b/lib/kokkos/core/unit_test/CMakeLists.txt index fad4e1d45e..6a480daa8d 100644 --- a/lib/kokkos/core/unit_test/CMakeLists.txt +++ b/lib/kokkos/core/unit_test/CMakeLists.txt @@ -11,6 +11,7 @@ IF(NOT KOKKOS_HAS_TRILINOS) ENDIF() SET(GTEST_SOURCE_DIR ${${PARENT_PACKAGE_NAME}_SOURCE_DIR}/tpls/gtest) +# TODO get the C++ standard flag from KOKKOS_CXX_STANDARD SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_PTHREAD=0") INCLUDE_DIRECTORIES(${GTEST_SOURCE_DIR}) @@ -21,6 +22,17 @@ TRIBITS_ADD_LIBRARY( TESTONLY ) +IF(NOT KOKKOS_HAS_TRILINOS) +target_compile_options( + kokkos_gtest + PUBLIC $<$:${KOKKOS_CXX_FLAGS}> +) +target_link_libraries( + kokkos_gtest + PUBLIC ${KOKKOS_LD_FLAGS} +) +ENDIF() + # # Define the tests # @@ -29,69 +41,212 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) IF(Kokkos_ENABLE_Serial) - TRIBITS_ADD_EXECUTABLE_AND_TEST( - UnitTest_Serial - SOURCES - UnitTestMainInit.cpp - serial/TestSerial_AtomicOperations_int.cpp - serial/TestSerial_AtomicOperations_unsignedint.cpp - serial/TestSerial_AtomicOperations_longint.cpp - serial/TestSerial_AtomicOperations_unsignedlongint.cpp - serial/TestSerial_AtomicOperations_longlongint.cpp - serial/TestSerial_AtomicOperations_double.cpp - serial/TestSerial_AtomicOperations_float.cpp - serial/TestSerial_AtomicViews.cpp - serial/TestSerial_Atomics.cpp - serial/TestSerial_Complex.cpp - serial/TestSerial_Init.cpp - serial/TestSerial_MDRange_a.cpp - serial/TestSerial_MDRange_b.cpp - serial/TestSerial_MDRange_c.cpp - serial/TestSerial_MDRange_d.cpp 
- serial/TestSerial_MDRange_e.cpp - serial/TestSerial_Other.cpp - serial/TestSerial_RangePolicy.cpp - serial/TestSerial_Reductions.cpp - serial/TestSerial_Reducers_a.cpp - serial/TestSerial_Reducers_b.cpp - serial/TestSerial_Reducers_c.cpp - serial/TestSerial_Reducers_d.cpp - serial/TestSerial_Scan.cpp - serial/TestSerial_SharedAlloc.cpp - serial/TestSerial_SubView_a.cpp - serial/TestSerial_SubView_b.cpp - serial/TestSerial_SubView_c01.cpp - serial/TestSerial_SubView_c02.cpp - serial/TestSerial_SubView_c03.cpp - serial/TestSerial_SubView_c04.cpp - serial/TestSerial_SubView_c05.cpp - serial/TestSerial_SubView_c06.cpp - serial/TestSerial_SubView_c07.cpp - serial/TestSerial_SubView_c08.cpp - serial/TestSerial_SubView_c09.cpp - serial/TestSerial_SubView_c10.cpp - serial/TestSerial_SubView_c11.cpp - serial/TestSerial_SubView_c12.cpp - serial/TestSerial_SubView_c13.cpp - serial/TestSerial_Team.cpp - serial/TestSerial_TeamReductionScan.cpp - serial/TestSerial_TeamScratch.cpp - serial/TestSerial_ViewAPI_a.cpp - serial/TestSerial_ViewAPI_b.cpp - serial/TestSerial_ViewAPI_c.cpp - serial/TestSerial_ViewAPI_d.cpp - serial/TestSerial_ViewAPI_e.cpp - serial/TestSerial_ViewMapping_a.cpp - serial/TestSerial_ViewMapping_b.cpp - serial/TestSerial_ViewMapping_subview.cpp - serial/TestSerial_ViewOfClass.cpp - serial/TestSerial_Crs.cpp - serial/TestSerial_WorkGraph.cpp - COMM serial mpi - NUM_MPI_PROCS 1 - FAIL_REGULAR_EXPRESSION " FAILED " - TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} - ) + IF(KOKKOS_SEPARATE_TESTS) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_Atomics + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_AtomicOperations_int.cpp + serial/TestSerial_AtomicOperations_unsignedint.cpp + serial/TestSerial_AtomicOperations_longint.cpp + serial/TestSerial_AtomicOperations_unsignedlongint.cpp + serial/TestSerial_AtomicOperations_longlongint.cpp + serial/TestSerial_AtomicOperations_double.cpp + serial/TestSerial_AtomicOperations_float.cpp + 
serial/TestSerial_AtomicOperations_complexdouble.cpp + serial/TestSerial_AtomicOperations_complexfloat.cpp + serial/TestSerial_AtomicViews.cpp + serial/TestSerial_Atomics.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_SubView + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_SubView_a.cpp + serial/TestSerial_SubView_b.cpp + serial/TestSerial_SubView_c01.cpp + serial/TestSerial_SubView_c02.cpp + serial/TestSerial_SubView_c03.cpp + serial/TestSerial_SubView_c04.cpp + serial/TestSerial_SubView_c05.cpp + serial/TestSerial_SubView_c06.cpp + serial/TestSerial_SubView_c07.cpp + serial/TestSerial_SubView_c08.cpp + serial/TestSerial_SubView_c09.cpp + serial/TestSerial_SubView_c10.cpp + serial/TestSerial_SubView_c11.cpp + serial/TestSerial_SubView_c12.cpp + serial/TestSerial_SubView_c13.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_ViewAPI + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_ViewAPI_a.cpp + serial/TestSerial_ViewAPI_b.cpp + serial/TestSerial_ViewAPI_c.cpp + serial/TestSerial_ViewAPI_d.cpp + serial/TestSerial_ViewAPI_e.cpp + serial/TestSerial_ViewOfClass.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_ViewMapping + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_ViewMapping_a.cpp + serial/TestSerial_ViewMapping_b.cpp + serial/TestSerial_ViewMapping_subview.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_Reducers + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_Reductions.cpp + serial/TestSerial_Reducers_a.cpp + 
serial/TestSerial_Reducers_b.cpp + serial/TestSerial_Reducers_c.cpp + serial/TestSerial_Reducers_d.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_MDRange + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_MDRange_a.cpp + serial/TestSerial_MDRange_b.cpp + serial/TestSerial_MDRange_c.cpp + serial/TestSerial_MDRange_d.cpp + serial/TestSerial_MDRange_e.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_Team + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_Team.cpp + serial/TestSerial_TeamReductionScan.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_Tasking + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_Task.cpp + serial/TestSerial_WorkGraph.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial_Misc + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_Complex.cpp + serial/TestSerial_Init.cpp + serial/TestSerial_Other.cpp + serial/TestSerial_RangePolicy.cpp + serial/TestSerial_Scan.cpp + serial/TestSerial_SharedAlloc.cpp + serial/TestSerial_Crs.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + ELSE() + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial + SOURCES + UnitTestMainInit.cpp + serial/TestSerial_AtomicOperations_int.cpp + serial/TestSerial_AtomicOperations_unsignedint.cpp + serial/TestSerial_AtomicOperations_longint.cpp + serial/TestSerial_AtomicOperations_unsignedlongint.cpp + serial/TestSerial_AtomicOperations_longlongint.cpp + 
serial/TestSerial_AtomicOperations_double.cpp + serial/TestSerial_AtomicOperations_float.cpp + serial/TestSerial_AtomicOperations_complexdouble.cpp + serial/TestSerial_AtomicOperations_complexfloat.cpp + serial/TestSerial_AtomicViews.cpp + serial/TestSerial_Atomics.cpp + serial/TestSerial_Complex.cpp + serial/TestSerial_Init.cpp + serial/TestSerial_MDRange_a.cpp + serial/TestSerial_MDRange_b.cpp + serial/TestSerial_MDRange_c.cpp + serial/TestSerial_MDRange_d.cpp + serial/TestSerial_MDRange_e.cpp + serial/TestSerial_Other.cpp + serial/TestSerial_RangePolicy.cpp + serial/TestSerial_Reductions.cpp + serial/TestSerial_Reducers_a.cpp + serial/TestSerial_Reducers_b.cpp + serial/TestSerial_Reducers_c.cpp + serial/TestSerial_Reducers_d.cpp + serial/TestSerial_Scan.cpp + serial/TestSerial_SharedAlloc.cpp + serial/TestSerial_SubView_a.cpp + serial/TestSerial_SubView_b.cpp + serial/TestSerial_SubView_c01.cpp + serial/TestSerial_SubView_c02.cpp + serial/TestSerial_SubView_c03.cpp + serial/TestSerial_SubView_c04.cpp + serial/TestSerial_SubView_c05.cpp + serial/TestSerial_SubView_c06.cpp + serial/TestSerial_SubView_c07.cpp + serial/TestSerial_SubView_c08.cpp + serial/TestSerial_SubView_c09.cpp + serial/TestSerial_SubView_c10.cpp + serial/TestSerial_SubView_c11.cpp + serial/TestSerial_SubView_c12.cpp + serial/TestSerial_SubView_c13.cpp + serial/TestSerial_Task.cpp + serial/TestSerial_Team.cpp + serial/TestSerial_TeamReductionScan.cpp + serial/TestSerial_TeamScratch.cpp + serial/TestSerial_ViewAPI_a.cpp + serial/TestSerial_ViewAPI_b.cpp + serial/TestSerial_ViewAPI_c.cpp + serial/TestSerial_ViewAPI_d.cpp + serial/TestSerial_ViewAPI_e.cpp + serial/TestSerial_ViewMapping_a.cpp + serial/TestSerial_ViewMapping_b.cpp + serial/TestSerial_ViewMapping_subview.cpp + serial/TestSerial_ViewOfClass.cpp + serial/TestSerial_Crs.cpp + serial/TestSerial_WorkGraph.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + 
ENDIF() ENDIF() IF(Kokkos_ENABLE_Pthread) @@ -106,6 +261,8 @@ IF(Kokkos_ENABLE_Pthread) threads/TestThreads_AtomicOperations_longlongint.cpp threads/TestThreads_AtomicOperations_double.cpp threads/TestThreads_AtomicOperations_float.cpp + threads/TestThreads_AtomicOperations_complexdouble.cpp + threads/TestThreads_AtomicOperations_complexfloat.cpp threads/TestThreads_AtomicViews.cpp threads/TestThreads_Atomics.cpp threads/TestThreads_Complex.cpp @@ -161,75 +318,305 @@ IF(Kokkos_ENABLE_Pthread) ENDIF() IF(Kokkos_ENABLE_OpenMP) + IF(KOKKOS_SEPARATE_TESTS) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_Atomics + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_AtomicOperations_int.cpp + openmp/TestOpenMP_AtomicOperations_unsignedint.cpp + openmp/TestOpenMP_AtomicOperations_longint.cpp + openmp/TestOpenMP_AtomicOperations_unsignedlongint.cpp + openmp/TestOpenMP_AtomicOperations_longlongint.cpp + openmp/TestOpenMP_AtomicOperations_double.cpp + openmp/TestOpenMP_AtomicOperations_float.cpp + openmp/TestOpenMP_AtomicOperations_complexdouble.cpp + openmp/TestOpenMP_AtomicOperations_complexfloat.cpp + openmp/TestOpenMP_AtomicViews.cpp + openmp/TestOpenMP_Atomics.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_SubView + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_SubView_a.cpp + openmp/TestOpenMP_SubView_b.cpp + openmp/TestOpenMP_SubView_c01.cpp + openmp/TestOpenMP_SubView_c02.cpp + openmp/TestOpenMP_SubView_c03.cpp + openmp/TestOpenMP_SubView_c04.cpp + openmp/TestOpenMP_SubView_c05.cpp + openmp/TestOpenMP_SubView_c06.cpp + openmp/TestOpenMP_SubView_c07.cpp + openmp/TestOpenMP_SubView_c08.cpp + openmp/TestOpenMP_SubView_c09.cpp + openmp/TestOpenMP_SubView_c10.cpp + openmp/TestOpenMP_SubView_c11.cpp + openmp/TestOpenMP_SubView_c12.cpp + openmp/TestOpenMP_SubView_c13.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION 
" FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_ViewAPI + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_ViewAPI_a.cpp + openmp/TestOpenMP_ViewAPI_b.cpp + openmp/TestOpenMP_ViewAPI_c.cpp + openmp/TestOpenMP_ViewAPI_d.cpp + openmp/TestOpenMP_ViewAPI_e.cpp + openmp/TestOpenMP_ViewOfClass.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_ViewMapping + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_ViewMapping_a.cpp + openmp/TestOpenMP_ViewMapping_b.cpp + openmp/TestOpenMP_ViewMapping_subview.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_Reducers + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_Reductions.cpp + openmp/TestOpenMP_Reducers_a.cpp + openmp/TestOpenMP_Reducers_b.cpp + openmp/TestOpenMP_Reducers_c.cpp + openmp/TestOpenMP_Reducers_d.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_MDRange + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_MDRange_a.cpp + openmp/TestOpenMP_MDRange_b.cpp + openmp/TestOpenMP_MDRange_c.cpp + openmp/TestOpenMP_MDRange_d.cpp + openmp/TestOpenMP_MDRange_e.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_Team + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_Team.cpp + openmp/TestOpenMP_TeamReductionScan.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_Tasking + SOURCES + 
UnitTestMainInit.cpp + openmp/TestOpenMP_Task.cpp + openmp/TestOpenMP_WorkGraph.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP_Misc + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_Complex.cpp + openmp/TestOpenMP_Init.cpp + openmp/TestOpenMP_Other.cpp + openmp/TestOpenMP_RangePolicy.cpp + openmp/TestOpenMP_Scan.cpp + openmp/TestOpenMP_SharedAlloc.cpp + openmp/TestOpenMP_Crs.cpp + openmp/TestOpenMP_UniqueToken.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMPInterOp + SOURCES + UnitTestMain.cpp + openmp/TestOpenMP_InterOp.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + ELSE() + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP + SOURCES + UnitTestMainInit.cpp + openmp/TestOpenMP_AtomicOperations_int.cpp + openmp/TestOpenMP_AtomicOperations_unsignedint.cpp + openmp/TestOpenMP_AtomicOperations_longint.cpp + openmp/TestOpenMP_AtomicOperations_unsignedlongint.cpp + openmp/TestOpenMP_AtomicOperations_longlongint.cpp + openmp/TestOpenMP_AtomicOperations_double.cpp + openmp/TestOpenMP_AtomicOperations_float.cpp + openmp/TestOpenMP_AtomicOperations_complexdouble.cpp + openmp/TestOpenMP_AtomicOperations_complexfloat.cpp + openmp/TestOpenMP_AtomicViews.cpp + openmp/TestOpenMP_Atomics.cpp + openmp/TestOpenMP_Complex.cpp + openmp/TestOpenMP_Init.cpp + openmp/TestOpenMP_MDRange_a.cpp + openmp/TestOpenMP_MDRange_b.cpp + openmp/TestOpenMP_MDRange_c.cpp + openmp/TestOpenMP_MDRange_d.cpp + openmp/TestOpenMP_MDRange_e.cpp + openmp/TestOpenMP_Other.cpp + openmp/TestOpenMP_RangePolicy.cpp + openmp/TestOpenMP_Reductions.cpp + openmp/TestOpenMP_Reducers_a.cpp + openmp/TestOpenMP_Reducers_b.cpp + openmp/TestOpenMP_Reducers_c.cpp + 
openmp/TestOpenMP_Reducers_d.cpp + openmp/TestOpenMP_Scan.cpp + openmp/TestOpenMP_SharedAlloc.cpp + openmp/TestOpenMP_SubView_a.cpp + openmp/TestOpenMP_SubView_b.cpp + openmp/TestOpenMP_SubView_c01.cpp + openmp/TestOpenMP_SubView_c02.cpp + openmp/TestOpenMP_SubView_c03.cpp + openmp/TestOpenMP_SubView_c04.cpp + openmp/TestOpenMP_SubView_c05.cpp + openmp/TestOpenMP_SubView_c06.cpp + openmp/TestOpenMP_SubView_c07.cpp + openmp/TestOpenMP_SubView_c08.cpp + openmp/TestOpenMP_SubView_c09.cpp + openmp/TestOpenMP_SubView_c10.cpp + openmp/TestOpenMP_SubView_c11.cpp + openmp/TestOpenMP_SubView_c12.cpp + openmp/TestOpenMP_SubView_c13.cpp + openmp/TestOpenMP_Task.cpp + openmp/TestOpenMP_Team.cpp + openmp/TestOpenMP_TeamReductionScan.cpp + openmp/TestOpenMP_ViewAPI_a.cpp + openmp/TestOpenMP_ViewAPI_b.cpp + openmp/TestOpenMP_ViewAPI_c.cpp + openmp/TestOpenMP_ViewAPI_d.cpp + openmp/TestOpenMP_ViewAPI_e.cpp + openmp/TestOpenMP_ViewMapping_a.cpp + openmp/TestOpenMP_ViewMapping_b.cpp + openmp/TestOpenMP_ViewMapping_subview.cpp + openmp/TestOpenMP_ViewOfClass.cpp + openmp/TestOpenMP_Crs.cpp + openmp/TestOpenMP_WorkGraph.cpp + openmp/TestOpenMP_UniqueToken.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMPInterOp + SOURCES + UnitTestMain.cpp + openmp/TestOpenMP_InterOp.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + ENDIF() +ENDIF() + +IF(Kokkos_ENABLE_HPX) TRIBITS_ADD_EXECUTABLE_AND_TEST( - UnitTest_OpenMP + UnitTest_HPX SOURCES UnitTestMainInit.cpp - openmp/TestOpenMP_AtomicOperations_int.cpp - openmp/TestOpenMP_AtomicOperations_unsignedint.cpp - openmp/TestOpenMP_AtomicOperations_longint.cpp - openmp/TestOpenMP_AtomicOperations_unsignedlongint.cpp - openmp/TestOpenMP_AtomicOperations_longlongint.cpp - openmp/TestOpenMP_AtomicOperations_double.cpp - 
openmp/TestOpenMP_AtomicOperations_float.cpp - openmp/TestOpenMP_AtomicViews.cpp - openmp/TestOpenMP_Atomics.cpp - openmp/TestOpenMP_Complex.cpp - openmp/TestOpenMP_Init.cpp - openmp/TestOpenMP_MDRange_a.cpp - openmp/TestOpenMP_MDRange_b.cpp - openmp/TestOpenMP_MDRange_c.cpp - openmp/TestOpenMP_MDRange_d.cpp - openmp/TestOpenMP_MDRange_e.cpp - openmp/TestOpenMP_Other.cpp - openmp/TestOpenMP_RangePolicy.cpp - openmp/TestOpenMP_Reductions.cpp - openmp/TestOpenMP_Reducers_a.cpp - openmp/TestOpenMP_Reducers_b.cpp - openmp/TestOpenMP_Reducers_c.cpp - openmp/TestOpenMP_Reducers_d.cpp - openmp/TestOpenMP_Scan.cpp - openmp/TestOpenMP_SharedAlloc.cpp - openmp/TestOpenMP_SubView_a.cpp - openmp/TestOpenMP_SubView_b.cpp - openmp/TestOpenMP_SubView_c01.cpp - openmp/TestOpenMP_SubView_c02.cpp - openmp/TestOpenMP_SubView_c03.cpp - openmp/TestOpenMP_SubView_c04.cpp - openmp/TestOpenMP_SubView_c05.cpp - openmp/TestOpenMP_SubView_c06.cpp - openmp/TestOpenMP_SubView_c07.cpp - openmp/TestOpenMP_SubView_c08.cpp - openmp/TestOpenMP_SubView_c09.cpp - openmp/TestOpenMP_SubView_c10.cpp - openmp/TestOpenMP_SubView_c11.cpp - openmp/TestOpenMP_SubView_c12.cpp - openmp/TestOpenMP_SubView_c13.cpp - openmp/TestOpenMP_Task.cpp - openmp/TestOpenMP_Team.cpp - openmp/TestOpenMP_TeamReductionScan.cpp - openmp/TestOpenMP_ViewAPI_a.cpp - openmp/TestOpenMP_ViewAPI_b.cpp - openmp/TestOpenMP_ViewAPI_c.cpp - openmp/TestOpenMP_ViewAPI_d.cpp - openmp/TestOpenMP_ViewAPI_e.cpp - openmp/TestOpenMP_ViewMapping_a.cpp - openmp/TestOpenMP_ViewMapping_b.cpp - openmp/TestOpenMP_ViewMapping_subview.cpp - openmp/TestOpenMP_ViewOfClass.cpp - openmp/TestOpenMP_Crs.cpp - openmp/TestOpenMP_WorkGraph.cpp - openmp/TestOpenMP_UniqueToken.cpp + hpx/TestHPX_AtomicOperations_int.cpp + hpx/TestHPX_AtomicOperations_unsignedint.cpp + hpx/TestHPX_AtomicOperations_longint.cpp + hpx/TestHPX_AtomicOperations_unsignedlongint.cpp + hpx/TestHPX_AtomicOperations_longlongint.cpp + hpx/TestHPX_AtomicOperations_double.cpp + 
hpx/TestHPX_AtomicOperations_float.cpp + hpx/TestHPX_AtomicViews.cpp + hpx/TestHPX_Atomics.cpp + hpx/TestHPX_Complex.cpp + hpx/TestHPX_Init.cpp + hpx/TestHPX_MDRange_a.cpp + hpx/TestHPX_MDRange_b.cpp + hpx/TestHPX_MDRange_c.cpp + hpx/TestHPX_MDRange_d.cpp + hpx/TestHPX_MDRange_e.cpp + hpx/TestHPX_Other.cpp + hpx/TestHPX_RangePolicy.cpp + hpx/TestHPX_Reductions.cpp + hpx/TestHPX_Reducers_a.cpp + hpx/TestHPX_Reducers_b.cpp + hpx/TestHPX_Reducers_c.cpp + hpx/TestHPX_Reducers_d.cpp + hpx/TestHPX_Scan.cpp + hpx/TestHPX_SharedAlloc.cpp + hpx/TestHPX_SubView_a.cpp + hpx/TestHPX_SubView_b.cpp + hpx/TestHPX_SubView_c01.cpp + hpx/TestHPX_SubView_c02.cpp + hpx/TestHPX_SubView_c03.cpp + hpx/TestHPX_SubView_c04.cpp + hpx/TestHPX_SubView_c05.cpp + hpx/TestHPX_SubView_c06.cpp + hpx/TestHPX_SubView_c07.cpp + hpx/TestHPX_SubView_c08.cpp + hpx/TestHPX_SubView_c09.cpp + hpx/TestHPX_SubView_c10.cpp + hpx/TestHPX_SubView_c11.cpp + hpx/TestHPX_SubView_c12.cpp + hpx/TestHPX_SubView_c13.cpp + hpx/TestHPX_Task.cpp + hpx/TestHPX_Team.cpp + hpx/TestHPX_TeamReductionScan.cpp + hpx/TestHPX_ViewAPI_a.cpp + hpx/TestHPX_ViewAPI_b.cpp + hpx/TestHPX_ViewAPI_c.cpp + hpx/TestHPX_ViewAPI_d.cpp + hpx/TestHPX_ViewAPI_e.cpp + hpx/TestHPX_ViewMapping_a.cpp + hpx/TestHPX_ViewMapping_b.cpp + hpx/TestHPX_ViewMapping_subview.cpp + hpx/TestHPX_ViewOfClass.cpp + hpx/TestHPX_Crs.cpp + hpx/TestHPX_WorkGraph.cpp + hpx/TestHPX_UniqueToken.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) TRIBITS_ADD_EXECUTABLE_AND_TEST( - UnitTest_OpenMPInterOp + UnitTest_HPXInterOp SOURCES UnitTestMain.cpp - openmp/TestOpenMP_InterOp.cpp + hpx/TestHPX_InterOp.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -310,6 +697,8 @@ IF(Kokkos_ENABLE_Cuda) cuda/TestCuda_AtomicOperations_longlongint.cpp cuda/TestCuda_AtomicOperations_double.cpp cuda/TestCuda_AtomicOperations_float.cpp + cuda/TestCuda_AtomicOperations_complexdouble.cpp + 
cuda/TestCuda_AtomicOperations_complexfloat.cpp cuda/TestCuda_AtomicViews.cpp cuda/TestCuda_Atomics.cpp cuda/TestCuda_Complex.cpp @@ -366,10 +755,20 @@ IF(Kokkos_ENABLE_Cuda) TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) TRIBITS_ADD_EXECUTABLE_AND_TEST( - UnitTest_CudaInterOp + UnitTest_CudaInterOpInit SOURCES UnitTestMain.cpp - cuda/TestCuda_InterOp.cpp + cuda/TestCuda_InterOp_Init.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_CudaInterOpStreams + SOURCES + UnitTestMain.cpp + cuda/TestCuda_InterOp_Streams.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -456,3 +855,40 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( FAIL_REGULAR_EXPRESSION " FAILED " TESTONLYLIBS kokkos_gtest ${TEST_LINK_TARGETS} ) + +# +# Compile-only tests +# +FUNCTION(KOKKOS_ADD_COMPILE_TEST TEST_NAME) + + SET(options LINK_KOKKOS) + SET(oneValueArgs) + SET(multiValueArgs) + + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + IF(PARSE_LINK_KOKKOS) + SET(libs ${TEST_LINK_TARGETS}) + ELSE() + SET(libs) + ENDIF() + + TRIBITS_ADD_EXECUTABLE( + ${TEST_NAME} + TESTONLY + COMM serial + TESTONLYLIBS ${libs} + ${PARSE_UNPARSED_ARGUMENTS} + ) + + target_compile_options( + ${PACKAGE_NAME}_${TEST_NAME} + PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_CXX_FLAGS}> + ) + target_link_libraries( + ${PACKAGE_NAME}_${TEST_NAME} + PUBLIC ${KOKKOS_LD_FLAGS} + ) + +ENDFUNCTION() + diff --git a/lib/kokkos/core/unit_test/Makefile b/lib/kokkos/core/unit_test/Makefile index 72832271c8..5a69213108 100644 --- a/lib/kokkos/core/unit_test/Makefile +++ b/lib/kokkos/core/unit_test/Makefile @@ -9,6 +9,7 @@ vpath %.cpp ${KOKKOS_PATH}/core/unit_test/threads vpath %.cpp ${KOKKOS_PATH}/core/unit_test/openmp vpath %.cpp ${KOKKOS_PATH}/core/unit_test/openmptarget vpath %.cpp ${KOKKOS_PATH}/core/unit_test/qthreads +vpath %.cpp ${KOKKOS_PATH}/core/unit_test/hpx vpath %.cpp
${KOKKOS_PATH}/core/unit_test/cuda vpath %.cpp ${KOKKOS_PATH}/core/unit_test/rocm @@ -38,253 +39,310 @@ TEST_TARGETS = TARGETS = ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - OBJ_CUDA = UnitTestMainInit.o gtest-all.o - OBJ_CUDA += TestCuda_Init.o - OBJ_CUDA += TestCuda_SharedAlloc.o TestCudaUVM_SharedAlloc.o TestCudaHostPinned_SharedAlloc.o - OBJ_CUDA += TestCuda_RangePolicy.o - OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o TestCuda_ViewAPI_e.o - OBJ_CUDA += TestCuda_ViewMapping_a.o TestCuda_ViewMapping_b.o TestCuda_ViewMapping_subview.o TestCuda_ViewLayoutStrideAssignment.o - OBJ_CUDA += TestCudaUVM_ViewCopy.o TestCudaUVM_ViewAPI_a.o TestCudaUVM_ViewAPI_b.o TestCudaUVM_ViewAPI_c.o TestCudaUVM_ViewAPI_d.o TestCudaUVM_ViewAPI_e.o - OBJ_CUDA += TestCudaUVM_ViewMapping_a.o TestCudaUVM_ViewMapping_b.o TestCudaUVM_ViewMapping_subview.o - OBJ_CUDA += TestCudaHostPinned_ViewCopy.o TestCudaHostPinned_ViewAPI_a.o TestCudaHostPinned_ViewAPI_b.o TestCudaHostPinned_ViewAPI_c.o TestCudaHostPinned_ViewAPI_d.o TestCudaHostPinned_ViewAPI_e.o - OBJ_CUDA += TestCudaHostPinned_ViewMapping_a.o TestCudaHostPinned_ViewMapping_b.o TestCudaHostPinned_ViewMapping_subview.o - OBJ_CUDA += TestCuda_View_64bit.o - OBJ_CUDA += TestCuda_ViewOfClass.o - OBJ_CUDA += TestCuda_SubView_a.o TestCuda_SubView_b.o - OBJ_CUDA += TestCuda_SubView_c01.o TestCuda_SubView_c02.o TestCuda_SubView_c03.o - OBJ_CUDA += TestCuda_SubView_c04.o TestCuda_SubView_c05.o TestCuda_SubView_c06.o - OBJ_CUDA += TestCuda_SubView_c07.o TestCuda_SubView_c08.o TestCuda_SubView_c09.o - OBJ_CUDA += TestCuda_SubView_c10.o TestCuda_SubView_c11.o TestCuda_SubView_c12.o - OBJ_CUDA += TestCuda_SubView_c13.o - OBJ_CUDA += TestCuda_Reductions.o TestCuda_Scan.o - OBJ_CUDA += TestCuda_Reductions_DeviceView.o - OBJ_CUDA += TestCuda_Reducers_a.o TestCuda_Reducers_b.o TestCuda_Reducers_c.o TestCuda_Reducers_d.o - OBJ_CUDA += TestCuda_Complex.o - OBJ_CUDA += TestCuda_AtomicOperations_int.o 
TestCuda_AtomicOperations_unsignedint.o TestCuda_AtomicOperations_longint.o - OBJ_CUDA += TestCuda_AtomicOperations_unsignedlongint.o TestCuda_AtomicOperations_longlongint.o TestCuda_AtomicOperations_double.o TestCuda_AtomicOperations_float.o - OBJ_CUDA += TestCuda_AtomicViews.o TestCuda_Atomics.o - OBJ_CUDA += TestCuda_Team.o TestCuda_TeamScratch.o - OBJ_CUDA += TestCuda_TeamReductionScan.o TestCuda_TeamTeamSize.o - OBJ_CUDA += TestCuda_Other.o - OBJ_CUDA += TestCuda_MDRange_a.o TestCuda_MDRange_b.o TestCuda_MDRange_c.o TestCuda_MDRange_d.o TestCuda_MDRange_e.o - OBJ_CUDA += TestCuda_Crs.o - OBJ_CUDA += TestCuda_Task.o TestCuda_WorkGraph.o - OBJ_CUDA += TestCuda_Spaces.o - OBJ_CUDA += TestCuda_UniqueToken.o - - TARGETS += KokkosCore_UnitTest_Cuda - TARGETS += KokkosCore_UnitTest_CudaInterOp - TEST_TARGETS += test-cuda + OBJ_CUDA = UnitTestMainInit.o gtest-all.o + OBJ_CUDA += TestCuda_Init.o + OBJ_CUDA += TestCuda_SharedAlloc.o TestCudaUVM_SharedAlloc.o TestCudaHostPinned_SharedAlloc.o + OBJ_CUDA += TestCuda_RangePolicy.o + OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o TestCuda_ViewAPI_e.o + OBJ_CUDA += TestCuda_DeepCopyAlignment.o + OBJ_CUDA += TestCuda_ViewMapping_a.o TestCuda_ViewMapping_b.o TestCuda_ViewMapping_subview.o TestCuda_ViewLayoutStrideAssignment.o + OBJ_CUDA += TestCudaUVM_ViewCopy.o TestCudaUVM_ViewAPI_a.o TestCudaUVM_ViewAPI_b.o TestCudaUVM_ViewAPI_c.o TestCudaUVM_ViewAPI_d.o TestCudaUVM_ViewAPI_e.o + OBJ_CUDA += TestCudaUVM_ViewMapping_a.o TestCudaUVM_ViewMapping_b.o TestCudaUVM_ViewMapping_subview.o + OBJ_CUDA += TestCudaHostPinned_ViewCopy.o TestCudaHostPinned_ViewAPI_a.o TestCudaHostPinned_ViewAPI_b.o TestCudaHostPinned_ViewAPI_c.o TestCudaHostPinned_ViewAPI_d.o TestCudaHostPinned_ViewAPI_e.o + OBJ_CUDA += TestCudaHostPinned_ViewMapping_a.o TestCudaHostPinned_ViewMapping_b.o TestCudaHostPinned_ViewMapping_subview.o + OBJ_CUDA += TestCuda_View_64bit.o + OBJ_CUDA += TestCuda_ViewOfClass.o + 
OBJ_CUDA += TestCuda_SubView_a.o TestCuda_SubView_b.o + OBJ_CUDA += TestCuda_SubView_c01.o TestCuda_SubView_c02.o TestCuda_SubView_c03.o + OBJ_CUDA += TestCuda_SubView_c04.o TestCuda_SubView_c05.o TestCuda_SubView_c06.o + OBJ_CUDA += TestCuda_SubView_c07.o TestCuda_SubView_c08.o TestCuda_SubView_c09.o + OBJ_CUDA += TestCuda_SubView_c10.o TestCuda_SubView_c11.o TestCuda_SubView_c12.o + OBJ_CUDA += TestCuda_SubView_c13.o + OBJ_CUDA += TestCuda_Reductions.o TestCuda_Scan.o + OBJ_CUDA += TestCuda_Reductions_DeviceView.o + OBJ_CUDA += TestCuda_Reducers_a.o TestCuda_Reducers_b.o TestCuda_Reducers_c.o TestCuda_Reducers_d.o + OBJ_CUDA += TestCuda_Complex.o + OBJ_CUDA += TestCuda_AtomicOperations_int.o TestCuda_AtomicOperations_unsignedint.o TestCuda_AtomicOperations_longint.o + OBJ_CUDA += TestCuda_AtomicOperations_unsignedlongint.o TestCuda_AtomicOperations_longlongint.o TestCuda_AtomicOperations_double.o TestCuda_AtomicOperations_float.o + OBJ_CUDA += TestCuda_AtomicOperations_complexfloat.o TestCuda_AtomicOperations_complexdouble.o + OBJ_CUDA += TestCuda_AtomicViews.o TestCuda_Atomics.o + OBJ_CUDA += TestCuda_Team.o TestCuda_TeamScratch.o + OBJ_CUDA += TestCuda_TeamReductionScan.o TestCuda_TeamTeamSize.o + OBJ_CUDA += TestCuda_TeamVectorRange.o + OBJ_CUDA += TestCuda_Other.o + OBJ_CUDA += TestCuda_MDRange_a.o TestCuda_MDRange_b.o TestCuda_MDRange_c.o TestCuda_MDRange_d.o TestCuda_MDRange_e.o + OBJ_CUDA += TestCuda_Crs.o + OBJ_CUDA += TestCuda_Task.o TestCuda_WorkGraph.o + OBJ_CUDA += TestCuda_Spaces.o + OBJ_CUDA += TestCuda_UniqueToken.o + OBJ_CUDA += TestCuda_LocalDeepCopy.o + + TARGETS += KokkosCore_UnitTest_Cuda + TARGETS += KokkosCore_UnitTest_CudaInterOpInit + TARGETS += KokkosCore_UnitTest_CudaInterOpStreams + TEST_TARGETS += test-cuda endif ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) - OBJ_ROCM = UnitTestMainInit.o gtest-all.o - OBJ_ROCM += TestROCm_Init.o - OBJ_ROCM += TestROCm_Complex.o - OBJ_ROCM += TestROCm_RangePolicy.o - OBJ_ROCM += 
TestROCm_AtomicOperations_int.o TestROCm_AtomicOperations_unsignedint.o TestROCm_AtomicOperations_longint.o - OBJ_ROCM += TestROCm_AtomicOperations_unsignedlongint.o TestROCm_AtomicOperations_longlongint.o TestROCm_AtomicOperations_double.o TestROCm_AtomicOperations_float.o - OBJ_ROCM += TestROCm_Atomics.o - OBJ_ROCM += TestROCm_AtomicViews.o - OBJ_ROCM += TestROCm_Other.o - OBJ_ROCM += TestROCm_MDRange_a.o TestROCm_MDRange_b.o TestROCm_MDRange_c.o TestROCm_MDRange_d.o TestROCm_MDRange_e.o - OBJ_ROCM += TestROCm_MDRangeReduce_a.o TestROCm_MDRangeReduce_b.o TestROCm_MDRangeReduce_c.o TestROCm_MDRangeReduce_d.o TestROCm_MDRangeReduce_e.o - OBJ_ROCM += TestROCm_Reductions.o - OBJ_ROCM += TestROCm_Reducers_a.o TestROCm_Reducers_b.o TestROCm_Reducers_c.o TestROCm_Reducers_d.o - OBJ_ROCM += TestROCm_Scan.o - OBJ_ROCM += TestROCm_SharedAlloc.o - OBJ_ROCM += TestROCm_SubView_a.o - OBJ_ROCM += TestROCm_SubView_b.o - OBJ_ROCM += TestROCm_SubView_c01.o - OBJ_ROCM += TestROCm_SubView_c02.o - OBJ_ROCM += TestROCm_SubView_c03.o - OBJ_ROCM += TestROCm_SubView_c04.o - OBJ_ROCM += TestROCm_SubView_c05.o - OBJ_ROCM += TestROCm_SubView_c06.o - OBJ_ROCM += TestROCm_SubView_c07.o - OBJ_ROCM += TestROCm_SubView_c08.o - OBJ_ROCM += TestROCm_SubView_c09.o - OBJ_ROCM += TestROCm_SubView_c10.o - OBJ_ROCM += TestROCm_SubView_c11.o - OBJ_ROCM += TestROCm_SubView_c12.o - OBJ_ROCM += TestROCm_SubView_c13.o - OBJ_ROCM += TestROCm_Team.o - OBJ_ROCM += TestROCm_TeamReductionScan.o - OBJ_ROCM += TestROCm_TeamScratch.o TestROCm_TeamTeamSize.o - OBJ_ROCM += TestROCm_ViewAPI_a.o TestROCm_ViewAPI_b.o TestROCm_ViewAPI_c.o TestROCm_ViewAPI_d.o TestROCm_ViewAPI_e.o - OBJ_ROCM += TestROCm_ViewMapping_a.o - OBJ_ROCM += TestROCm_ViewMapping_b.o - OBJ_ROCM += TestROCm_ViewMapping_subview.o - OBJ_ROCM += TestROCmHostPinned_ViewCopy.o TestROCmHostPinned_ViewAPI_a.o TestROCmHostPinned_ViewAPI_b.o TestROCmHostPinned_ViewAPI_c.o TestROCmHostPinned_ViewAPI_d.o TestROCmHostPinned_ViewAPI_e.o - OBJ_ROCM += 
TestROCmHostPinned_View_64bit.o - OBJ_ROCM += TestROCmHostPinned_ViewMapping_a.o - OBJ_ROCM += TestROCmHostPinned_ViewMapping_b.o - OBJ_ROCM += TestROCmHostPinned_ViewMapping_subview.o - OBJ_ROCM += TestROCm_ViewOfClass.o - OBJ_ROCM += TestROCm_Spaces.o - OBJ_ROCM += TestROCm_Crs.o - - TARGETS += KokkosCore_UnitTest_ROCm - TEST_TARGETS += test-rocm + OBJ_ROCM = UnitTestMainInit.o gtest-all.o + OBJ_ROCM += TestROCm_Init.o + OBJ_ROCM += TestROCm_Complex.o + OBJ_ROCM += TestROCm_RangePolicy.o + OBJ_ROCM += TestROCm_AtomicOperations_int.o TestROCm_AtomicOperations_unsignedint.o TestROCm_AtomicOperations_longint.o + OBJ_ROCM += TestROCm_AtomicOperations_unsignedlongint.o TestROCm_AtomicOperations_longlongint.o TestROCm_AtomicOperations_double.o TestROCm_AtomicOperations_float.o + OBJ_ROCM += TestROCm_Atomics.o + OBJ_ROCM += TestROCm_AtomicViews.o + OBJ_ROCM += TestROCm_Other.o + OBJ_ROCM += TestROCm_MDRange_a.o TestROCm_MDRange_b.o TestROCm_MDRange_c.o TestROCm_MDRange_d.o TestROCm_MDRange_e.o + OBJ_ROCM += TestROCm_MDRangeReduce_a.o TestROCm_MDRangeReduce_b.o TestROCm_MDRangeReduce_c.o TestROCm_MDRangeReduce_d.o TestROCm_MDRangeReduce_e.o + OBJ_ROCM += TestROCm_Reductions.o + OBJ_ROCM += TestROCm_Reducers_a.o TestROCm_Reducers_b.o TestROCm_Reducers_c.o TestROCm_Reducers_d.o + OBJ_ROCM += TestROCm_Scan.o + OBJ_ROCM += TestROCm_SharedAlloc.o + OBJ_ROCM += TestROCm_SubView_a.o + OBJ_ROCM += TestROCm_SubView_b.o + OBJ_ROCM += TestROCm_SubView_c01.o + OBJ_ROCM += TestROCm_SubView_c02.o + OBJ_ROCM += TestROCm_SubView_c03.o + OBJ_ROCM += TestROCm_SubView_c04.o + OBJ_ROCM += TestROCm_SubView_c05.o + OBJ_ROCM += TestROCm_SubView_c06.o + OBJ_ROCM += TestROCm_SubView_c07.o + OBJ_ROCM += TestROCm_SubView_c08.o + OBJ_ROCM += TestROCm_SubView_c09.o + OBJ_ROCM += TestROCm_SubView_c10.o + OBJ_ROCM += TestROCm_SubView_c11.o + OBJ_ROCM += TestROCm_SubView_c12.o + OBJ_ROCM += TestROCm_SubView_c13.o + OBJ_ROCM += TestROCm_Team.o + OBJ_ROCM += TestROCm_TeamReductionScan.o + OBJ_ROCM += 
TestROCm_TeamScratch.o TestROCm_TeamTeamSize.o + OBJ_ROCM += TestROCm_ViewAPI_a.o TestROCm_ViewAPI_b.o TestROCm_ViewAPI_c.o TestROCm_ViewAPI_d.o TestROCm_ViewAPI_e.o + OBJ_ROCM += TestROCm_DeepCopyAlignment.o + OBJ_ROCM += TestROCm_ViewMapping_a.o + OBJ_ROCM += TestROCm_ViewMapping_b.o + OBJ_ROCM += TestROCm_ViewMapping_subview.o + OBJ_ROCM += TestROCmHostPinned_ViewCopy.o TestROCmHostPinned_ViewAPI_a.o TestROCmHostPinned_ViewAPI_b.o TestROCmHostPinned_ViewAPI_c.o TestROCmHostPinned_ViewAPI_d.o TestROCmHostPinned_ViewAPI_e.o + OBJ_ROCM += TestROCmHostPinned_View_64bit.o + OBJ_ROCM += TestROCmHostPinned_ViewMapping_a.o + OBJ_ROCM += TestROCmHostPinned_ViewMapping_b.o + OBJ_ROCM += TestROCmHostPinned_ViewMapping_subview.o + OBJ_ROCM += TestROCm_ViewOfClass.o + OBJ_ROCM += TestROCm_Spaces.o + OBJ_ROCM += TestROCm_Crs.o + + TARGETS += KokkosCore_UnitTest_ROCm + TEST_TARGETS += test-rocm endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - - OBJ_THREADS = UnitTestMainInit.o gtest-all.o - OBJ_THREADS += TestThreads_Init.o - OBJ_THREADS += TestThreads_SharedAlloc.o - OBJ_THREADS += TestThreads_RangePolicy.o + OBJ_THREADS = UnitTestMainInit.o gtest-all.o + OBJ_THREADS += TestThreads_Init.o + OBJ_THREADS += TestThreads_SharedAlloc.o + OBJ_THREADS += TestThreads_RangePolicy.o OBJ_THREADS += TestThreads_View_64bit.o - OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o TestThreads_ViewAPI_c.o TestThreads_ViewAPI_d.o TestThreads_ViewAPI_e.o - OBJ_THREADS += TestThreads_ViewMapping_a.o TestThreads_ViewMapping_b.o TestThreads_ViewMapping_subview.o TestThreads_ViewLayoutStrideAssignment.o - OBJ_THREADS += TestThreads_ViewOfClass.o - OBJ_THREADS += TestThreads_SubView_a.o TestThreads_SubView_b.o - OBJ_THREADS += TestThreads_SubView_c01.o TestThreads_SubView_c02.o TestThreads_SubView_c03.o - OBJ_THREADS += TestThreads_SubView_c04.o TestThreads_SubView_c05.o TestThreads_SubView_c06.o - OBJ_THREADS += TestThreads_SubView_c07.o TestThreads_SubView_c08.o 
TestThreads_SubView_c09.o - OBJ_THREADS += TestThreads_SubView_c10.o TestThreads_SubView_c11.o TestThreads_SubView_c12.o - OBJ_THREADS += TestThreads_Reductions.o TestThreads_Scan.o - OBJ_THREADS += TestThreads_Reductions_DeviceView.o - OBJ_THREADS += TestThreads_Reducers_a.o TestThreads_Reducers_b.o TestThreads_Reducers_c.o TestThreads_Reducers_d.o - OBJ_THREADS += TestThreads_Complex.o - OBJ_THREADS += TestThreads_AtomicOperations_int.o TestThreads_AtomicOperations_unsignedint.o TestThreads_AtomicOperations_longint.o - OBJ_THREADS += TestThreads_AtomicOperations_unsignedlongint.o TestThreads_AtomicOperations_longlongint.o TestThreads_AtomicOperations_double.o TestThreads_AtomicOperations_float.o - OBJ_THREADS += TestThreads_AtomicViews.o TestThreads_Atomics.o - OBJ_THREADS += TestThreads_Team.o TestThreads_TeamScratch.o TestThreads_TeamTeamSize.o - OBJ_THREADS += TestThreads_TeamReductionScan.o - OBJ_THREADS += TestThreads_Other.o - OBJ_THREADS += TestThreads_MDRange_a.o TestThreads_MDRange_b.o TestThreads_MDRange_c.o TestThreads_MDRange_d.o TestThreads_MDRange_e.o + OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o TestThreads_ViewAPI_c.o TestThreads_ViewAPI_d.o TestThreads_ViewAPI_e.o + OBJ_THREADS += TestThreads_DeepCopyAlignment.o + OBJ_THREADS += TestThreads_ViewMapping_a.o TestThreads_ViewMapping_b.o TestThreads_ViewMapping_subview.o TestThreads_ViewLayoutStrideAssignment.o + OBJ_THREADS += TestThreads_ViewOfClass.o + OBJ_THREADS += TestThreads_SubView_a.o TestThreads_SubView_b.o + OBJ_THREADS += TestThreads_SubView_c01.o TestThreads_SubView_c02.o TestThreads_SubView_c03.o + OBJ_THREADS += TestThreads_SubView_c04.o TestThreads_SubView_c05.o TestThreads_SubView_c06.o + OBJ_THREADS += TestThreads_SubView_c07.o TestThreads_SubView_c08.o TestThreads_SubView_c09.o + OBJ_THREADS += TestThreads_SubView_c10.o TestThreads_SubView_c11.o TestThreads_SubView_c12.o + OBJ_THREADS += TestThreads_Reductions.o TestThreads_Scan.o + OBJ_THREADS += 
TestThreads_Reductions_DeviceView.o + OBJ_THREADS += TestThreads_Reducers_a.o TestThreads_Reducers_b.o TestThreads_Reducers_c.o TestThreads_Reducers_d.o + OBJ_THREADS += TestThreads_Complex.o + OBJ_THREADS += TestThreads_AtomicOperations_int.o TestThreads_AtomicOperations_unsignedint.o TestThreads_AtomicOperations_longint.o + OBJ_THREADS += TestThreads_AtomicOperations_unsignedlongint.o TestThreads_AtomicOperations_longlongint.o TestThreads_AtomicOperations_double.o TestThreads_AtomicOperations_float.o + OBJ_THREADS += TestThreads_AtomicOperations_complexfloat.o TestThreads_AtomicOperations_complexdouble.o + OBJ_THREADS += TestThreads_AtomicViews.o TestThreads_Atomics.o + OBJ_THREADS += TestThreads_Team.o TestThreads_TeamScratch.o TestThreads_TeamTeamSize.o + OBJ_THREADS += TestThreads_TeamReductionScan.o + OBJ_THREADS += TestThreads_TeamVectorRange.o + OBJ_THREADS += TestThreads_Other.o + OBJ_THREADS += TestThreads_MDRange_a.o TestThreads_MDRange_b.o TestThreads_MDRange_c.o TestThreads_MDRange_d.o TestThreads_MDRange_e.o + OBJ_THREADS += TestThreads_LocalDeepCopy.o - TARGETS += KokkosCore_UnitTest_Threads + TARGETS += KokkosCore_UnitTest_Threads - TEST_TARGETS += test-threads + TEST_TARGETS += test-threads endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - OBJ_OPENMP = UnitTestMainInit.o gtest-all.o - OBJ_OPENMP += TestOpenMP_Init.o - OBJ_OPENMP += TestOpenMP_SharedAlloc.o - OBJ_OPENMP += TestOpenMP_RangePolicy.o + OBJ_OPENMP = UnitTestMainInit.o gtest-all.o + OBJ_OPENMP += TestOpenMP_Init.o + OBJ_OPENMP += TestOpenMP_SharedAlloc.o + OBJ_OPENMP += TestOpenMP_RangePolicy.o OBJ_OPENMP += TestOpenMP_View_64bit.o - OBJ_OPENMP += TestOpenMP_ViewAPI_a.o TestOpenMP_ViewAPI_b.o TestOpenMP_ViewAPI_c.o TestOpenMP_ViewAPI_d.o TestOpenMP_ViewAPI_e.o - OBJ_OPENMP += TestOpenMP_ViewMapping_a.o TestOpenMP_ViewMapping_b.o TestOpenMP_ViewMapping_subview.o TestOpenMP_ViewLayoutStrideAssignment.o - OBJ_OPENMP += TestOpenMP_ViewOfClass.o - OBJ_OPENMP += TestOpenMP_SubView_a.o 
TestOpenMP_SubView_b.o - OBJ_OPENMP += TestOpenMP_SubView_c01.o TestOpenMP_SubView_c02.o TestOpenMP_SubView_c03.o - OBJ_OPENMP += TestOpenMP_SubView_c04.o TestOpenMP_SubView_c05.o TestOpenMP_SubView_c06.o - OBJ_OPENMP += TestOpenMP_SubView_c07.o TestOpenMP_SubView_c08.o TestOpenMP_SubView_c09.o - OBJ_OPENMP += TestOpenMP_SubView_c10.o TestOpenMP_SubView_c11.o TestOpenMP_SubView_c12.o - OBJ_OPENMP += TestOpenMP_SubView_c13.o - OBJ_OPENMP += TestOpenMP_Reductions.o TestOpenMP_Scan.o - OBJ_OPENMP += TestOpenMP_Reductions_DeviceView.o - OBJ_OPENMP += TestOpenMP_Reducers_a.o TestOpenMP_Reducers_b.o TestOpenMP_Reducers_c.o TestOpenMP_Reducers_d.o - OBJ_OPENMP += TestOpenMP_Complex.o - OBJ_OPENMP += TestOpenMP_AtomicOperations_int.o TestOpenMP_AtomicOperations_unsignedint.o TestOpenMP_AtomicOperations_longint.o - OBJ_OPENMP += TestOpenMP_AtomicOperations_unsignedlongint.o TestOpenMP_AtomicOperations_longlongint.o TestOpenMP_AtomicOperations_double.o TestOpenMP_AtomicOperations_float.o - OBJ_OPENMP += TestOpenMP_AtomicViews.o TestOpenMP_Atomics.o - OBJ_OPENMP += TestOpenMP_Team.o TestOpenMP_TeamScratch.o - OBJ_OPENMP += TestOpenMP_TeamReductionScan.o TestOpenMP_TeamTeamSize.o - OBJ_OPENMP += TestOpenMP_Other.o - OBJ_OPENMP += TestOpenMP_MDRange_a.o TestOpenMP_MDRange_b.o TestOpenMP_MDRange_c.o TestOpenMP_MDRange_d.o TestOpenMP_MDRange_e.o - OBJ_OPENMP += TestOpenMP_Crs.o - OBJ_OPENMP += TestOpenMP_Task.o TestOpenMP_WorkGraph.o - OBJ_OPENMP += TestOpenMP_UniqueToken.o - - TARGETS += KokkosCore_UnitTest_OpenMP + OBJ_OPENMP += TestOpenMP_ViewAPI_a.o TestOpenMP_ViewAPI_b.o TestOpenMP_ViewAPI_c.o TestOpenMP_ViewAPI_d.o TestOpenMP_ViewAPI_e.o + OBJ_OPENMP += TestOpenMP_DeepCopyAlignment.o + OBJ_OPENMP += TestOpenMP_ViewMapping_a.o TestOpenMP_ViewMapping_b.o TestOpenMP_ViewMapping_subview.o TestOpenMP_ViewLayoutStrideAssignment.o + OBJ_OPENMP += TestOpenMP_ViewOfClass.o + OBJ_OPENMP += TestOpenMP_SubView_a.o TestOpenMP_SubView_b.o + OBJ_OPENMP += TestOpenMP_SubView_c01.o 
TestOpenMP_SubView_c02.o TestOpenMP_SubView_c03.o + OBJ_OPENMP += TestOpenMP_SubView_c04.o TestOpenMP_SubView_c05.o TestOpenMP_SubView_c06.o + OBJ_OPENMP += TestOpenMP_SubView_c07.o TestOpenMP_SubView_c08.o TestOpenMP_SubView_c09.o + OBJ_OPENMP += TestOpenMP_SubView_c10.o TestOpenMP_SubView_c11.o TestOpenMP_SubView_c12.o + OBJ_OPENMP += TestOpenMP_SubView_c13.o + OBJ_OPENMP += TestOpenMP_Reductions.o TestOpenMP_Scan.o + OBJ_OPENMP += TestOpenMP_Reductions_DeviceView.o + OBJ_OPENMP += TestOpenMP_Reducers_a.o TestOpenMP_Reducers_b.o TestOpenMP_Reducers_c.o TestOpenMP_Reducers_d.o + OBJ_OPENMP += TestOpenMP_Complex.o + OBJ_OPENMP += TestOpenMP_AtomicOperations_int.o TestOpenMP_AtomicOperations_unsignedint.o TestOpenMP_AtomicOperations_longint.o + OBJ_OPENMP += TestOpenMP_AtomicOperations_unsignedlongint.o TestOpenMP_AtomicOperations_longlongint.o TestOpenMP_AtomicOperations_double.o TestOpenMP_AtomicOperations_float.o + OBJ_OPENMP += TestOpenMP_AtomicOperations_complexfloat.o TestOpenMP_AtomicOperations_complexdouble.o + OBJ_OPENMP += TestOpenMP_AtomicViews.o TestOpenMP_Atomics.o + OBJ_OPENMP += TestOpenMP_Team.o TestOpenMP_TeamScratch.o + OBJ_OPENMP += TestOpenMP_TeamReductionScan.o TestOpenMP_TeamTeamSize.o + OBJ_OPENMP += TestOpenMP_TeamVectorRange.o + OBJ_OPENMP += TestOpenMP_Other.o + OBJ_OPENMP += TestOpenMP_MDRange_a.o TestOpenMP_MDRange_b.o TestOpenMP_MDRange_c.o TestOpenMP_MDRange_d.o TestOpenMP_MDRange_e.o + OBJ_OPENMP += TestOpenMP_Crs.o + OBJ_OPENMP += TestOpenMP_Task.o TestOpenMP_WorkGraph.o + OBJ_OPENMP += TestOpenMP_UniqueToken.o + OBJ_OPENMP += TestOpenMP_LocalDeepCopy.o + + TARGETS += KokkosCore_UnitTest_OpenMP TARGETS += KokkosCore_UnitTest_OpenMPInterOp - TEST_TARGETS += test-openmp + TEST_TARGETS += test-openmp endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - OBJ_OPENMPTARGET = UnitTestMainInit.o gtest-all.o - OBJ_OPENMPTARGET += TestOpenMPTarget_Init.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_SharedAlloc.o - OBJ_OPENMPTARGET += 
TestOpenMPTarget_RangePolicy.o - OBJ_OPENMPTARGET += TestOpenMPTarget_ViewAPI_a.o TestOpenMPTarget_ViewAPI_b.o TestOpenMPTarget_ViewAPI_c.o TestOpenMPTarget_ViewAPI_d.o TestOpenMPTarget_ViewAPI_e.o #Some commented out code - OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_a.o - OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_b.o + OBJ_OPENMPTARGET = UnitTestMainInit.o gtest-all.o + OBJ_OPENMPTARGET += TestOpenMPTarget_Init.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_SharedAlloc.o + OBJ_OPENMPTARGET += TestOpenMPTarget_RangePolicy.o + OBJ_OPENMPTARGET += TestOpenMPTarget_ViewAPI_a.o TestOpenMPTarget_ViewAPI_b.o TestOpenMPTarget_ViewAPI_c.o TestOpenMPTarget_ViewAPI_d.o TestOpenMPTarget_ViewAPI_e.o #Some commented out code + OBJ_OPENMPTARGET += TestOpenMPTarget_DeepCopyAlignment.o + OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_a.o + OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_b.o OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_subview.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_ViewOfClass.o - OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_a.o TestOpenMPTarget_SubView_b.o - #The following subview tests need something like UVM: - #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c01.o TestOpenMPTarget_SubView_c02.o TestOpenMPTarget_SubView_c03.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c04.o TestOpenMPTarget_SubView_c05.o TestOpenMPTarget_SubView_c06.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c07.o TestOpenMPTarget_SubView_c08.o TestOpenMPTarget_SubView_c09.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c10.o TestOpenMPTarget_SubView_c11.o TestOpenMPTarget_SubView_c12.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_Reductions.o # Need custom reductions - #OBJ_OPENMPTARGET += TestOpenMPTarget_Reducers_a.o TestOpenMPTarget_Reducers_b.o TestOpenMPTarget_Reducers_c.o TestOpenMPTarget_Reducers_d.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_Scan.o - OBJ_OPENMPTARGET += TestOpenMPTarget_Complex.o - OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_int.o 
TestOpenMPTarget_AtomicOperations_unsignedint.o TestOpenMPTarget_AtomicOperations_longint.o - OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_unsignedlongint.o TestOpenMPTarget_AtomicOperations_longlongint.o TestOpenMPTarget_AtomicOperations_double.o TestOpenMPTarget_AtomicOperations_float.o - OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicViews.o - OBJ_OPENMPTARGET += TestOpenMPTarget_Atomics.o # Commented Out Arbitrary Type Atomics - #OBJ_OPENMPTARGET += TestOpenMPTarget_Team.o # There is still a static function in this - #OBJ_OPENMPTARGET += TestOpenMPTarget_TeamScratch.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_TeamReductionScan.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_Other.o - #OBJ_OPENMPTARGET += TestOpenMPTarget_MDRange_a.o TestOpenMPTarget_MDRange_b.o TestOpenMPTarget_MDRange_c.o TestOpenMPTarget_MDRange_d.o TestOpenMPTarget_MDRange_d.e - #OBJ_OPENMPTARGET += TestOpenMPTarget_Task.o - - TARGETS += KokkosCore_UnitTest_OpenMPTarget + #OBJ_OPENMPTARGET += TestOpenMPTarget_ViewOfClass.o + OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_a.o TestOpenMPTarget_SubView_b.o + #The following subview tests need something like UVM: + #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c01.o TestOpenMPTarget_SubView_c02.o TestOpenMPTarget_SubView_c03.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c04.o TestOpenMPTarget_SubView_c05.o TestOpenMPTarget_SubView_c06.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c07.o TestOpenMPTarget_SubView_c08.o TestOpenMPTarget_SubView_c09.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c10.o TestOpenMPTarget_SubView_c11.o TestOpenMPTarget_SubView_c12.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_Reductions.o # Need custom reductions + #OBJ_OPENMPTARGET += TestOpenMPTarget_Reducers_a.o TestOpenMPTarget_Reducers_b.o TestOpenMPTarget_Reducers_c.o TestOpenMPTarget_Reducers_d.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_Scan.o + OBJ_OPENMPTARGET += TestOpenMPTarget_Complex.o + OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_int.o 
TestOpenMPTarget_AtomicOperations_unsignedint.o TestOpenMPTarget_AtomicOperations_longint.o + OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_unsignedlongint.o TestOpenMPTarget_AtomicOperations_longlongint.o TestOpenMPTarget_AtomicOperations_double.o TestOpenMPTarget_AtomicOperations_float.o + OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_complexfloat.o TestOpenMPTarget_AtomicOperations_complexdouble.o + OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicViews.o + OBJ_OPENMPTARGET += TestOpenMPTarget_Atomics.o # Commented Out Arbitrary Type Atomics + #OBJ_OPENMPTARGET += TestOpenMPTarget_Team.o # There is still a static function in this + #OBJ_OPENMPTARGET += TestOpenMPTarget_TeamScratch.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_TeamReductionScan.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_Other.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_MDRange_a.o TestOpenMPTarget_MDRange_b.o TestOpenMPTarget_MDRange_c.o TestOpenMPTarget_MDRange_d.o TestOpenMPTarget_MDRange_d.e + #OBJ_OPENMPTARGET += TestOpenMPTarget_Task.o - TEST_TARGETS += test-openmptarget + TARGETS += KokkosCore_UnitTest_OpenMPTarget + TEST_TARGETS += test-openmptarget endif ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) - OBJ_QTHREADS = TestQthreads_Other.o TestQthreads_Reductions.o TestQthreads_Atomics.o TestQthreads_Team.o - OBJ_QTHREADS += TestQthreads_SubView_a.o TestQthreads_SubView_b.o - OBJ_QTHREADS += TestQthreads_SubView_c01.o TestQthreads_SubView_c02.o TestQthreads_SubView_c03.o - OBJ_QTHREADS += TestQthreads_SubView_c04.o TestQthreads_SubView_c05.o TestQthreads_SubView_c06.o - OBJ_QTHREADS += TestQthreads_SubView_c07.o TestQthreads_SubView_c08.o TestQthreads_SubView_c09.o - OBJ_QTHREADS += TestQthreads_SubView_c10.o TestQthreads_SubView_c11.o TestQthreads_SubView_c12.o - OBJ_QTHREADS += TestQthreads_ViewAPI_a.o TestQthreads_ViewAPI_b.o TestQthreads_ViewAPI_c.o TestQthreads_ViewAPI_d.o TestQthreads_ViewAPI_e.o UnitTestMain.o gtest-all.o - TARGETS += KokkosCore_UnitTest_Qthreads + OBJ_QTHREADS = 
TestQthreads_Other.o TestQthreads_Reductions.o TestQthreads_Atomics.o TestQthreads_Team.o + OBJ_QTHREADS += TestQthreads_SubView_a.o TestQthreads_SubView_b.o + OBJ_QTHREADS += TestQthreads_SubView_c01.o TestQthreads_SubView_c02.o TestQthreads_SubView_c03.o + OBJ_QTHREADS += TestQthreads_SubView_c04.o TestQthreads_SubView_c05.o TestQthreads_SubView_c06.o + OBJ_QTHREADS += TestQthreads_SubView_c07.o TestQthreads_SubView_c08.o TestQthreads_SubView_c09.o + OBJ_QTHREADS += TestQthreads_SubView_c10.o TestQthreads_SubView_c11.o TestQthreads_SubView_c12.o + OBJ_QTHREADS += TestQthreads_ViewAPI_a.o TestQthreads_ViewAPI_b.o TestQthreads_ViewAPI_c.o TestQthreads_ViewAPI_d.o TestQthreads_ViewAPI_e.o UnitTestMain.o gtest-all.o + TARGETS += KokkosCore_UnitTest_Qthreads - OBJ_QTHREADS2 = UnitTestMainInit.o gtest-all.o - OBJ_QTHREADS2 += TestQthreads_Complex.o - TARGETS += KokkosCore_UnitTest_Qthreads2 + OBJ_QTHREADS2 = UnitTestMainInit.o gtest-all.o + OBJ_QTHREADS2 += TestQthreads_Complex.o + TARGETS += KokkosCore_UnitTest_Qthreads2 - TEST_TARGETS += test-qthreads + TEST_TARGETS += test-qthreads +endif + +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + OBJ_HPX = UnitTestMainInit.o gtest-all.o + OBJ_HPX += TestHPX_Init.o + OBJ_HPX += TestHPX_SharedAlloc.o + OBJ_HPX += TestHPX_RangePolicy.o + OBJ_HPX += TestHPX_View_64bit.o + OBJ_HPX += TestHPX_ViewAPI_a.o TestHPX_ViewAPI_b.o TestHPX_ViewAPI_c.o TestHPX_ViewAPI_d.o TestHPX_ViewAPI_e.o + OBJ_HPX += TestHPX_ViewMapping_a.o TestHPX_ViewMapping_b.o TestHPX_ViewMapping_subview.o + OBJ_HPX += TestHPX_ViewOfClass.o + OBJ_HPX += TestHPX_SubView_a.o TestHPX_SubView_b.o + OBJ_HPX += TestHPX_SubView_c01.o TestHPX_SubView_c02.o TestHPX_SubView_c03.o + OBJ_HPX += TestHPX_SubView_c04.o TestHPX_SubView_c05.o TestHPX_SubView_c06.o + OBJ_HPX += TestHPX_SubView_c07.o TestHPX_SubView_c08.o TestHPX_SubView_c09.o + OBJ_HPX += TestHPX_SubView_c10.o TestHPX_SubView_c11.o TestHPX_SubView_c12.o + OBJ_HPX += TestHPX_SubView_c13.o + OBJ_HPX += TestHPX_Reductions.o + 
OBJ_HPX += TestHPX_Scan.o + OBJ_HPX += TestHPX_Reducers_a.o TestHPX_Reducers_b.o TestHPX_Reducers_c.o TestHPX_Reducers_d.o + OBJ_HPX += TestHPX_Complex.o + OBJ_HPX += TestHPX_AtomicOperations_int.o TestHPX_AtomicOperations_unsignedint.o TestHPX_AtomicOperations_longint.o + OBJ_HPX += TestHPX_AtomicOperations_unsignedlongint.o TestHPX_AtomicOperations_longlongint.o TestHPX_AtomicOperations_double.o TestHPX_AtomicOperations_float.o + OBJ_HPX += TestHPX_AtomicViews.o TestHPX_Atomics.o + OBJ_HPX += TestHPX_Team.o + OBJ_HPX += TestHPX_TeamVectorRange.o + OBJ_HPX += TestHPX_TeamScratch.o + OBJ_HPX += TestHPX_TeamReductionScan.o + OBJ_HPX += TestHPX_Other.o + OBJ_HPX += TestHPX_MDRange_a.o TestHPX_MDRange_b.o TestHPX_MDRange_c.o TestHPX_MDRange_d.o TestHPX_MDRange_e.o + OBJ_HPX += TestHPX_Crs.o + OBJ_HPX += TestHPX_Task.o + OBJ_HPX += TestHPX_WorkGraph.o + OBJ_HPX += TestHPX_UniqueToken.o + + TARGETS += KokkosCore_UnitTest_HPX + TARGETS += KokkosCore_UnitTest_HPXInterOp + + TEST_TARGETS += test-hpx endif ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - OBJ_SERIAL = UnitTestMainInit.o gtest-all.o - OBJ_SERIAL += TestSerial_Init.o - OBJ_SERIAL += TestSerial_SharedAlloc.o - OBJ_SERIAL += TestSerial_RangePolicy.o - OBJ_SERIAL += TestSerial_View_64bit.o - OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o TestSerial_ViewAPI_c.o TestSerial_ViewAPI_d.o TestSerial_ViewAPI_e.o - OBJ_SERIAL += TestSerial_ViewMapping_a.o TestSerial_ViewMapping_b.o TestSerial_ViewMapping_subview.o TestSerial_ViewLayoutStrideAssignment.o - OBJ_SERIAL += TestSerial_ViewOfClass.o - OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o - OBJ_SERIAL += TestSerial_SubView_c01.o TestSerial_SubView_c02.o TestSerial_SubView_c03.o - OBJ_SERIAL += TestSerial_SubView_c04.o TestSerial_SubView_c05.o TestSerial_SubView_c06.o - OBJ_SERIAL += TestSerial_SubView_c07.o TestSerial_SubView_c08.o TestSerial_SubView_c09.o - OBJ_SERIAL += TestSerial_SubView_c10.o TestSerial_SubView_c11.o TestSerial_SubView_c12.o - 
OBJ_SERIAL += TestSerial_SubView_c13.o - OBJ_SERIAL += TestSerial_Reductions.o TestSerial_Scan.o - OBJ_SERIAL += TestSerial_Reductions_DeviceView.o - OBJ_SERIAL += TestSerial_Reducers_a.o TestSerial_Reducers_b.o TestSerial_Reducers_c.o TestSerial_Reducers_d.o - OBJ_SERIAL += TestSerial_Complex.o - OBJ_SERIAL += TestSerial_AtomicOperations_int.o TestSerial_AtomicOperations_unsignedint.o TestSerial_AtomicOperations_longint.o - OBJ_SERIAL += TestSerial_AtomicOperations_unsignedlongint.o TestSerial_AtomicOperations_longlongint.o TestSerial_AtomicOperations_double.o TestSerial_AtomicOperations_float.o - OBJ_SERIAL += TestSerial_AtomicViews.o TestSerial_Atomics.o - OBJ_SERIAL += TestSerial_Team.o TestSerial_TeamScratch.o - OBJ_SERIAL += TestSerial_TeamReductionScan.o TestSerial_TeamTeamSize.o - OBJ_SERIAL += TestSerial_Other.o - #HCC_WORKAROUND - ifneq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) + OBJ_SERIAL = UnitTestMainInit.o gtest-all.o + OBJ_SERIAL += TestSerial_Init.o + OBJ_SERIAL += TestSerial_SharedAlloc.o + OBJ_SERIAL += TestSerial_RangePolicy.o + OBJ_SERIAL += TestSerial_View_64bit.o + OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o TestSerial_ViewAPI_c.o TestSerial_ViewAPI_d.o TestSerial_ViewAPI_e.o + OBJ_SERIAL += TestSerial_DeepCopyAlignment.o + OBJ_SERIAL += TestSerial_ViewMapping_a.o TestSerial_ViewMapping_b.o TestSerial_ViewMapping_subview.o TestSerial_ViewLayoutStrideAssignment.o + OBJ_SERIAL += TestSerial_ViewOfClass.o + OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o + OBJ_SERIAL += TestSerial_SubView_c01.o TestSerial_SubView_c02.o TestSerial_SubView_c03.o + OBJ_SERIAL += TestSerial_SubView_c04.o TestSerial_SubView_c05.o TestSerial_SubView_c06.o + OBJ_SERIAL += TestSerial_SubView_c07.o TestSerial_SubView_c08.o TestSerial_SubView_c09.o + OBJ_SERIAL += TestSerial_SubView_c10.o TestSerial_SubView_c11.o TestSerial_SubView_c12.o + OBJ_SERIAL += TestSerial_SubView_c13.o + OBJ_SERIAL += TestSerial_Reductions.o TestSerial_Scan.o + OBJ_SERIAL 
+= TestSerial_Reductions_DeviceView.o + OBJ_SERIAL += TestSerial_Reducers_a.o TestSerial_Reducers_b.o TestSerial_Reducers_c.o TestSerial_Reducers_d.o + OBJ_SERIAL += TestSerial_Complex.o + OBJ_SERIAL += TestSerial_AtomicOperations_int.o TestSerial_AtomicOperations_unsignedint.o TestSerial_AtomicOperations_longint.o + OBJ_SERIAL += TestSerial_AtomicOperations_unsignedlongint.o TestSerial_AtomicOperations_longlongint.o TestSerial_AtomicOperations_double.o TestSerial_AtomicOperations_float.o + OBJ_SERIAL += TestSerial_AtomicOperations_complexfloat.o TestSerial_AtomicOperations_complexdouble.o + OBJ_SERIAL += TestSerial_AtomicViews.o TestSerial_Atomics.o + OBJ_SERIAL += TestSerial_Team.o TestSerial_TeamScratch.o + OBJ_SERIAL += TestSerial_TeamVectorRange.o + OBJ_SERIAL += TestSerial_TeamReductionScan.o TestSerial_TeamTeamSize.o + OBJ_SERIAL += TestSerial_Other.o + #HCC_WORKAROUND + ifneq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) OBJ_SERIAL += TestSerial_MDRange_a.o TestSerial_MDRange_b.o TestSerial_MDRange_c.o TestSerial_MDRange_d.o TestSerial_MDRange_e.o - endif - OBJ_SERIAL += TestSerial_Crs.o - OBJ_SERIAL += TestSerial_Task.o TestSerial_WorkGraph.o - - TARGETS += KokkosCore_UnitTest_Serial + endif + OBJ_SERIAL += TestSerial_Crs.o + OBJ_SERIAL += TestSerial_Task.o TestSerial_WorkGraph.o + OBJ_SERIAL += TestSerial_LocalDeepCopy.o - TEST_TARGETS += test-serial + TARGETS += KokkosCore_UnitTest_Serial + + TEST_TARGETS += test-serial endif OBJ_HWLOC = TestHWLOC.o UnitTestMain.o gtest-all.o @@ -298,10 +356,10 @@ TEST_TARGETS += test-host-barrier OBJ_DEFAULT = UnitTestMainInit.o gtest-all.o ifneq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) ifneq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1) - OBJ_DEFAULT += TestDefaultDeviceType.o - OBJ_DEFAULT += TestDefaultDeviceType_a1.o TestDefaultDeviceType_b1.o TestDefaultDeviceType_c1.o - OBJ_DEFAULT += TestDefaultDeviceType_a2.o TestDefaultDeviceType_b2.o TestDefaultDeviceType_c2.o - OBJ_DEFAULT += TestDefaultDeviceType_a3.o 
TestDefaultDeviceType_b3.o TestDefaultDeviceType_c3.o + OBJ_DEFAULT += TestDefaultDeviceType.o + OBJ_DEFAULT += TestDefaultDeviceType_a1.o TestDefaultDeviceType_b1.o TestDefaultDeviceType_c1.o + OBJ_DEFAULT += TestDefaultDeviceType_a2.o TestDefaultDeviceType_b2.o TestDefaultDeviceType_c2.o + OBJ_DEFAULT += TestDefaultDeviceType_a3.o TestDefaultDeviceType_b3.o TestDefaultDeviceType_c3.o OBJ_DEFAULT += TestDefaultDeviceType_d.o endif endif @@ -325,9 +383,11 @@ TEST_TARGETS += ${INITTESTS_TEST_TARGETS} KokkosCore_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Cuda -KokkosCore_UnitTest_CudaInterOp: UnitTestMain.o gtest-all.o TestCuda_InterOp.o - $(LINK) $(EXTRA_PATH) UnitTestMain.o gtest-all.o TestCuda_InterOp.o $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_CudaInterOp - +KokkosCore_UnitTest_CudaInterOpInit: UnitTestMain.o gtest-all.o TestCuda_InterOp_Init.o $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) UnitTestMain.o gtest-all.o TestCuda_InterOp_Init.o $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_CudaInterOpInit +KokkosCore_UnitTest_CudaInterOpStreams: UnitTestMain.o gtest-all.o TestCuda_InterOp_Streams.o $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) UnitTestMain.o gtest-all.o TestCuda_InterOp_Streams.o $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_CudaInterOpStreams + KokkosCore_UnitTest_ROCm: $(OBJ_ROCM) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_ROCM) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_ROCm @@ -337,7 +397,7 @@ KokkosCore_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) KokkosCore_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_OpenMP -KokkosCore_UnitTest_OpenMPInterOp: UnitTestMain.o gtest-all.o 
TestOpenMP_InterOp.o +KokkosCore_UnitTest_OpenMPInterOp: UnitTestMain.o gtest-all.o TestOpenMP_InterOp.o $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) UnitTestMain.o gtest-all.o TestOpenMP_InterOp.o $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_OpenMPInterOp KokkosCore_UnitTest_OpenMPTarget: $(OBJ_OPENMPTARGET) $(KOKKOS_LINK_DEPENDS) @@ -352,6 +412,12 @@ KokkosCore_UnitTest_Qthreads: $(OBJ_QTHREADS) $(KOKKOS_LINK_DEPENDS) KokkosCore_UnitTest_Qthreads2: $(OBJ_QTHREADS2) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_QTHREADS2) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Qthreads2 +KokkosCore_UnitTest_HPX: $(OBJ_HPX) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) $(OBJ_HPX) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_HPX + +KokkosCore_UnitTest_HPXInterOp: UnitTestMain.o gtest-all.o TestHPX_InterOp.o $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) UnitTestMain.o gtest-all.o TestHPX_InterOp.o $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_HPXInterOp + KokkosCore_UnitTest_HWLOC: $(OBJ_HWLOC) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_HWLOC) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_HWLOC @@ -376,7 +442,8 @@ ${INITTESTS_TARGETS}: KokkosCore_UnitTest_DefaultDeviceTypeInit_%: TestDefaultDe test-cuda: KokkosCore_UnitTest_Cuda ./KokkosCore_UnitTest_Cuda - ./KokkosCore_UnitTest_CudaInterOp + ./KokkosCore_UnitTest_CudaInterOpInit + ./KokkosCore_UnitTest_CudaInterOpStreams test-rocm: KokkosCore_UnitTest_ROCm ./KokkosCore_UnitTest_ROCm @@ -398,6 +465,10 @@ test-qthreads: KokkosCore_UnitTest_Qthreads KokkosCore_UnitTest_Qthreads2 ./KokkosCore_UnitTest_Qthreads ./KokkosCore_UnitTest_Qthreads2 +test-hpx: KokkosCore_UnitTest_HPX + ./KokkosCore_UnitTest_HPX + ./KokkosCore_UnitTest_HPXInterOp + test-hwloc: KokkosCore_UnitTest_HWLOC ./KokkosCore_UnitTest_HWLOC diff --git a/lib/kokkos/core/unit_test/TestAtomic.hpp 
b/lib/kokkos/core/unit_test/TestAtomic.hpp index 58b6325115..ee93d53470 100644 --- a/lib/kokkos/core/unit_test/TestAtomic.hpp +++ b/lib/kokkos/core/unit_test/TestAtomic.hpp @@ -211,13 +211,13 @@ T AddLoop( int loop ) { f_zero.data = data; Kokkos::parallel_for( 1, f_zero ); - execution_space::fence(); + execution_space().fence(); struct AddFunctor< T, execution_space > f_add; f_add.data = data; Kokkos::parallel_for( loop, f_add ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -226,7 +226,7 @@ T AddLoop( int loop ) { f_add_red.data = data; int dummy_result; Kokkos::parallel_reduce( loop, f_add_red , dummy_result ); - execution_space::fence(); + execution_space().fence(); return val; } @@ -298,12 +298,12 @@ T CASLoop( int loop ) { f_zero.data = data; Kokkos::parallel_for( 1, f_zero ); - execution_space::fence(); + execution_space().fence(); struct CASFunctor< T, execution_space > f_cas; f_cas.data = data; Kokkos::parallel_for( loop, f_cas ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -312,7 +312,7 @@ T CASLoop( int loop ) { f_cas_red.data = data; int dummy_result; Kokkos::parallel_reduce( loop, f_cas_red , dummy_result ); - execution_space::fence(); + execution_space().fence(); return val; } @@ -381,20 +381,20 @@ T ExchLoop( int loop ) { f_zero.data = data; Kokkos::parallel_for( 1, f_zero ); - execution_space::fence(); + execution_space().fence(); typename ZeroFunctor< T, execution_space >::type data2( "Data" ); typename ZeroFunctor< T, execution_space >::h_type h_data2( "HData" ); f_zero.data = data2; Kokkos::parallel_for( 1, f_zero ); - execution_space::fence(); + execution_space().fence(); struct ExchFunctor< T, execution_space > f_exch; f_exch.data = data; f_exch.data2 = data2; Kokkos::parallel_for( loop, f_exch ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); Kokkos::deep_copy( 
h_data2, data2 ); @@ -405,7 +405,7 @@ T ExchLoop( int loop ) { f_exch_red.data2 = data2; int dummy_result; Kokkos::parallel_reduce( loop, f_exch_red , dummy_result ); - execution_space::fence(); + execution_space().fence(); return val; } diff --git a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp index d068c18d87..e043737e42 100644 --- a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp +++ b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp @@ -113,13 +113,13 @@ T MaxAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct MaxFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -191,13 +191,13 @@ T MinAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct MinFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -268,13 +268,13 @@ T IncAtomic( T i0 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct IncFunctor< T, execution_space > f( i0 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -345,13 +345,13 @@ T DecAtomic( T i0 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct DecFunctor< T, execution_space > f( i0 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -423,13 +423,13 @@ T MulAtomic( 
T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct MulFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -501,13 +501,13 @@ T DivAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct DivFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -536,7 +536,9 @@ bool DivAtomicTest( T i0, T i1 ) bool passed = true; - if ( (resSerial-res)*(resSerial-res) > 1e-10 ) { + using std::abs; + using Kokkos::abs; + if ( abs( (resSerial-res) * 1.) > 1e-5 ) { passed = false; std::cout << "Loop<" @@ -579,13 +581,13 @@ T ModAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct ModFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -657,13 +659,13 @@ T AndAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct AndFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -735,13 +737,13 @@ T OrAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct OrFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + 
execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -813,13 +815,13 @@ T XorAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct XorFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -891,13 +893,13 @@ T LShiftAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct LShiftFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); @@ -969,13 +971,13 @@ T RShiftAtomic( T i0, T i1 ) { f_init.data = data; Kokkos::parallel_for( 1, f_init ); - execution_space::fence(); + execution_space().fence(); struct RShiftFunctor< T, execution_space > f( i0, i1 ); f.data = data; Kokkos::parallel_for( 1, f ); - execution_space::fence(); + execution_space().fence(); Kokkos::deep_copy( h_data, data ); T val = h_data(); diff --git a/lib/kokkos/core/unit_test/TestAtomicOperations_complexdouble.hpp b/lib/kokkos/core/unit_test/TestAtomicOperations_complexdouble.hpp new file mode 100644 index 0000000000..a8474d8952 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestAtomicOperations_complexdouble.hpp @@ -0,0 +1,57 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { +TEST_F( TEST_CATEGORY , atomic_operations_complexdouble ) +{ + const int start = 1; // Avoid zero for division. 
+ const int end = 11; + for ( int i = start; i < end; ++i ) + { + ASSERT_TRUE( ( TestAtomicOperations::MulAtomicTest< Kokkos::complex, TEST_EXECSPACE >( start , end - i) ) ); + ASSERT_TRUE( ( TestAtomicOperations::DivAtomicTest< Kokkos::complex, TEST_EXECSPACE >( start , end - i) ) ); + } +} +} diff --git a/lib/kokkos/core/unit_test/TestAtomicOperations_complexfloat.hpp b/lib/kokkos/core/unit_test/TestAtomicOperations_complexfloat.hpp new file mode 100644 index 0000000000..961418e675 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestAtomicOperations_complexfloat.hpp @@ -0,0 +1,57 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace Test { +TEST_F( TEST_CATEGORY , atomic_operations_complexfloat ) +{ + const int start = 1; // Avoid zero for division. + const int end = 11; + for ( int i = start; i < end; ++i ) + { + ASSERT_TRUE( ( TestAtomicOperations::MulAtomicTest< Kokkos::complex, TEST_EXECSPACE >( start , end - i) ) ); + ASSERT_TRUE( ( TestAtomicOperations::DivAtomicTest< Kokkos::complex, TEST_EXECSPACE >( start , end - i) ) ); + } +} +} diff --git a/lib/kokkos/core/unit_test/TestCXX11.hpp b/lib/kokkos/core/unit_test/TestCXX11.hpp index 8a158e2667..542b4a1912 100644 --- a/lib/kokkos/core/unit_test/TestCXX11.hpp +++ b/lib/kokkos/core/unit_test/TestCXX11.hpp @@ -235,6 +235,7 @@ double ReduceTestFunctor() { else { Kokkos::parallel_reduce( policy_type( 25, Kokkos::AUTO ), FunctorReduceTest< DeviceType >( a ), unmanaged_result( & result ) ); } + Kokkos::fence(); return result; } @@ -281,6 +282,7 @@ double ReduceTestLambda() { } }, unmanaged_result( & result ) ); } + Kokkos::fence(); return result; } diff --git a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp index e6b5c48d3d..07c332a9ae 100644 --- a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp +++ b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp @@ -102,7 +102,7 @@ bool 
Test() { AddFunctor< DeviceType > f( a, b ); Kokkos::parallel_for( 1024, f ); - DeviceType::fence(); + DeviceType().fence(); return true; } diff --git a/lib/kokkos/core/unit_test/TestDeepCopy.hpp b/lib/kokkos/core/unit_test/TestDeepCopy.hpp new file mode 100644 index 0000000000..aebf263290 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestDeepCopy.hpp @@ -0,0 +1,167 @@ +#include + +namespace Test { + +namespace Impl { +template +struct TestDeepCopy { + + typedef Kokkos::View a_base_t; + typedef Kokkos::View b_base_t; + typedef Kokkos::View a_char_t; + typedef Kokkos::View b_char_t; + + typedef Kokkos::RangePolicy policyA_t; + typedef Kokkos::RangePolicy policyB_t; + + static void reset_a_copy_and_b(Kokkos::View a_char_copy, Kokkos::View b_char) { + const int N = b_char.extent(0); + Kokkos::parallel_for("TestDeepCopy: FillA_copy",policyA_t(0,N), KOKKOS_LAMBDA (const int& i) { + a_char_copy(i) = char(0); + }); + Kokkos::parallel_for("TestDeepCopy: FillB",policyB_t(0,N), KOKKOS_LAMBDA (const int& i) { + b_char(i) = char(0); + }); + } + + static int compare_equal(Kokkos::View a_char_copy, Kokkos::View a_char) { + const int N = a_char.extent(0); + int errors; + Kokkos::parallel_reduce("TestDeepCopy: FillA_copy",policyA_t(0,N), KOKKOS_LAMBDA (const int& i, int& lsum) { + if(a_char_copy(i) != a_char(i)) lsum++; + },errors); + return errors; + } + + static void run_test(int num_bytes) { + a_base_t a_base("test_space_to_space",(num_bytes+128)/8); + a_base_t a_base_copy("test_space_to_space",(num_bytes+128)/8); + Kokkos::View b_base("test_space_to_space",(num_bytes+128)/8); + + Kokkos::View a_char((char*) a_base.data(),a_base.extent(0)*8); + Kokkos::View a_char_copy((char*) a_base_copy.data(),a_base.extent(0)*8); + Kokkos::View b_char((char*) b_base.data(),b_base.extent(0)*8); + + Kokkos::parallel_for("TestDeepCopy: FillA",policyA_t(0,a_char.extent(0)), KOKKOS_LAMBDA (const int& i) { + a_char(i) = static_cast(i%97)+1; + }); + + reset_a_copy_and_b(a_char_copy, b_char); + + { 
+ int check = compare_equal(a_char_copy,a_char); + ASSERT_EQ( check, a_char.extent(0) ); + } + + // (a.data()%8, (a.data()+a.extent(0))%8, b.data()%8, (b.data()+b.extent(0))%8 + // (0,0,0,0) + { + int a_begin = 0; + int a_end = 0; + int b_begin = 0; + int b_end = 0; + auto a = Kokkos::subview(a_char,std::pair(a_begin,a_char.extent(0)-a_end)); + auto b = Kokkos::subview(b_char,std::pair(b_begin,b_char.extent(0)-b_end)); + auto a_copy = Kokkos::subview(a_char_copy,std::pair(a_begin,a_char_copy.extent(0)-a_end)); + Kokkos::deep_copy(b,a); + Kokkos::deep_copy(a_copy,b); + int check = compare_equal(a_copy,a); + ASSERT_EQ( check, 0 ); + } + + { + int a_begin = 0; + int a_end = 5; + int b_begin = 0; + int b_end = 5; + auto a = Kokkos::subview(a_char,std::pair(a_begin,a_char.extent(0)-a_end)); + auto b = Kokkos::subview(b_char,std::pair(b_begin,b_char.extent(0)-b_end)); + auto a_copy = Kokkos::subview(a_char_copy,std::pair(a_begin,a_char_copy.extent(0)-a_end)); + Kokkos::deep_copy(b,a); + Kokkos::deep_copy(a_copy,b); + int check = compare_equal(a_copy,a); + ASSERT_EQ( check, 0 ); + } + + { + int a_begin = 3; + int a_end = 0; + int b_begin = 3; + int b_end = 0; + auto a = Kokkos::subview(a_char,std::pair(a_begin,a_char.extent(0)-a_end)); + auto b = Kokkos::subview(b_char,std::pair(b_begin,b_char.extent(0)-b_end)); + auto a_copy = Kokkos::subview(a_char_copy,std::pair(a_begin,a_char_copy.extent(0)-a_end)); + Kokkos::deep_copy(b,a); + Kokkos::deep_copy(a_copy,b); + int check = compare_equal(a_copy,a); + ASSERT_EQ( check, 0 ); + } + + { + int a_begin = 3; + int a_end = 6; + int b_begin = 3; + int b_end = 6; + auto a = Kokkos::subview(a_char,std::pair(a_begin,a_char.extent(0)-a_end)); + auto b = Kokkos::subview(b_char,std::pair(b_begin,b_char.extent(0)-b_end)); + auto a_copy = Kokkos::subview(a_char_copy,std::pair(a_begin,a_char_copy.extent(0)-a_end)); + Kokkos::deep_copy(b,a); + Kokkos::deep_copy(a_copy,b); + int check = compare_equal(a_copy,a); + ASSERT_EQ( check, 0 ); + } + 
+ { + int a_begin = 5; + int a_end = 4; + int b_begin = 3; + int b_end = 6; + auto a = Kokkos::subview(a_char,std::pair(a_begin,a_char.extent(0)-a_end)); + auto b = Kokkos::subview(b_char,std::pair(b_begin,b_char.extent(0)-b_end)); + auto a_copy = Kokkos::subview(a_char_copy,std::pair(a_begin,a_char_copy.extent(0)-a_end)); + Kokkos::deep_copy(b,a); + Kokkos::deep_copy(a_copy,b); + int check = compare_equal(a_copy,a); + ASSERT_EQ( check, 0 ); + } + + { + int a_begin = 0; + int a_end = 8; + int b_begin = 2; + int b_end = 6; + auto a = Kokkos::subview(a_char,std::pair(a_begin,a_char.extent(0)-a_end)); + auto b = Kokkos::subview(b_char,std::pair(b_begin,b_char.extent(0)-b_end)); + auto a_copy = Kokkos::subview(a_char_copy,std::pair(a_begin,a_char_copy.extent(0)-a_end)); + Kokkos::deep_copy(b,a); + Kokkos::deep_copy(a_copy,b); + int check = compare_equal(a_copy,a); + ASSERT_EQ( check, 0 ); + } + + { + int a_begin = 2; + int a_end = 6; + int b_begin = 0; + int b_end = 8; + auto a = Kokkos::subview(a_char,std::pair(a_begin,a_char.extent(0)-a_end)); + auto b = Kokkos::subview(b_char,std::pair(b_begin,b_char.extent(0)-b_end)); + auto a_copy = Kokkos::subview(a_char_copy,std::pair(a_begin,a_char_copy.extent(0)-a_end)); + Kokkos::deep_copy(b,a); + Kokkos::deep_copy(a_copy,b); + int check = compare_equal(a_copy,a); + ASSERT_EQ( check, 0 ); + } + + } +}; +} + +TEST_F( TEST_CATEGORY, deep_copy_alignment ) +{ + { Impl::TestDeepCopy< TEST_EXECSPACE::memory_space , TEST_EXECSPACE::memory_space >::run_test( 100000 ); } + { Impl::TestDeepCopy< Kokkos::HostSpace , TEST_EXECSPACE::memory_space >::run_test( 100000 ); } + { Impl::TestDeepCopy< TEST_EXECSPACE::memory_space , Kokkos::HostSpace >::run_test( 100000 ); } +} + +} diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp index 1e1418fcbf..1261948f87 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp +++ 
b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp @@ -222,6 +222,14 @@ void check_correct_initialization( const Kokkos::InitArguments & argstruct ) { expected_nthreads = 1; } #endif + +#ifdef KOKKOS_ENABLE_HPX + // HPX uses all cores on machine by default. Skip this test. + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Experimental::HPX >::value || + std::is_same< Kokkos::DefaultHostExecutionSpace, Kokkos::Experimental::HPX >::value ) { + return; + } +#endif } int expected_numa = argstruct.num_numa; diff --git a/lib/kokkos/core/unit_test/TestLocalDeepCopy.hpp b/lib/kokkos/core/unit_test/TestLocalDeepCopy.hpp new file mode 100644 index 0000000000..31bda530a5 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestLocalDeepCopy.hpp @@ -0,0 +1,904 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include +#include +#include +#include + +#include + +namespace Test { + + template + void impl_test_local_deepcopy_teampolicy_rank_1 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. 
+ auto subA = Kokkos::subview(A, 1, 1, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + //Deep Copy + Kokkos::parallel_for( team_policy( N, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type &teamMember ) { + int lid = teamMember.league_rank();// returns a number between 0 and N + auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, 1, 1, lid, Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, 1, lid, Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(teamMember,subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i + void impl_test_local_deepcopy_teampolicy_rank_2 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. 
+ auto subA = Kokkos::subview(A, 1, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + //Deep Copy + Kokkos::parallel_for( team_policy( N, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type &teamMember ) { + int lid = teamMember.league_rank();// returns a number between 0 and N + auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(teamMember,subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i + void impl_test_local_deepcopy_teampolicy_rank_3 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. 
+ auto subA = Kokkos::subview(A, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + //Deep Copy + Kokkos::parallel_for( team_policy( N, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type &teamMember ) { + int lid = teamMember.league_rank();// returns a number between 0 and N + auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(teamMember,subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i + void impl_test_local_deepcopy_teampolicy_rank_4 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. 
+ auto subA = Kokkos::subview(A, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + //Deep Copy + Kokkos::parallel_for( team_policy( N, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type &teamMember ) { + int lid = teamMember.league_rank();// returns a number between 0 and N + auto subSrc = Kokkos::subview(A, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(teamMember,subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i + void impl_test_local_deepcopy_teampolicy_rank_5 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. 
+ auto subA = Kokkos::subview(A, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + //Deep Copy + Kokkos::parallel_for( team_policy( N, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type &teamMember ) { + int lid = teamMember.league_rank();// returns a number between 0 and N + auto subSrc = Kokkos::subview(A, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(teamMember,subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i + void impl_test_local_deepcopy_teampolicy_rank_6 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. 
+ auto subA = Kokkos::subview(A, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + //Deep Copy + Kokkos::parallel_for( team_policy( N, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type &teamMember ) { + int lid = teamMember.league_rank();// returns a number between 0 and N + auto subSrc = Kokkos::subview(A, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(teamMember,subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i + void impl_test_local_deepcopy_teampolicy_rank_7 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. 
+ Kokkos::deep_copy( A, 10.0 ); + + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + //Deep Copy + Kokkos::parallel_for( team_policy( N, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type &teamMember ) { + int lid = teamMember.league_rank();// returns a number between 0 and N + auto subSrc = Kokkos::subview(A, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, lid, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(teamMember,subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i + void impl_test_local_deepcopy_rangepolicy_rank_1 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. 
+ auto subA = Kokkos::subview(A, 1, 1, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + //Deep Copy + Kokkos::parallel_for( Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, 1, 1, i, Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, 1, i, Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, 1, i, Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,20.0); + }); + + Kokkos::deep_copy( h_B, B ); + + double sum_all = 0.0; + for(size_t i=0; i + void impl_test_local_deepcopy_rangepolicy_rank_2 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. 
+ auto subA = Kokkos::subview(A, 1, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + //Deep Copy + Kokkos::parallel_for( Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,20.0); + }); + + Kokkos::deep_copy( h_B, B ); + + double sum_all = 0.0; + for(size_t i=0; i + void impl_test_local_deepcopy_rangepolicy_rank_3 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. 
+ auto subA = Kokkos::subview(A, 1, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + //Deep Copy + Kokkos::parallel_for( Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subSrc = Kokkos::subview(A, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subDst = Kokkos::subview(B, 1, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,20.0); + }); + + Kokkos::deep_copy( h_B, B ); + + double sum_all = 0.0; + for(size_t i=0; i + void impl_test_local_deepcopy_rangepolicy_rank_4 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. 
+ auto subA = Kokkos::subview(A, 1, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + //Deep Copy + Kokkos::parallel_for( Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subSrc = Kokkos::subview(A, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subDst = Kokkos::subview(B, 1, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,20.0); + }); + + Kokkos::deep_copy( h_B, B ); + + double sum_all = 0.0; + for(size_t i=0; i + void impl_test_local_deepcopy_rangepolicy_rank_5 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. 
+ auto subA = Kokkos::subview(A, 1, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + //Deep Copy + Kokkos::parallel_for( Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subSrc = Kokkos::subview(A, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subDst = Kokkos::subview(B, 1, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,20.0); + }); + + Kokkos::deep_copy( h_B, B ); + + double sum_all = 0.0; + for(size_t i=0; i + void impl_test_local_deepcopy_rangepolicy_rank_6 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. 
+ auto subA = Kokkos::subview(A, 1, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy( subA, 10.0 ); + + //Deep Copy + Kokkos::parallel_for( Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subSrc = Kokkos::subview(A, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subDst = Kokkos::subview(B, 1, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,20.0); + }); + + Kokkos::deep_copy( h_B, B ); + + double sum_all = 0.0; + for(size_t i=0; i + void impl_test_local_deepcopy_rangepolicy_rank_7 (const int N) { + + // Allocate matrices on device. + ViewType A( "A", N, N, N, N, N, N, N, N ); + ViewType B( "B", N, N, N, N, N, N, N, N ); + + // Create host mirrors of device views. + typename ViewType::HostMirror h_A = Kokkos::create_mirror_view( A ); + typename ViewType::HostMirror h_B = Kokkos::create_mirror_view( B ); + + // Initialize A matrix. 
+ Kokkos::deep_copy( A, 10.0 ); + + //Deep Copy + Kokkos::parallel_for( Kokkos::RangePolicy(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subSrc = Kokkos::subview(A, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + auto subDst = Kokkos::subview(B, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,subSrc); + }); + + Kokkos::deep_copy( h_A, A ); + Kokkos::deep_copy( h_B, B ); + + bool test = true; + for(size_t i=0; i(0,N), KOKKOS_LAMBDA ( const int& i ) { + auto subDst = Kokkos::subview(B, i, Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(subDst,20.0); + }); + + Kokkos::deep_copy( h_B, B ); + + double sum_all = 0.0; + for(size_t i=0; i ViewType; + + { //Rank-1 + impl_test_local_deepcopy_teampolicy_rank_1(8); + } + { //Rank-2 + impl_test_local_deepcopy_teampolicy_rank_2(8); + } + { //Rank-3 + impl_test_local_deepcopy_teampolicy_rank_3(8); + } + { //Rank-4 + impl_test_local_deepcopy_teampolicy_rank_4(8); + } + { //Rank-5 + impl_test_local_deepcopy_teampolicy_rank_5(8); + } + { //Rank-6 + impl_test_local_deepcopy_teampolicy_rank_6(8); + } + { //Rank-7 + impl_test_local_deepcopy_teampolicy_rank_7(8); + } +} +//------------------------------------------------------------------------------------------------------------- +TEST_F( TEST_CATEGORY , local_deepcopy_rangepolicy_layoutleft ) +{ + typedef TEST_EXECSPACE ExecSpace; + typedef Kokkos::View ViewType; + + { //Rank-1 + impl_test_local_deepcopy_rangepolicy_rank_1(8); + } + { //Rank-2 + impl_test_local_deepcopy_rangepolicy_rank_2(8); + } + { //Rank-3 + impl_test_local_deepcopy_rangepolicy_rank_3(8); + } + { //Rank-4 + impl_test_local_deepcopy_rangepolicy_rank_4(8); + } + { //Rank-5 + impl_test_local_deepcopy_rangepolicy_rank_5(8); + } + { //Rank-6 + 
impl_test_local_deepcopy_rangepolicy_rank_6(8); + } + { //Rank-7 + impl_test_local_deepcopy_rangepolicy_rank_7(8); + } +} +//------------------------------------------------------------------------------------------------------------- +TEST_F( TEST_CATEGORY , local_deepcopy_teampolicy_layoutright ) +{ + typedef TEST_EXECSPACE ExecSpace; + typedef Kokkos::View ViewType; + + { //Rank-1 + impl_test_local_deepcopy_teampolicy_rank_1(8); + } + { //Rank-2 + impl_test_local_deepcopy_teampolicy_rank_2(8); + } + { //Rank-3 + impl_test_local_deepcopy_teampolicy_rank_3(8); + } + { //Rank-4 + impl_test_local_deepcopy_teampolicy_rank_4(8); + } + { //Rank-5 + impl_test_local_deepcopy_teampolicy_rank_5(8); + } + { //Rank-6 + impl_test_local_deepcopy_teampolicy_rank_6(8); + } + { //Rank-7 + impl_test_local_deepcopy_teampolicy_rank_7(8); + } +} +//------------------------------------------------------------------------------------------------------------- +TEST_F( TEST_CATEGORY , local_deepcopy_rangepolicy_layoutright ) +{ + typedef TEST_EXECSPACE ExecSpace; + typedef Kokkos::View ViewType; + + { //Rank-1 + impl_test_local_deepcopy_rangepolicy_rank_1(8); + } + { //Rank-2 + impl_test_local_deepcopy_rangepolicy_rank_2(8); + } + { //Rank-3 + impl_test_local_deepcopy_rangepolicy_rank_3(8); + } + { //Rank-4 + impl_test_local_deepcopy_rangepolicy_rank_4(8); + } + { //Rank-5 + impl_test_local_deepcopy_rangepolicy_rank_5(8); + } + { //Rank-6 + impl_test_local_deepcopy_rangepolicy_rank_6(8); + } + { //Rank-7 + impl_test_local_deepcopy_rangepolicy_rank_7(8); + } +} +#endif +#endif +} diff --git a/lib/kokkos/core/unit_test/TestMDRange.hpp b/lib/kokkos/core/unit_test/TestMDRange.hpp index a382a20700..cea89a4872 100644 --- a/lib/kokkos/core/unit_test/TestMDRange.hpp +++ b/lib/kokkos/core/unit_test/TestMDRange.hpp @@ -351,6 +351,7 @@ struct TestMDRange_2D { Kokkos::Sum< value_type > reducer_view( sum_view ); parallel_reduce( range, functor, reducer_view); + Kokkos::fence(); sum = sum_view(); 
ASSERT_EQ( sum, 2 * N0 * N1 ); @@ -931,6 +932,7 @@ struct TestMDRange_3D { Kokkos::Sum< value_type > reducer_view( sum_view ); parallel_reduce( range, functor, reducer_view); + Kokkos::fence(); sum = sum_view(); ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); @@ -1502,6 +1504,7 @@ struct TestMDRange_4D { Kokkos::Sum< value_type > reducer_view( sum_view ); parallel_reduce( range, functor, reducer_view); + Kokkos::fence(); sum = sum_view(); ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 ); @@ -2089,6 +2092,7 @@ struct TestMDRange_5D { Kokkos::Sum< value_type > reducer_view( sum_view ); parallel_reduce( range, functor, reducer_view); + Kokkos::fence(); sum = sum_view(); ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 ); @@ -2607,6 +2611,7 @@ struct TestMDRange_6D { Kokkos::Sum< value_type > reducer_view( sum_view ); parallel_reduce( range, functor, reducer_view); + Kokkos::fence(); sum = sum_view(); ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 * N5 ); diff --git a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp index efbb32e387..be744a7712 100644 --- a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp +++ b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp @@ -476,6 +476,9 @@ private: void test_run_time_parameters_type() { int league_size = 131; int team_size = 4 < policy_t::execution_space::concurrency() ? 
4 : policy_t::execution_space::concurrency(); +#ifdef KOKKOS_ENABLE_HPX + team_size = 1; +#endif int chunk_size = 4; int per_team_scratch = 1024; int per_thread_scratch = 16; diff --git a/lib/kokkos/core/unit_test/TestReduceCombinatorical.hpp b/lib/kokkos/core/unit_test/TestReduceCombinatorical.hpp index fe947fe14e..293cc0ca59 100644 --- a/lib/kokkos/core/unit_test/TestReduceCombinatorical.hpp +++ b/lib/kokkos/core/unit_test/TestReduceCombinatorical.hpp @@ -453,15 +453,18 @@ struct TestReduceCombinatoricalInstantiation { result_view() = 0; CallParallelReduce( args..., result_view ); + Kokkos::fence(); ASSERT_EQ( expected_result, result_view() ); value = 0; CallParallelReduce( args..., Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryTraits >( &value ) ); + Kokkos::fence(); ASSERT_EQ( expected_result, value ); result_view() = 0; const Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryTraits > result_view_const_um = result_view; CallParallelReduce( args..., result_view_const_um ); + Kokkos::fence(); ASSERT_EQ( expected_result, result_view_const_um() ); value = 0; @@ -526,18 +529,21 @@ struct TestReduceCombinatoricalInstantiation { h_r() = 0; Kokkos::deep_copy( result_view, h_r ); CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarFinal< ISTEAM >( result_view ) ); + Kokkos::fence(); Kokkos::deep_copy( h_r, result_view ); ASSERT_EQ( expected_result, h_r() ); h_r() = 0; Kokkos::deep_copy( result_view, h_r ); CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarJoinFinal< ISTEAM >( result_view ) ); + Kokkos::fence(); Kokkos::deep_copy( h_r, result_view ); ASSERT_EQ( expected_result, h_r() ); h_r() = 0; Kokkos::deep_copy( result_view, h_r ); CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarJoinFinalInit< ISTEAM >( result_view ) ); + Kokkos::fence(); Kokkos::deep_copy( h_r, result_view ); ASSERT_EQ( expected_result, h_r() ); } diff --git a/lib/kokkos/core/unit_test/TestReduceDeviceView.hpp 
b/lib/kokkos/core/unit_test/TestReduceDeviceView.hpp index 4f65166e37..d55c5449bc 100644 --- a/lib/kokkos/core/unit_test/TestReduceDeviceView.hpp +++ b/lib/kokkos/core/unit_test/TestReduceDeviceView.hpp @@ -30,7 +30,7 @@ void test_reduce_device_view(int64_t N, PolicyType policy, ReduceFunctor functor TestIsAsynchFunctor(atomic_test)); double time0 = timer.seconds(); timer.reset(); - ExecSpace::execution_space::fence(); + typename ExecSpace::execution_space().fence(); double time_fence0 = timer.seconds(); Kokkos::deep_copy(result,0); timer.reset(); @@ -42,7 +42,7 @@ void test_reduce_device_view(int64_t N, PolicyType policy, ReduceFunctor functor double time1 = timer.seconds(); // Check whether it was asyncronous timer.reset(); - ExecSpace::execution_space::fence(); + typename ExecSpace::execution_space().fence(); double time_fence1 = timer.seconds(); Kokkos::deep_copy(reducer_result,result); Kokkos::deep_copy(result,0); @@ -55,7 +55,7 @@ void test_reduce_device_view(int64_t N, PolicyType policy, ReduceFunctor functor double time2 = timer.seconds(); // Check whether it was asyncronous timer.reset(); - ExecSpace::execution_space::fence(); + typename ExecSpace::execution_space().fence(); double time_fence2 = timer.seconds(); Kokkos::deep_copy(view_result,result); Kokkos::deep_copy(result,0); @@ -69,7 +69,7 @@ void test_reduce_device_view(int64_t N, PolicyType policy, ReduceFunctor functor // Check whether it was asyncronous timer.reset(); - ExecSpace::execution_space::fence(); + typename ExecSpace::execution_space().fence(); double time_fence3 = timer.seconds(); ASSERT_EQ(N,scalar_result); diff --git a/lib/kokkos/core/unit_test/TestReducers.hpp b/lib/kokkos/core/unit_test/TestReducers.hpp index 7270ea3375..1d77574412 100644 --- a/lib/kokkos/core/unit_test/TestReducers.hpp +++ b/lib/kokkos/core/unit_test/TestReducers.hpp @@ -319,6 +319,7 @@ struct TestReducers { sum_view() = init; Kokkos::Sum< Scalar > reducer_view( sum_view ); Kokkos::parallel_reduce( 
Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar sum_view_scalar = sum_view(); ASSERT_EQ( sum_view_scalar, reference_sum ); @@ -365,6 +366,7 @@ struct TestReducers { prod_view() = init; Kokkos::Prod< Scalar > reducer_view( prod_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar prod_view_scalar = prod_view(); ASSERT_EQ( prod_view_scalar, reference_prod ); @@ -412,6 +414,7 @@ struct TestReducers { min_view() = init; Kokkos::Min< Scalar > reducer_view( min_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar min_view_scalar = min_view(); ASSERT_EQ( min_view_scalar, reference_min ); @@ -459,6 +462,7 @@ struct TestReducers { max_view() = init; Kokkos::Max< Scalar > reducer_view( max_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar max_view_scalar = max_view(); ASSERT_EQ( max_view_scalar, reference_max ); @@ -517,6 +521,7 @@ struct TestReducers { Kokkos::View< value_type, Kokkos::HostSpace > min_view( "View" ); Kokkos::MinLoc< Scalar, int > reducer_view( min_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); value_type min_view_scalar = min_view(); ASSERT_EQ( min_view_scalar.val, reference_min ); @@ -577,6 +582,7 @@ struct TestReducers { Kokkos::View< value_type, Kokkos::HostSpace > max_view( "View" ); Kokkos::MaxLoc< Scalar, int > reducer_view( max_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); value_type max_view_scalar = max_view(); ASSERT_EQ( max_view_scalar.val, reference_max ); @@ -687,6 +693,7 @@ struct TestReducers { Kokkos::View< value_type, Kokkos::HostSpace > minmax_view( "View" ); Kokkos::MinMaxLoc< Scalar, int > reducer_view( minmax_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< 
ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); value_type minmax_view_scalar = minmax_view(); ASSERT_EQ( minmax_view_scalar.min_val, reference_min ); @@ -740,6 +747,7 @@ struct TestReducers { band_view() = init; Kokkos::BAnd< Scalar > reducer_view( band_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar band_view_scalar = band_view(); ASSERT_EQ( band_view_scalar, reference_band ); @@ -786,6 +794,7 @@ struct TestReducers { bor_view() = init; Kokkos::BOr< Scalar > reducer_view( bor_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar bor_view_scalar = bor_view(); ASSERT_EQ( bor_view_scalar, reference_bor ); @@ -832,6 +841,7 @@ struct TestReducers { land_view() = init; Kokkos::LAnd< Scalar > reducer_view( land_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar land_view_scalar = land_view(); ASSERT_EQ( land_view_scalar, reference_land ); @@ -878,6 +888,7 @@ struct TestReducers { lor_view() = init; Kokkos::LOr< Scalar > reducer_view( lor_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Kokkos::fence(); Scalar lor_view_scalar = lor_view(); ASSERT_EQ( lor_view_scalar, reference_lor ); diff --git a/lib/kokkos/core/unit_test/TestScan.hpp b/lib/kokkos/core/unit_test/TestScan.hpp index e021ed09f5..eaebb254a7 100644 --- a/lib/kokkos/core/unit_test/TestScan.hpp +++ b/lib/kokkos/core/unit_test/TestScan.hpp @@ -96,6 +96,7 @@ struct TestScan { long long int total = 0; Kokkos::parallel_scan( N, *this, total ); + run_check( size_t( ( N+1 )*N/2 ), size_t( total ) ); check_error(); } @@ -109,6 +110,8 @@ struct TestScan { errors = errors_a; Kokkos::parallel_scan( exec_policy( Start , N ) , *this ); + Kokkos::fence(); + check_error(); } @@ -138,7 +141,7 @@ TEST_F( TEST_CATEGORY, scan ) TestScan< TEST_EXECSPACE >( 0 ); 
TestScan< TEST_EXECSPACE >( 100000 ); TestScan< TEST_EXECSPACE >( 10000000 ); - TEST_EXECSPACE::fence(); + TEST_EXECSPACE().fence(); } @@ -153,7 +156,7 @@ TEST_F( TEST_CATEGORY, scan ) TestScanFunctor( 1000000 ); TestScanFunctor( 10000000 ); - TEST_EXECSPACE::fence(); + TEST_EXECSPACE().fence(); }*/ diff --git a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp index 1a942b89c8..c475fe55dc 100644 --- a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp +++ b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp @@ -107,6 +107,8 @@ void test_shared_alloc() ASSERT_EQ( r[i], RecordMemS::get_record( r[i]->data() ) ); }); + Kokkos::fence(); + #ifdef KOKKOS_DEBUG // Sanity check for the whole set of allocation records to which this record belongs. RecordBase::is_sane( r[0] ); @@ -120,6 +122,8 @@ void test_shared_alloc() #endif } }); + + Kokkos::fence(); } { @@ -145,6 +149,8 @@ void test_shared_alloc() ASSERT_EQ( r[i], RecordMemS::get_record( r[i]->data() ) ); }); + Kokkos::fence(); + #ifdef KOKKOS_DEBUG RecordBase::is_sane( r[0] ); #endif @@ -157,6 +163,8 @@ void test_shared_alloc() } }); + Kokkos::fence(); + ASSERT_EQ( destroy_count, int( N ) ); } @@ -196,12 +204,14 @@ void test_shared_alloc() ASSERT_EQ( track.use_count(), 1 ); } - Kokkos::parallel_for( range, [=] ( size_t i ) { + Kokkos::parallel_for( range, [=] ( size_t ) { Tracker local_tracker; local_tracker.assign_allocated_record_to_uninitialized( rec ); ASSERT_GT( rec->use_count(), 1 ); }); + Kokkos::fence(); + ASSERT_EQ( rec->use_count(), 1 ); ASSERT_EQ( track.use_count(), 1 ); diff --git a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp index ac32a01fb8..361e8da9e1 100644 --- a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp +++ b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp @@ -47,11 +47,15 @@ #include #if defined( KOKKOS_ENABLE_TASKDAG ) #include +#include #include #include #include 
+//============================================================================== +// {{{1 + namespace TestTaskScheduler { namespace { @@ -72,29 +76,30 @@ long eval_fib( long n ) } -template< typename Space > +template< typename Scheduler > struct TestFib { - typedef Kokkos::TaskScheduler< Space > sched_type; - typedef Kokkos::Future< long, Space > future_type; - typedef long value_type; + using sched_type = Scheduler; + using future_type = Kokkos::BasicFuture< long, Scheduler >; + using value_type = long; - sched_type sched; future_type fib_m1; future_type fib_m2; const value_type n; KOKKOS_INLINE_FUNCTION - TestFib( const sched_type & arg_sched, const value_type arg_n ) - : sched( arg_sched ), fib_m1(), fib_m2(), n( arg_n ) {} + TestFib( const value_type arg_n ) + : fib_m1(), fib_m2(), n( arg_n ) {} KOKKOS_INLINE_FUNCTION - void operator()( typename sched_type::member_type &, value_type & result ) + void operator()( typename sched_type::member_type & member, value_type & result ) { #if 0 printf( "\nTestFib(%ld) %d %d\n", n, int( !fib_m1.is_null() ), int( !fib_m2.is_null() ) ); #endif + auto& sched = member.scheduler(); + if ( n < 2 ) { result = n; } @@ -107,13 +112,13 @@ struct TestFib // path to completion. fib_m2 = Kokkos::task_spawn( Kokkos::TaskSingle( sched, Kokkos::TaskPriority::High ) - , TestFib( sched, n - 2 ) ); + , TestFib( n - 2 ) ); fib_m1 = Kokkos::task_spawn( Kokkos::TaskSingle( sched ) - , TestFib( sched, n - 1 ) ); + , TestFib( n - 1 ) ); - Kokkos::Future< Space > dep[] = { fib_m1, fib_m2 }; - Kokkos::Future< Space > fib_all = Kokkos::when_all( dep, 2 ); + Kokkos::BasicFuture dep[] = { fib_m1, fib_m2 }; + Kokkos::BasicFuture fib_all = sched.when_all( dep, 2 ); if ( !fib_m2.is_null() && !fib_m1.is_null() && !fib_all.is_null() ) { // High priority to retire this branch. 
@@ -123,9 +128,9 @@ struct TestFib #if 1 printf( "TestFib(%ld) insufficient memory alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" , n - , sched.allocation_capacity() - , sched.allocated_task_count_max() - , sched.allocated_task_count_accum() + , 0 //sched.allocation_capacity() + , 0 //sched.allocated_task_count_max() + , 0l //sched.allocated_task_count_accum() ); #endif @@ -149,12 +154,18 @@ struct TestFib , std::min(size_t(MaxBlockSize),MemoryCapacity) , std::min(size_t(SuperBlockSize),MemoryCapacity) ); - future_type f = Kokkos::host_spawn( Kokkos::TaskSingle( root_sched ) - , TestFib( root_sched, i ) ); + { + future_type f = Kokkos::host_spawn( Kokkos::TaskSingle( root_sched ) + , TestFib( i ) ); + + Kokkos::wait( root_sched ); + + ASSERT_EQ( eval_fib( i ), f.get() ); + } + + ASSERT_EQ(root_sched.queue().allocation_count(), 0); - Kokkos::wait( root_sched ); - ASSERT_EQ( eval_fib( i ), f.get() ); #if 0 fprintf( stdout, "\nTestFib::run(%d) spawn_size(%d) when_all_size(%d) alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" @@ -172,32 +183,36 @@ struct TestFib } // namespace TestTaskScheduler +// end TestFib }}}1 +//============================================================================== + //---------------------------------------------------------------------------- +//============================================================================== +// {{{1 + namespace TestTaskScheduler { -template< class Space > +template< class Scheduler > struct TestTaskDependence { - typedef Kokkos::TaskScheduler< Space > sched_type; - typedef Kokkos::Future< Space > future_type; - typedef Kokkos::View< long, Space > accum_type; + typedef Scheduler sched_type; + typedef Kokkos::BasicFuture< void, Scheduler > future_type; + typedef Kokkos::View< long, typename sched_type::execution_space > accum_type; typedef void value_type; - sched_type m_sched; accum_type m_accum; long m_count; KOKKOS_INLINE_FUNCTION TestTaskDependence( long n - , const sched_type & arg_sched , const 
accum_type & arg_accum ) - : m_sched( arg_sched ) - , m_accum( arg_accum ) + : m_accum( arg_accum ) , m_count( n ) {} KOKKOS_INLINE_FUNCTION - void operator()( typename sched_type::member_type & ) + void operator()( typename sched_type::member_type & member ) { + auto& sched = member.scheduler(); enum { CHUNK = 8 }; const int n = CHUNK < m_count ? CHUNK : m_count; @@ -206,14 +221,14 @@ struct TestTaskDependence { const int increment = ( m_count + n - 1 ) / n; future_type f = - m_sched.when_all( n , [this,increment]( int i ) { + sched.when_all( n , [this,&member,increment]( int i ) { const long inc = increment ; const long begin = i * inc ; const long count = begin + inc < m_count ? inc : m_count - begin ; return Kokkos::task_spawn - ( Kokkos::TaskSingle( m_sched ) - , TestTaskDependence( count, m_sched, m_accum ) ); + ( Kokkos::TaskSingle( member.scheduler() ) + , TestTaskDependence( count, m_accum ) ); }); m_count = 0; @@ -244,7 +259,7 @@ struct TestTaskDependence { typename accum_type::HostMirror host_accum = Kokkos::create_mirror_view( accum ); - Kokkos::host_spawn( Kokkos::TaskSingle( sched ), TestTaskDependence( n, sched, accum ) ); + Kokkos::host_spawn( Kokkos::TaskSingle( sched ), TestTaskDependence( n, accum ) ); Kokkos::wait( sched ); @@ -256,22 +271,25 @@ struct TestTaskDependence { } // namespace TestTaskScheduler +// end TestTaskDependence }}}1 +//============================================================================== + //---------------------------------------------------------------------------- namespace TestTaskScheduler { -template< class ExecSpace > +template< class Scheduler > struct TestTaskTeam { //enum { SPAN = 8 }; enum { SPAN = 33 }; //enum { SPAN = 1 }; typedef void value_type; - typedef Kokkos::TaskScheduler< ExecSpace > sched_type; - typedef Kokkos::Future< ExecSpace > future_type; + using sched_type = Scheduler; + using future_type = Kokkos::BasicFuture; + using ExecSpace = typename sched_type::execution_space; typedef 
Kokkos::View< long*, ExecSpace > view_type; - sched_type sched; future_type future; view_type parfor_result; @@ -281,14 +299,12 @@ struct TestTaskTeam { const long nvalue; KOKKOS_INLINE_FUNCTION - TestTaskTeam( const sched_type & arg_sched - , const view_type & arg_parfor_result + TestTaskTeam( const view_type & arg_parfor_result , const view_type & arg_parreduce_check , const view_type & arg_parscan_result , const view_type & arg_parscan_check , const long arg_nvalue ) - : sched( arg_sched ) - , future() + : future() , parfor_result( arg_parfor_result ) , parreduce_check( arg_parreduce_check ) , parscan_result( arg_parscan_result ) @@ -298,21 +314,22 @@ struct TestTaskTeam { KOKKOS_INLINE_FUNCTION void operator()( typename sched_type::member_type & member ) { + auto& sched = member.scheduler(); const long end = nvalue + 1; + // begin = max(end - SPAN, 0); const long begin = 0 < end - SPAN ? end - SPAN : 0; if ( 0 < begin && future.is_null() ) { if ( member.team_rank() == 0 ) { future = Kokkos::task_spawn( Kokkos::TaskTeam( sched ) - , TestTaskTeam( sched - , parfor_result + , TestTaskTeam( parfor_result , parreduce_check , parscan_result , parscan_check , begin - 1 ) ); - #ifndef __HCC_ACCELERATOR__ + #if !defined(__HCC_ACCELERATOR__) && !defined(__CUDA_ARCH__) assert( !future.is_null() ); #endif @@ -449,8 +466,7 @@ struct TestTaskTeam { host_parscan_check = Kokkos::create_mirror_view( root_parscan_check ); future_type f = Kokkos::host_spawn( Kokkos::TaskTeam( root_sched ) - , TestTaskTeam( root_sched - , root_parfor_result + , TestTaskTeam( root_parfor_result , root_parreduce_check , root_parscan_result , root_parscan_check @@ -492,27 +508,25 @@ struct TestTaskTeam { } }; -template< class ExecSpace > +template< class Scheduler > struct TestTaskTeamValue { enum { SPAN = 8 }; typedef long value_type; - typedef Kokkos::TaskScheduler< ExecSpace > sched_type; - typedef Kokkos::Future< value_type, ExecSpace > future_type; + using sched_type = Scheduler; + using 
future_type = Kokkos::BasicFuture< value_type, sched_type >; + using ExecSpace = typename sched_type::execution_space; typedef Kokkos::View< long*, ExecSpace > view_type; - sched_type sched; future_type future; view_type result; const long nvalue; KOKKOS_INLINE_FUNCTION - TestTaskTeamValue( const sched_type & arg_sched - , const view_type & arg_result + TestTaskTeamValue( const view_type & arg_result , const long arg_nvalue ) - : sched( arg_sched ) - , future() + : future() , result( arg_result ) , nvalue( arg_nvalue ) {} @@ -523,12 +537,16 @@ struct TestTaskTeamValue { const long end = nvalue + 1; const long begin = 0 < end - SPAN ? end - SPAN : 0; + auto& sched = member.scheduler(); + if ( 0 < begin && future.is_null() ) { if ( member.team_rank() == 0 ) { - future = sched.task_spawn( TestTaskTeamValue( sched, result, begin - 1 ) + future = sched.task_spawn( TestTaskTeamValue( result, begin - 1 ) , Kokkos::TaskTeam ); + #if !defined(__HCC_ACCELERATOR__) && !defined(__CUDA_ARCH__) assert( !future.is_null() ); + #endif sched.respawn( this , future ); } @@ -565,7 +583,7 @@ struct TestTaskTeamValue { typename view_type::HostMirror host_result = Kokkos::create_mirror_view( root_result ); - future_type fv = root_sched.host_spawn( TestTaskTeamValue( root_sched, root_result, n ) + future_type fv = root_sched.host_spawn( TestTaskTeamValue( root_result, n ) , Kokkos::TaskTeam ); Kokkos::wait( root_sched ); @@ -594,31 +612,30 @@ struct TestTaskTeamValue { namespace TestTaskScheduler { -template< class Space > +template< class Scheduler > struct TestTaskSpawnWithPool { - typedef Kokkos::TaskScheduler< Space > sched_type; - typedef Kokkos::Future< Space > future_type; + using sched_type = Scheduler; + using future_type = Kokkos::BasicFuture; typedef void value_type; + using Space = typename sched_type::execution_space; - sched_type m_sched ; int m_count ; Kokkos::MemoryPool m_pool ; KOKKOS_INLINE_FUNCTION - TestTaskSpawnWithPool( const sched_type & arg_sched - , const int & 
arg_count - , const Kokkos::MemoryPool & arg_pool - ) - : m_sched( arg_sched ) - , m_count( arg_count ) + TestTaskSpawnWithPool( + const int & arg_count, + const Kokkos::MemoryPool & arg_pool + ) + : m_count( arg_count ) , m_pool( arg_pool ) {} KOKKOS_INLINE_FUNCTION - void operator()( typename sched_type::member_type & ) + void operator()( typename sched_type::member_type & member ) { if ( m_count ) { - Kokkos::task_spawn( Kokkos::TaskSingle( m_sched ) , TestTaskSpawnWithPool( m_sched , m_count - 1, m_pool ) ); + Kokkos::task_spawn( Kokkos::TaskSingle( member.scheduler() ) , TestTaskSpawnWithPool( m_count - 1, m_pool ) ); } } @@ -639,7 +656,7 @@ struct TestTaskSpawnWithPool { using other_memory_space = typename Space::memory_space; Kokkos::MemoryPool pool(other_memory_space(), 10000, 100, 200, 1000); - auto f = Kokkos::host_spawn( Kokkos::TaskSingle( sched ), TestTaskSpawnWithPool( sched, 3, pool ) ); + auto f = Kokkos::host_spawn( Kokkos::TaskSingle( sched ), TestTaskSpawnWithPool( 3, pool ) ); Kokkos::wait( sched ); } @@ -647,36 +664,307 @@ struct TestTaskSpawnWithPool { } -namespace Test { +//---------------------------------------------------------------------------- -TEST_F( TEST_CATEGORY, task_fib ) -{ - const int N = 27 ; - for ( int i = 0; i < N; ++i ) { - TestTaskScheduler::TestFib< TEST_EXECSPACE >::run( i , ( i + 1 ) * ( i + 1 ) * 2000 ); +namespace TestTaskScheduler { + +template< class Scheduler > +struct TestTaskCtorsDevice { + using sched_type = Scheduler; + using future_type = Kokkos::BasicFuture; + using value_type = void; + using Space = typename sched_type::execution_space; + + int m_count; + + KOKKOS_INLINE_FUNCTION + TestTaskCtorsDevice(const int & arg_count) : m_count(arg_count) { } + + KOKKOS_INLINE_FUNCTION + void operator()(typename sched_type::member_type& member ) + { + // Note: Default construction on the device is not allowed + if(m_count == 4) { + Kokkos::task_spawn( + Kokkos::TaskSingle(member.scheduler()), + 
TestTaskCtorsDevice(m_count - 1) + ); + } + else if(m_count == 3) { + sched_type s = member.scheduler(); // move construct + s = member.scheduler(); // move assignment + Kokkos::task_spawn( + Kokkos::TaskSingle(s), + TestTaskCtorsDevice(m_count - 1) + ); + } + else if(m_count == 2) { + sched_type s3 = member.scheduler(); // move construct from member.scheduler(); + Kokkos::task_spawn( + Kokkos::TaskSingle(s3), + TestTaskCtorsDevice(m_count - 1) + ); + } + else if(m_count == 1) { + sched_type s = member.scheduler(); // move construct from member.scheduler(); + sched_type s2 = s; // copy construct from s + Kokkos::task_spawn( + Kokkos::TaskSingle(s2), + TestTaskCtorsDevice(m_count - 1) + ); + } } -} -TEST_F( TEST_CATEGORY, task_depend ) -{ - for ( int i = 0; i < 25; ++i ) { - TestTaskScheduler::TestTaskDependence< TEST_EXECSPACE >::run( i ); + static void run() + { + using memory_space = typename sched_type::memory_space; + + enum { MemoryCapacity = 16000 }; + enum { MinBlockSize = 64 }; + enum { MaxBlockSize = 1024 }; + enum { SuperBlockSize = 4096 }; + + sched_type sched( + memory_space(), MemoryCapacity, MinBlockSize, MaxBlockSize, SuperBlockSize + ); + + auto f = Kokkos::host_spawn( + Kokkos::TaskSingle(sched), + TestTaskCtorsDevice(4) + ); + + Kokkos::wait(sched); + + // TODO assertions and sanity checks + } -} - -TEST_F( TEST_CATEGORY, task_team ) -{ - TestTaskScheduler::TestTaskTeam< TEST_EXECSPACE >::run( 1000 ); - //TestTaskScheduler::TestTaskTeamValue< TEST_EXECSPACE >::run( 1000 ); // Put back after testing. 
-} - -TEST_F( TEST_CATEGORY, task_with_mempool ) -{ - TestTaskScheduler::TestTaskSpawnWithPool< TEST_EXECSPACE >::run(); -} +}; } +//---------------------------------------------------------------------------- + + +namespace TestTaskScheduler { + +template +struct TestMultipleDependence { + + using sched_type = Scheduler; + using future_bool = Kokkos::BasicFuture; + using future_int = Kokkos::BasicFuture; + using value_type = bool; + using execution_space = typename sched_type::execution_space; + + enum : int { NPerDepth = 6 }; + enum : int { NFanout = 3 }; + + // xlC doesn't like incomplete aggregate constructors, so we have do do this manually: + KOKKOS_INLINE_FUNCTION + TestMultipleDependence(int depth, int max_depth) + : m_depth(depth), + m_max_depth(max_depth), + m_dep() + { + // gcc 4.8 has an internal compile error when I give the initializer in the class, so I have do do it here + for(int i = 0; i < NPerDepth; ++i) { + m_result_futures[i] = future_bool(); + } + } + + // xlC doesn't like incomplete aggregate constructors, so we have do do this manually: + KOKKOS_INLINE_FUNCTION + TestMultipleDependence(int depth, int max_depth, future_int dep) + : m_depth(depth), + m_max_depth(max_depth), + m_dep(dep) + { + // gcc 4.8 has an internal compile error when I give the initializer in the class, so I have do do it here + for(int i = 0; i < NPerDepth; ++i) { + m_result_futures[i] = future_bool(); + } + } + + int m_depth; + int m_max_depth; + future_int m_dep; + future_bool m_result_futures[NPerDepth]; + + + struct TestCheckReady { + future_int m_dep; + using value_type = bool; + KOKKOS_INLINE_FUNCTION + void operator()(typename Scheduler::member_type&, bool& value) { + // if it was "transiently" ready, this could be false even if we made it a dependence of this task + value = m_dep.is_ready(); + return; + } + }; + + + struct TestComputeValue { + using value_type = int; + KOKKOS_INLINE_FUNCTION + void operator()(typename Scheduler::member_type&, int& result) { + 
double value = 0; + // keep this one busy for a while + for(int i = 0; i < 10000; ++i) { + value += i * i / 7.138 / value; + } + // Do something irrelevant + result = int(value) << 2; + return; + } + }; + + + KOKKOS_INLINE_FUNCTION + void operator()(typename sched_type::member_type & member, bool& value) + { + if(m_result_futures[0].is_null()) { + if (m_depth == 0) { + // Spawn one expensive task at the root + m_dep = Kokkos::task_spawn(Kokkos::TaskSingle(member.scheduler()), TestComputeValue{}); + } + + // Then check for it to be ready in a whole bunch of other tasks that race + int n_checkers = NPerDepth; + if(m_depth < m_max_depth) { + n_checkers -= NFanout; + for(int i = n_checkers; i < NPerDepth; ++i) { + m_result_futures[i] = Kokkos::task_spawn(Kokkos::TaskSingle(member.scheduler()), + TestMultipleDependence(m_depth + 1, m_max_depth, m_dep) + ); + } + } + + for(int i = 0; i < n_checkers; ++i) { + m_result_futures[i] = member.scheduler().spawn(Kokkos::TaskSingle(m_dep), TestCheckReady{m_dep}); + } + auto done = member.scheduler().when_all(m_result_futures, NPerDepth); + Kokkos::respawn(this, done); + + return; + } + else { + value = true; + for(int i = 0; i < NPerDepth; ++i) { + value = value && !m_result_futures[i].is_null(); + if(value) { + value = value && m_result_futures[i].get(); + } + } + return; + } + } + + static void run(int depth) + { + typedef typename sched_type::memory_space memory_space; + + enum { MemoryCapacity = 1 << 30 }; + enum { MinBlockSize = 64 }; + enum { MaxBlockSize = 1024 }; + enum { SuperBlockSize = 4096 }; + + sched_type sched( memory_space() + , MemoryCapacity + , MinBlockSize + , MaxBlockSize + , SuperBlockSize ); + + auto f = Kokkos::host_spawn( Kokkos::TaskSingle( sched ), TestMultipleDependence( 0, depth ) ); + + Kokkos::wait( sched ); + + ASSERT_TRUE( f.get() ); + + } +}; + +} + +//---------------------------------------------------------------------------- + +#define KOKKOS_PP_CAT_IMPL(x, y) x ## y +#define 
KOKKOS_TEST_WITH_SUFFIX(x, y) KOKKOS_PP_CAT_IMPL(x, y) + +#define TEST_SCHEDULER_SUFFIX _deprecated +#define TEST_SCHEDULER Kokkos::DeprecatedTaskScheduler +#include "TestTaskScheduler_single.hpp" +#undef TEST_SCHEDULER +#undef TEST_SCHEDULER_SUFFIX + +#define TEST_SCHEDULER_SUFFIX _deprecated_multiple +#define TEST_SCHEDULER Kokkos::DeprecatedTaskSchedulerMultiple +#include "TestTaskScheduler_single.hpp" +#undef TEST_SCHEDULER +#undef TEST_SCHEDULER_SUFFIX + + +#define TEST_SCHEDULER_SUFFIX _single +#define TEST_SCHEDULER Kokkos::TaskScheduler +#include "TestTaskScheduler_single.hpp" +#undef TEST_SCHEDULER +#undef TEST_SCHEDULER_SUFFIX + +#define TEST_SCHEDULER_SUFFIX _multiple +#define TEST_SCHEDULER Kokkos::TaskSchedulerMultiple +#include "TestTaskScheduler_single.hpp" +#undef TEST_SCHEDULER +#undef TEST_SCHEDULER_SUFFIX + + +#define TEST_SCHEDULER_SUFFIX _chase_lev +#define TEST_SCHEDULER Kokkos::ChaseLevTaskScheduler +#include "TestTaskScheduler_single.hpp" +#undef TEST_SCHEDULER +#undef TEST_SCHEDULER_SUFFIX + +#if 0 +#define TEST_SCHEDULER_SUFFIX _fixed_mempool +#define TEST_SCHEDULER \ + Kokkos::SimpleTaskScheduler< \ + TEST_EXECSPACE, \ + Kokkos::Impl::SingleTaskQueue< \ + TEST_EXECSPACE, \ + Kokkos::Impl::default_tasking_memory_space_for_execution_space_t, \ + Kokkos::Impl::TaskQueueTraitsLockBased, \ + Kokkos::Impl::FixedBlockSizeMemoryPool< \ + Kokkos::Device>, \ + 128, \ + 16 \ + > \ + > \ + > +#include "TestTaskScheduler_single.hpp" +#undef TEST_SCHEDULER +#undef TEST_SCHEDULER_SUFFIX + +#define TEST_SCHEDULER_SUFFIX _fixed_mempool_multiple +#define TEST_SCHEDULER \ + Kokkos::SimpleTaskScheduler< \ + TEST_EXECSPACE, \ + Kokkos::Impl::MultipleTaskQueue< \ + TEST_EXECSPACE, \ + Kokkos::Impl::default_tasking_memory_space_for_execution_space_t, \ + Kokkos::Impl::TaskQueueTraitsLockBased, \ + Kokkos::Impl::FixedBlockSizeMemoryPool< \ + Kokkos::Device>, \ + 128, \ + 16 \ + > \ + > \ + > +#include "TestTaskScheduler_single.hpp" +#undef TEST_SCHEDULER +#undef 
TEST_SCHEDULER_SUFFIX +#endif + +#undef KOKKOS_TEST_WITH_SUFFIX +#undef KOKKOS_PP_CAT_IMPL + #endif // #if defined( KOKKOS_ENABLE_TASKDAG ) #endif // #ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP diff --git a/lib/kokkos/core/unit_test/TestTaskScheduler_single.hpp b/lib/kokkos/core/unit_test/TestTaskScheduler_single.hpp new file mode 100644 index 0000000000..6ac9a6d740 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestTaskScheduler_single.hpp @@ -0,0 +1,92 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace Test { + +TEST_F( TEST_CATEGORY, KOKKOS_TEST_WITH_SUFFIX(task_fib, TEST_SCHEDULER_SUFFIX) ) +{ + const int N = 27 ; + for ( int i = 0; i < N; ++i ) { + TestTaskScheduler::TestFib< TEST_SCHEDULER >::run( i , ( i + 1 ) * ( i + 1 ) * 64000 ); + } +} + +TEST_F( TEST_CATEGORY, KOKKOS_TEST_WITH_SUFFIX(task_depend, TEST_SCHEDULER_SUFFIX) ) +{ + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestTaskDependence< TEST_SCHEDULER >::run( i ); + } +} + +TEST_F( TEST_CATEGORY, KOKKOS_TEST_WITH_SUFFIX(task_team, TEST_SCHEDULER_SUFFIX) ) +{ + TestTaskScheduler::TestTaskTeam< TEST_SCHEDULER >::run( 1000 ); + //TestTaskScheduler::TestTaskTeamValue< TEST_EXECSPACE >::run( 1000 ); // Put back after testing. 
+} + +TEST_F( TEST_CATEGORY, KOKKOS_TEST_WITH_SUFFIX(task_with_mempool, TEST_SCHEDULER_SUFFIX) ) +{ + TestTaskScheduler::TestTaskSpawnWithPool::run(); +} + +TEST_F( TEST_CATEGORY, KOKKOS_TEST_WITH_SUFFIX(task_multiple_depend, TEST_SCHEDULER_SUFFIX) ) +{ + for ( int i = 2; i < 6; ++i ) { + TestTaskScheduler::TestMultipleDependence::run( i ); + } +} + +TEST_F( TEST_CATEGORY, KOKKOS_TEST_WITH_SUFFIX(task_scheduler_ctors, TEST_SCHEDULER_SUFFIX) ) +{ + TEST_SCHEDULER sched; + TEST_SCHEDULER sched2 = sched; + sched = sched2; +} + +TEST_F( TEST_CATEGORY, KOKKOS_TEST_WITH_SUFFIX(task_scheduer_ctors_device, TEST_SCHEDULER_SUFFIX) ) +{ + TestTaskScheduler::TestTaskCtorsDevice::run(); +} + + +} // end namespace Test \ No newline at end of file diff --git a/lib/kokkos/core/unit_test/TestTeam.hpp b/lib/kokkos/core/unit_test/TestTeam.hpp index 487a4d581c..5f325eb905 100644 --- a/lib/kokkos/core/unit_test/TestTeam.hpp +++ b/lib/kokkos/core/unit_test/TestTeam.hpp @@ -72,6 +72,7 @@ struct TestTeamPolicy { const int tid = member.team_rank() + member.team_size() * member.league_rank(); m_flags( member.team_rank(), member.league_rank() ) = tid; + static_assert((std::is_same::value),"TeamMember::execution_space is not the same as TeamPolicy<>::execution_space"); } KOKKOS_INLINE_FUNCTION @@ -265,7 +266,7 @@ public: Kokkos::parallel_reduce( team_exec, functor_type( nwork ), tmp ); } - execution_space::fence(); + execution_space().fence(); for ( unsigned i = 0; i < Repeat; ++i ) { for ( unsigned j = 0; j < Count; ++j ) { @@ -391,7 +392,7 @@ public: Kokkos::deep_copy( functor.accum, total ); Kokkos::parallel_reduce( team_exec, functor, result_type( & error ) ); - DeviceType::fence(); + DeviceType().fence(); Kokkos::deep_copy( accum, functor.accum ); Kokkos::deep_copy( total, functor.total ); @@ -400,7 +401,7 @@ public: ASSERT_EQ( total, accum ); } - execution_space::fence(); + execution_space().fence(); } }; @@ -495,6 +496,7 @@ struct TestSharedTeam { typename Functor::value_type 
error_count = 0; Kokkos::parallel_reduce( team_exec, Functor(), result_type( & error_count ) ); + Kokkos::fence(); ASSERT_EQ( error_count, 0 ); } @@ -569,6 +571,8 @@ struct TestLambdaSharedTeam { } }, result_type( & error_count ) ); + Kokkos::fence(); + ASSERT_EQ( error_count, 0 ); } }; @@ -679,6 +683,7 @@ struct TestScratchTeam { Kokkos::parallel_reduce( team_exec.set_scratch_size( 1, Kokkos::PerTeam( team_scratch_size ), Kokkos::PerThread( thread_scratch_size ) ), Functor(), result_type( & error_count ) ); + Kokkos::fence(); ASSERT_EQ( error_count, 0 ); } }; @@ -822,7 +827,6 @@ struct ClassNoShmemSizeFunction { Kokkos::TeamPolicy< TagReduce, ExecSpace, ScheduleType > policy( 10, team_size, 16 ); Kokkos::parallel_reduce( policy.set_scratch_size( 0, Kokkos::PerTeam( per_team0 ), Kokkos::PerThread( per_thread0 ) ).set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), Kokkos::PerThread( per_thread1 ) ), *this, error ); - Kokkos::fence(); ASSERT_EQ( error, 0 ); } @@ -877,7 +881,6 @@ struct ClassWithShmemSizeFunction { Kokkos::parallel_reduce( policy.set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), Kokkos::PerThread( per_thread1 ) ), *this, error ); - Kokkos::fence(); ASSERT_EQ( error, 0 ); } @@ -929,7 +932,6 @@ void test_team_mulit_level_scratch_test_lambda() { count += test_team_mulit_level_scratch_loop_body< ExecSpace >( team ); }, error ); ASSERT_EQ( error, 0 ); - Kokkos::fence(); #endif #endif } diff --git a/lib/kokkos/core/unit_test/TestTeamVector.hpp b/lib/kokkos/core/unit_test/TestTeamVector.hpp index 498d156db3..45433012f9 100644 --- a/lib/kokkos/core/unit_test/TestTeamVector.hpp +++ b/lib/kokkos/core/unit_test/TestTeamVector.hpp @@ -290,17 +290,23 @@ struct functor_team_reduce { functor_team_reduce( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} typedef typename ExecutionSpace::scratch_memory_space shmem_space; - typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; - unsigned team_shmem_size( int 
team_size ) const { return shared_int::shmem_size(team_size*13); } + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_scalar_t; + unsigned team_shmem_size( int team_size ) const { return shared_scalar_t::shmem_size(team_size*13); } KOKKOS_INLINE_FUNCTION void operator()( typename policy_type::member_type team ) const { Scalar value = Scalar(); + shared_scalar_t shared_value(team.team_scratch(0),1); Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) { val += i - team.league_rank() + team.league_size() + team.team_size(); }, value ); + + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, shared_value(0) ); team.team_barrier(); @@ -314,11 +320,20 @@ struct functor_team_reduce { if ( test != value ) { if ( team.league_rank() == 0 ) { - printf( "FAILED team_parallel_reduce %i %i %f %f %lu\n", + printf( "FAILED team_parallel_reduce %i %i %lf %lf %lu\n", team.league_rank(), team.team_rank(), static_cast( test ), static_cast( value ), sizeof( Scalar ) ); } + flag() = 1; + } + if ( test != shared_value(0) ) { + if ( team.league_rank() == 0 ) { + printf( "FAILED team_parallel_reduce with shared result %i %i %lf %lf %lu\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( shared_value(0) ), sizeof( Scalar ) ); + } + flag() = 1; } }); @@ -335,12 +350,13 @@ struct functor_team_reduce_reducer { functor_team_reduce_reducer( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} typedef typename ExecutionSpace::scratch_memory_space shmem_space; - typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; - unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(team_size*13); } + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_scalar_t; + unsigned team_shmem_size( 
int team_size ) const { return shared_scalar_t::shmem_size(team_size*13); } KOKKOS_INLINE_FUNCTION void operator()( typename policy_type::member_type team ) const { Scalar value = 0; + shared_scalar_t shared_value(team.team_scratch(0),1); Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) { @@ -348,6 +364,13 @@ struct functor_team_reduce_reducer { }, Kokkos::Sum(value) ); + + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, + Kokkos::Sum(shared_value(0)) + ); team.team_barrier(); @@ -360,12 +383,19 @@ struct functor_team_reduce_reducer { } if ( test != value ) { - printf( "FAILED team_vector_parallel_reduce_reducer %i %i %f %f\n", + printf( "FAILED team_vector_parallel_reduce_reducer %i %i %lf %lf\n", team.league_rank(), team.team_rank(), static_cast( test ), static_cast( value ) ); flag() = 1; } + if ( test != shared_value(0) ) { + printf( "FAILED team_vector_parallel_reduce_reducer shared value %i %i %lf %lf\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( shared_value(0) ) ); + + flag() = 1; + } }); } }; @@ -823,7 +853,6 @@ namespace Test { // ( modified from kokkos-tutorials/GTC2016/Exercises/ThreeLevelPar ) #if ( ! 
defined( KOKKOS_ENABLE_CUDA ) ) || (defined( KOKKOS_ENABLE_CUDA_LAMBDA ) && (8000 <= CUDA_VERSION)) - template< typename ScalarType, class DeviceType > class TestTripleNestedReduce { @@ -843,6 +872,14 @@ public: if( team_size > size_type(DeviceType::execution_space::concurrency())) team_size = size_type(DeviceType::execution_space::concurrency()); +#ifdef KOKKOS_ENABLE_HPX + team_size = 1; + if (!std::is_same::value) + { + team_size = 1; + } +#endif + //typedef Kokkos::LayoutLeft Layout; typedef Kokkos::LayoutRight Layout; @@ -962,6 +999,8 @@ TEST_F( TEST_CATEGORY, triple_nested_parallelism ) } #endif TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 16, 16 ); + TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 16, 33 ); + TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 16, 19 ); #ifdef KOKKOS_ENABLE_ROCM // ROCm doesn't support team sizes not powers of two if (!std::is_same::value) #endif diff --git a/lib/kokkos/core/unit_test/TestTeamVectorRange.hpp b/lib/kokkos/core/unit_test/TestTeamVectorRange.hpp new file mode 100644 index 0000000000..86c8dab3ff --- /dev/null +++ b/lib/kokkos/core/unit_test/TestTeamVectorRange.hpp @@ -0,0 +1,464 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include +#include +#include +#include +#include + +namespace TestTeamVectorRange { + +struct my_complex { + double re, im; + int dummy; + + KOKKOS_INLINE_FUNCTION + my_complex() { + re = 0.0; + im = 0.0; + dummy = 0; + } + + KOKKOS_INLINE_FUNCTION + my_complex( const my_complex & src ) { + re = src.re; + im = src.im; + dummy = src.dummy; + } + + KOKKOS_INLINE_FUNCTION + my_complex & operator=( const my_complex & src ) { + re = src.re; + im = src.im; + dummy = src.dummy; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + my_complex & operator=( const volatile my_complex & src ) { + re = src.re; + im = src.im; + dummy = src.dummy; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + volatile my_complex & operator=( const my_complex & src ) volatile { + re = src.re; + im = src.im; + dummy = src.dummy; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + volatile my_complex & operator=( const volatile my_complex & src ) volatile { + re = src.re; + im = src.im; + dummy = src.dummy; + return *this ; + } + + KOKKOS_INLINE_FUNCTION + my_complex( const volatile my_complex & src ) { + re = src.re; + im = src.im; + dummy = src.dummy; + } + + KOKKOS_INLINE_FUNCTION + my_complex( const double & val ) { + re = val; + im = 0.0; + dummy = 0; + } + + KOKKOS_INLINE_FUNCTION + my_complex & operator+=( const my_complex & src ) { + re += src.re; + im += src.im; + dummy += src.dummy; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator+=( const volatile my_complex & src ) volatile { + re += src.re; + im += src.im; + dummy += src.dummy; + } + + KOKKOS_INLINE_FUNCTION + my_complex operator +( const my_complex & src ) { + my_complex tmp = *this; + tmp.re += src.re; + tmp.im += src.im; + tmp.dummy += src.dummy; + return tmp; + } + + KOKKOS_INLINE_FUNCTION + my_complex operator+( const volatile my_complex & src ) volatile { + my_complex tmp = *this; + 
tmp.re += src.re; + tmp.im += src.im; + tmp.dummy += src.dummy; + return tmp; + } + + KOKKOS_INLINE_FUNCTION + my_complex & operator*=( const my_complex & src ) { + double re_tmp = re * src.re - im * src.im; + double im_tmp = re * src.im + im * src.re; + re = re_tmp; + im = im_tmp; + dummy *= src.dummy; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator*=( const volatile my_complex & src ) volatile { + double re_tmp = re * src.re - im * src.im; + double im_tmp = re * src.im + im * src.re; + re = re_tmp; + im = im_tmp; + dummy *= src.dummy; + } + + KOKKOS_INLINE_FUNCTION + bool operator==( const my_complex & src ) { + return ( re == src.re ) && ( im == src.im ) && ( dummy == src.dummy ); + } + + KOKKOS_INLINE_FUNCTION + bool operator!=( const my_complex & src ) { + return ( re != src.re ) || ( im != src.im ) || ( dummy != src.dummy ); + } + + KOKKOS_INLINE_FUNCTION + bool operator!=( const double & val ) { + return ( re != val ) || ( im != 0 ) || ( dummy != 0 ); + } + + KOKKOS_INLINE_FUNCTION + my_complex & operator=( const int & val ) { + re = val; + im = 0.0; + dummy = 0; + return *this; + } + + KOKKOS_INLINE_FUNCTION + my_complex & operator=( const double & val ) { + re = val; + im = 0.0; + dummy = 0; + return *this; + } + + KOKKOS_INLINE_FUNCTION + operator double() { + return re; + } +}; +} + +namespace Kokkos { +template<> +struct reduction_identity { + typedef reduction_identity t_red_ident; + KOKKOS_FORCEINLINE_FUNCTION static TestTeamVectorRange::my_complex sum() + {return TestTeamVectorRange::my_complex(t_red_ident::sum());} + KOKKOS_FORCEINLINE_FUNCTION static TestTeamVectorRange::my_complex prod() + {return TestTeamVectorRange::my_complex(t_red_ident::prod());} +}; +} + +namespace TestTeamVectorRange { + +template< typename Scalar, class ExecutionSpace > +struct functor_teamvector_for { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View< int, Kokkos::LayoutLeft, 
ExecutionSpace > flag; + + functor_teamvector_for( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; + unsigned team_shmem_size( int team_size ) const { return shared_int::shmem_size(131); } + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { + typedef typename shmem_space::size_type size_type; + const size_type shmemSize = 131; + shared_int values = shared_int( team.team_shmem(), shmemSize ); + + if ( values.data() == nullptr || values.extent(0) < shmemSize ) { + printf( "FAILED to allocate shared memory of size %u\n", + static_cast( shmemSize ) ); + } + else { + // Initialize shared memory. + Kokkos::parallel_for( Kokkos::TeamVectorRange( team, 131 ), [&] ( int i ) { + values( i ) = 0; + }); + // Wait for all memory to be written. + team.team_barrier(); + + // Accumulate value into per thread shared memory. + // This is non blocking. + Kokkos::parallel_for( Kokkos::TeamVectorRange( team, 131 ), [&] ( int i ) + { + values( i ) += i - team.league_rank() + team.league_size() + team.team_size(); + }); + + // Wait for all memory to be written. + team.team_barrier(); + + // One thread per team executes the comparison. 
+ Kokkos::single( Kokkos::PerTeam( team ), [&] () + { + Scalar test = 0; + Scalar value = 0; + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); + } + + for ( int i = 0; i < 131; ++i ) { + value += values( i ); + } + + if ( test != value ) { + printf ( "FAILED teamvector_parallel_for %i %i %f %f\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( value ) ); + flag() = 1; + } + }); + } + } +}; + +template< typename Scalar, class ExecutionSpace > +struct functor_teamvector_reduce { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + + functor_teamvector_reduce( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_scalar_t; + unsigned team_shmem_size( int team_size ) const { return shared_scalar_t::shmem_size(team_size*13); } + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { + Scalar value = Scalar(); + shared_scalar_t shared_value(team.team_scratch(0),1); + + Kokkos::parallel_reduce( Kokkos::TeamVectorRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, shared_value(0) ); + + team.team_barrier(); + Kokkos::parallel_reduce( Kokkos::TeamVectorRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, value ); + +// Kokkos::parallel_reduce( Kokkos::TeamVectorRange( team, 131 ), [&] ( int i, Scalar & val ) +// { +// val += i - team.league_rank() + team.league_size() + team.team_size(); +// }, shared_value(0) ); + + team.team_barrier(); + + Kokkos::single( Kokkos::PerTeam( team ), [&] () + { + Scalar 
test = 0; + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); + } + + if ( test != value ) { + if ( team.league_rank() == 0 ) { + printf( "FAILED teamvector_parallel_reduce %i %i %lf %lf %lu\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( value ), sizeof( Scalar ) ); + } + + flag() = 1; + } + if ( test != shared_value(0) ) { + if ( team.league_rank() == 0 ) { + printf( "FAILED teamvector_parallel_reduce with shared result %i %i %lf %lf %lu\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( shared_value(0) ), sizeof( Scalar ) ); + } + + flag() = 1; + } + }); + } +}; + +template< typename Scalar, class ExecutionSpace > +struct functor_teamvector_reduce_reducer { + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; + typedef ExecutionSpace execution_space; + + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + + functor_teamvector_reduce_reducer( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_scalar_t; + unsigned team_shmem_size( int team_size ) const { return shared_scalar_t::shmem_size(team_size*13); } + + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { + Scalar value = 0; + shared_scalar_t shared_value(team.team_scratch(0),1); + + Kokkos::parallel_reduce( Kokkos::TeamVectorRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, + Kokkos::Sum(value) + ); + + Kokkos::parallel_reduce( Kokkos::TeamVectorRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, + Kokkos::Sum(shared_value(0)) + ); + + team.team_barrier(); + + Kokkos::single( Kokkos::PerTeam( team 
), [&] () + { + Scalar test = 0; + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); + } + + if ( test != value ) { + printf( "FAILED teamvector_parallel_reduce_reducer %i %i %lf %lf\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( value ) ); + + flag() = 1; + } + if ( test != shared_value(0) ) { + printf( "FAILED teamvector_parallel_reduce_reducer shared value %i %i %lf %lf\n", + team.league_rank(), team.team_rank(), + static_cast( test ), static_cast( shared_value(0) ) ); + + flag() = 1; + } + }); + } +}; + +template< typename Scalar, class ExecutionSpace > +bool test_scalar( int nteams, int team_size, int test ) { + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > d_flag( "flag" ); + typename Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace >::HostMirror h_flag( "h_flag" ); + h_flag() = 0; + Kokkos::deep_copy( d_flag, h_flag ); + + if ( test == 0 ) { + Kokkos::parallel_for( "Test::TeamVectorFor", Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_teamvector_for< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 1 ) { + Kokkos::parallel_for( "Test::TeamVectorReduce", Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_teamvector_reduce< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 2 ) { + Kokkos::parallel_for( "Test::TeamVectorReduceReducer", Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_teamvector_reduce_reducer< Scalar, ExecutionSpace >( d_flag ) ); + } + + Kokkos::deep_copy( h_flag, d_flag ); + + return ( h_flag() == 0 ); +} + +template< class ExecutionSpace > +bool Test( int test ) { + bool passed = true; + + int team_size = 33; + if( team_size > int(ExecutionSpace::concurrency())) + team_size = int(ExecutionSpace::concurrency()); + passed = passed && test_scalar< int, ExecutionSpace >( 317, team_size, test ); + passed = passed && test_scalar< long long int, 
ExecutionSpace >( 317, team_size, test ); + passed = passed && test_scalar< float, ExecutionSpace >( 317, team_size, test ); + passed = passed && test_scalar< double, ExecutionSpace >( 317, team_size, test ); + passed = passed && test_scalar< my_complex, ExecutionSpace >( 317, team_size, test ); + + return passed; +} + +} // namespace TestTeamVectorRange + +namespace Test { + +TEST_F( TEST_CATEGORY, team_teamvector_range ) +{ + ASSERT_TRUE( ( TestTeamVectorRange::Test< TEST_EXECSPACE >( 0 ) ) ); + ASSERT_TRUE( ( TestTeamVectorRange::Test< TEST_EXECSPACE >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVectorRange::Test< TEST_EXECSPACE >( 2 ) ) ); +} +} diff --git a/lib/kokkos/core/unit_test/TestTile.hpp b/lib/kokkos/core/unit_test/TestTile.hpp index 704c7f9940..a58755dc9b 100644 --- a/lib/kokkos/core/unit_test/TestTile.hpp +++ b/lib/kokkos/core/unit_test/TestTile.hpp @@ -42,6 +42,9 @@ #ifndef TEST_TILE_HPP #define TEST_TILE_HPP +//======================================================================== +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + #include #include @@ -166,4 +169,8 @@ TEST_F( TEST_CATEGORY, tile_layout ) } } + +#endif // KOKKOS_ENABLE_DEPRECATED_CODE +//===================================================================== + #endif //TEST_TILE_HPP diff --git a/lib/kokkos/core/unit_test/TestViewAPI.hpp b/lib/kokkos/core/unit_test/TestViewAPI.hpp index 2ebd48cd61..e332bebff0 100644 --- a/lib/kokkos/core/unit_test/TestViewAPI.hpp +++ b/lib/kokkos/core/unit_test/TestViewAPI.hpp @@ -827,6 +827,48 @@ struct TestViewMirror ASSERT_EQ( a_org(5), a_h3(5) ); } + template< class MemoryTraits, class Space > + struct CopyUnInit { + typedef typename Kokkos::Impl::MirrorViewType::view_type mirror_view_type; + + mirror_view_type a_d; + + KOKKOS_INLINE_FUNCTION + CopyUnInit( mirror_view_type & a_d_ ) : a_d(a_d_) { + } + + KOKKOS_INLINE_FUNCTION + void operator() (const typename Space::size_type i) const { + a_d(i) = (double)(10-i); + } + + }; + + template< class MemoryTraits > + void 
static test_mirror_no_initialize() { + Kokkos::View< double*, Layout, Kokkos::HostSpace > a_org( "A", 10 ); + Kokkos::View< double*, Layout, Kokkos::HostSpace, MemoryTraits > a_h = a_org; + + for (int i = 0; i < 10; i++) + { + a_h(i) = (double)i; + } + auto a_d = Kokkos::create_mirror_view( DeviceType(), a_h, Kokkos::WithoutInitializing ); + + int equal_ptr_h_d = (a_h.data() == a_d.data()) ? 1 : 0; + constexpr int is_same_memspace = std::is_same< Kokkos::HostSpace, typename DeviceType::memory_space >::value ? 1 : 0; + + ASSERT_EQ( equal_ptr_h_d, is_same_memspace); + + Kokkos::parallel_for( Kokkos::RangePolicy< typename DeviceType::execution_space >( 0, int(10)), CopyUnInit< MemoryTraits, DeviceType >(a_d)); + + Kokkos::deep_copy( a_h, a_d ); + + for (int i = 0; i < 10; i++) + { + ASSERT_EQ(a_h(i), (double)(10-i)); + } + } void static testit() { test_mirror< Kokkos::MemoryTraits<0> >(); @@ -835,6 +877,8 @@ struct TestViewMirror test_mirror_view< Kokkos::MemoryTraits >(); test_mirror_copy< Kokkos::MemoryTraits<0> >(); test_mirror_copy< Kokkos::MemoryTraits >(); + test_mirror_no_initialize< Kokkos::MemoryTraits<0> >(); + test_mirror_no_initialize< Kokkos::MemoryTraits >(); } }; @@ -865,7 +909,7 @@ public: } static void run_test_view_operator_a() { - {TestViewOperator< T, device > f; Kokkos::parallel_for(int(N0),f);} + {TestViewOperator< T, device > f; Kokkos::parallel_for(int(N0),f); Kokkos::fence();} #ifndef KOKKOS_ENABLE_OPENMPTARGET TestViewOperator_LeftAndRight< int[2][3][4][2][3][4], device >f6; f6.testit(); TestViewOperator_LeftAndRight< int[2][3][4][2][3], device >f5; f5.testit(); diff --git a/lib/kokkos/core/unit_test/TestViewAPI_e.hpp b/lib/kokkos/core/unit_test/TestViewAPI_e.hpp index efb34a64cc..76815dc112 100644 --- a/lib/kokkos/core/unit_test/TestViewAPI_e.hpp +++ b/lib/kokkos/core/unit_test/TestViewAPI_e.hpp @@ -194,6 +194,7 @@ inline void test_anonymous_space() { d_anon_dyn_view(j) += 42; } }); + Kokkos::fence(); #endif } @@ -201,4 +202,45 @@ TEST_F( 
TEST_CATEGORY, anonymous_space ) { test_anonymous_space(); } + +template +struct TestViewOverloadResolution { + // Overload based on value_type and rank + static int foo(Kokkos::View a) { + return 1; + } + static int foo(Kokkos::View a) { + return 2; + } + static int foo(Kokkos::View a) { + return 3; + } + + // Overload based on compile time dimensions + static int bar(Kokkos::View a) { + return 4; + } + static int bar(Kokkos::View a) { + return 5; + } + + static void test_function_overload() { + Kokkos::View a("A",10,3); + int data_type_1 = foo(a); + int data_type_3 = foo(Kokkos::View(a)); + Kokkos::View b("B",10,3,4); + int data_type_2 = foo(b); + Kokkos::View c(a); + int static_extent = bar(c); + ASSERT_EQ(1,data_type_1); + ASSERT_EQ(3,data_type_2); + ASSERT_EQ(1,data_type_3); + ASSERT_EQ(4,static_extent); + } +}; + +TEST_F( TEST_CATEGORY, view_overload_resolution ) +{ + TestViewOverloadResolution::test_function_overload(); +} } diff --git a/lib/kokkos/core/unit_test/TestViewMapping_a.hpp b/lib/kokkos/core/unit_test/TestViewMapping_a.hpp index 03d5e501b9..69247902cd 100644 --- a/lib/kokkos/core/unit_test/TestViewMapping_a.hpp +++ b/lib/kokkos/core/unit_test/TestViewMapping_a.hpp @@ -1012,12 +1012,14 @@ void test_view_mapping() ASSERT_EQ( a.use_count(), 1 ); ASSERT_EQ( b.use_count(), 0 ); -#if !defined( KOKKOS_ENABLE_CUDA_LAMBDA ) && !defined( KOKKOS_ENABLE_ROCM ) +// TODO: a.use_count() and x.use_count() are 0 with the asynchronous HPX backend. Why? +#if !defined( KOKKOS_ENABLE_CUDA_LAMBDA ) && !defined( KOKKOS_ENABLE_ROCM ) && \ + !(defined( KOKKOS_ENABLE_HPX ) && defined( KOKKOS_ENABLE_HPX_ASYNC_DISPATCH )) // Cannot launch host lambda when CUDA lambda is enabled. 
typedef typename Kokkos::Impl::HostMirror< Space >::Space::execution_space host_exec_space; - Kokkos::parallel_for( Kokkos::RangePolicy< host_exec_space >( 0, 10 ), KOKKOS_LAMBDA ( int i ) { + Kokkos::parallel_for( Kokkos::RangePolicy< host_exec_space >( 0, 10 ), KOKKOS_LAMBDA ( int ) { // 'a' is captured by copy, and the capture mechanism converts 'a' to an // unmanaged copy. When the parallel dispatch accepts a move for the // lambda, this count should become 1. diff --git a/lib/kokkos/core/unit_test/TestViewMapping_b.hpp b/lib/kokkos/core/unit_test/TestViewMapping_b.hpp index 7c7807f60d..36fc0461a4 100644 --- a/lib/kokkos/core/unit_test/TestViewMapping_b.hpp +++ b/lib/kokkos/core/unit_test/TestViewMapping_b.hpp @@ -173,12 +173,12 @@ void test_view_mapping_class_value() { typedef typename Space::execution_space ExecSpace; - ExecSpace::fence(); + ExecSpace().fence(); { Kokkos::View< MappingClassValueType, ExecSpace > a( "a" ); - ExecSpace::fence(); + ExecSpace().fence(); } - ExecSpace::fence(); + ExecSpace().fence(); } TEST_F( TEST_CATEGORY , view_mapping_class_value ) diff --git a/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp b/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp index 0c2d22e013..62bd582871 100644 --- a/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp +++ b/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp @@ -201,6 +201,7 @@ struct TestViewMappingSubview long error_count = -1; Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, 1 ), *this, error_count ); + ASSERT_EQ( error_count, 0 ); } }; diff --git a/lib/kokkos/core/unit_test/TestViewSubview.hpp b/lib/kokkos/core/unit_test/TestViewSubview.hpp index 207fbb148d..bffc77181f 100644 --- a/lib/kokkos/core/unit_test/TestViewSubview.hpp +++ b/lib/kokkos/core/unit_test/TestViewSubview.hpp @@ -48,6 +48,86 @@ #include #include #include +#include + +// TODO @refactoring move this to somewhere common + 
+//------------------------------------------------------------------------------ + +template +struct _kokkos____________________static_test_failure_____; + +template +struct static_predicate_message {}; + +//------------------------------------------------------------------------------ + +template class, class...> +struct static_assert_predicate_true_impl; + +template