Kokkos lib update

This commit is contained in:
Steve Plimpton
2016-09-08 13:56:18 -06:00
parent 0252347d43
commit 236ebf7fab
212 changed files with 18902 additions and 13466 deletions

View File

@ -5,7 +5,7 @@ default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXX = ../../../../config/nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =

View File

@ -5,13 +5,14 @@ default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXX = ../../../../config/nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
KOKKOS_CUDA_OPTIONS = "enable_lambda"
else
CXX = g++
CXXFLAGS = -O3

View File

@ -62,7 +62,8 @@ int main (int narg, char* args[]) {
// Set up a policy that launches 12 teams, with the maximum number
// of threads per team.
const team_policy policy (12, team_policy::team_size_max ( [=]{} ));
const team_policy policy (12, Kokkos::AUTO);
// This is a reduction with a team policy. The team policy changes
// the first argument of the lambda. Rather than an integer index

View File

@ -5,7 +5,7 @@ default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXX = ../../../../config/nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =

View File

@ -5,7 +5,7 @@ default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXX = ../../../../config/nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =

View File

@ -141,11 +141,11 @@ int main(int narg, char* args[]) {
// Each team handles a slice of the data
// Set up TeamPolicy with 512 teams with maximum number of threads per team and 16 vector lanes.
// The team_size_max function will determine the maximum number of threads taking into account
// shared memory requirements of the Functor.
// Kokkos::AUTO will determine the number of threads
// The maximum vector length is hardware dependent, but a vector length smaller than the hardware maximum can always be requested.
// The vector length must be a power of 2.
const Kokkos::TeamPolicy<> policy( 512 , Kokkos::TeamPolicy<>::team_size_max(SomeCorrelation(data,gsum)) , 16);
const Kokkos::TeamPolicy<> policy( 512 , Kokkos::AUTO , 16);
Kokkos::parallel_for( policy , SomeCorrelation(data,gsum) );

View File

@ -5,7 +5,7 @@ default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXX = ../../../../config/nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =

View File

@ -117,7 +117,7 @@ int main(int narg, char* args[]) {
Kokkos::DualView<int**> histogram("histogram",TEAM_SIZE,TEAM_SIZE);
Kokkos::Impl::Timer timer;
Kokkos::Timer timer;
// The number of threads per team is automatically limited to the maximum supported by the device.
Kokkos::parallel_for( team_policy( nchunks , TEAM_SIZE )
, find_2_tuples(chunk_size,data,histogram) );