Kokkos lib update
This commit is contained in:
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -142,7 +142,7 @@ int main (int narg, char* arg[]) {
|
||||
// Measure time to execute the contraction kernel when giving it a
|
||||
// LayoutLeft view for v1 and a LayoutRight view for v2. This should be
|
||||
// fast on GPUs and slow on CPUs
|
||||
Kokkos::Impl::Timer time1;
|
||||
Kokkos::Timer time1;
|
||||
Kokkos::parallel_for(size,contraction<left_type,right_type>(a,l,r));
|
||||
Kokkos::fence();
|
||||
double sec1 = time1.seconds();
|
||||
@ -154,7 +154,7 @@ int main (int narg, char* arg[]) {
|
||||
// Measure time to execute the contraction kernel when giving it a
|
||||
// LayoutRight view for v1 and a LayoutLeft view for v2. This should be
|
||||
// fast on CPUs and slow on GPUs
|
||||
Kokkos::Impl::Timer time2;
|
||||
Kokkos::Timer time2;
|
||||
Kokkos::parallel_for(size,contraction<right_type,left_type>(a,r,l));
|
||||
Kokkos::fence();
|
||||
double sec2 = time2.seconds();
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -124,12 +124,12 @@ int main(int narg, char* arg[]) {
|
||||
// Run the localsum functor using the RandomAccess trait. On CPUs there should
|
||||
// not be any difference in performance compared to not using the RandomAccess trait.
|
||||
// On GPUs there can be a dramatic difference.
|
||||
Kokkos::Impl::Timer time1;
|
||||
Kokkos::Timer time1;
|
||||
Kokkos::parallel_for(size,localsum<view_type,view_type_rnd>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
double sec1 = time1.seconds();
|
||||
|
||||
Kokkos::Impl::Timer time2;
|
||||
Kokkos::Timer time2;
|
||||
Kokkos::parallel_for(size,localsum<view_type,view_type>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
double sec2 = time2.seconds();
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -87,9 +87,9 @@ struct localsum {
|
||||
// For example, the const_data_type version of double** is const
|
||||
// double**.
|
||||
Kokkos::View<idx_type::const_data_type, idx_type::array_layout, memory_space> idx;
|
||||
// "array_intrinsic_type" is a typedef in ViewTraits (and DualView) which is the
|
||||
// "scalar_array_type" is a typedef in ViewTraits (and DualView) which is the
|
||||
// array version of the value(s) stored in the View.
|
||||
Kokkos::View<view_type::array_intrinsic_type, view_type::array_layout, memory_space> dest;
|
||||
Kokkos::View<view_type::scalar_array_type, view_type::array_layout, memory_space> dest;
|
||||
Kokkos::View<view_type::const_data_type, view_type::array_layout,
|
||||
memory_space, Kokkos::MemoryRandomAccess> src;
|
||||
|
||||
@ -150,6 +150,9 @@ protected:
|
||||
int main (int narg, char* arg[]) {
|
||||
Kokkos::initialize (narg, arg);
|
||||
|
||||
// If the View is of a non-trivially constructible type, add braces so that it goes out of scope
|
||||
// before Kokkos::finalize() call
|
||||
{
|
||||
ParticleTypes test("Test");
|
||||
Kokkos::fence();
|
||||
test.h_view(0) = ParticleType(-1e4,1);
|
||||
@ -182,7 +185,7 @@ int main (int narg, char* arg[]) {
|
||||
|
||||
// Run on the device. This will cause a sync of idx to the device,
|
||||
// since it was marked as modified on the host.
|
||||
Kokkos::Impl::Timer timer;
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
double sec1_dev = timer.seconds();
|
||||
@ -208,6 +211,7 @@ int main (int narg, char* arg[]) {
|
||||
|
||||
printf("Device Time with Sync: %f without Sync: %f \n",sec1_dev,sec2_dev);
|
||||
printf("Host Time with Sync: %f without Sync: %f \n",sec1_host,sec2_host);
|
||||
}
|
||||
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -97,7 +97,7 @@ int main(int narg, char* arg[]) {
|
||||
Kokkos::fence();
|
||||
// Run on the device
|
||||
// This will cause a sync of idx to the device since it was modified on the host
|
||||
Kokkos::Impl::Timer timer;
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
double sec1_dev = timer.seconds();
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3 --default-stream per-thread
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -116,7 +116,7 @@ int main(int argc, char * argv[]) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),FillDevice(0.0,d_a));
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),FillDevice(1.3513,d_b));
|
||||
Kokkos::fence();
|
||||
Kokkos::Impl::Timer timer;
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),ComputeADevice(20,d_a,d_b));
|
||||
|
||||
if(synch==1)
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -122,7 +122,7 @@ int main(int argc, char* args[]) {
|
||||
Kokkos::DualView<uint64_t*> vals("Vals",size*samples);
|
||||
|
||||
// Run some performance comparisons
|
||||
Kokkos::Impl::Timer timer;
|
||||
Kokkos::Timer timer;
|
||||
Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift64_Pool<> >(vals.d_view,rand_pool64,samples));
|
||||
Kokkos::fence();
|
||||
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -5,13 +5,14 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
EXE = $(SRC:.cpp=.cuda)
|
||||
KOKKOS_DEVICES = "Cuda,OpenMP"
|
||||
KOKKOS_ARCH = "SNB,Kepler35"
|
||||
KOKKOS_CUDA_OPTIONS = "enable_lambda"
|
||||
else
|
||||
CXX = g++
|
||||
CXXFLAGS = -O3
|
||||
|
||||
@ -62,7 +62,8 @@ int main (int narg, char* args[]) {
|
||||
|
||||
// Set up a policy that launches 12 teams, with the maximum number
|
||||
// of threads per team.
|
||||
const team_policy policy (12, team_policy::team_size_max ( [=]{} ));
|
||||
|
||||
const team_policy policy (12, Kokkos::AUTO);
|
||||
|
||||
// This is a reduction with a team policy. The team policy changes
|
||||
// the first argument of the lambda. Rather than an integer index
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -141,11 +141,11 @@ int main(int narg, char* args[]) {
|
||||
|
||||
// Each team handles a slice of the data
|
||||
// Set up TeamPolicy with 512 teams with maximum number of threads per team and 16 vector lanes.
|
||||
// The team_size_max function will determine the maximum number of threads taking into account
|
||||
// shared memory requirements of the Functor.
|
||||
// Kokkos::AUTO will determine the number of threads
|
||||
// The maximum vector length is hardware dependent but can always be smaller than the hardware allows.
|
||||
// The vector length must be a power of 2.
|
||||
const Kokkos::TeamPolicy<> policy( 512 , Kokkos::TeamPolicy<>::team_size_max(SomeCorrelation(data,gsum)) , 16);
|
||||
|
||||
const Kokkos::TeamPolicy<> policy( 512 , Kokkos::AUTO , 16);
|
||||
|
||||
Kokkos::parallel_for( policy , SomeCorrelation(data,gsum) );
|
||||
|
||||
|
||||
@ -5,7 +5,7 @@ default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = nvcc_wrapper
|
||||
CXX = ../../../../config/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
|
||||
@ -117,7 +117,7 @@ int main(int narg, char* args[]) {
|
||||
Kokkos::DualView<int**> histogram("histogram",TEAM_SIZE,TEAM_SIZE);
|
||||
|
||||
|
||||
Kokkos::Impl::Timer timer;
|
||||
Kokkos::Timer timer;
|
||||
// threads/team is automatically limited to maximum supported by the device.
|
||||
Kokkos::parallel_for( team_policy( nchunks , TEAM_SIZE )
|
||||
, find_2_tuples(chunk_size,data,histogram) );
|
||||
|
||||
Reference in New Issue
Block a user