Kokkos lib update

This commit is contained in:
Steve Plimpton
2016-09-08 13:56:18 -06:00
parent 0252347d43
commit 236ebf7fab
212 changed files with 18902 additions and 13466 deletions

View File

@ -5,7 +5,7 @@ default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXX = ../../../../config/nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =

View File

@ -142,7 +142,7 @@ int main (int narg, char* arg[]) {
// Measure time to execute the contraction kernel when giving it a
// LayoutLeft view for v1 and a LayoutRight view for v2. This should be
// fast on GPUs and slow on CPUs
Kokkos::Impl::Timer time1;
Kokkos::Timer time1;
Kokkos::parallel_for(size,contraction<left_type,right_type>(a,l,r));
Kokkos::fence();
double sec1 = time1.seconds();
@ -154,7 +154,7 @@ int main (int narg, char* arg[]) {
// Measure time to execute the contraction kernel when giving it a
// LayoutRight view for v1 and a LayoutLeft view for v2. This should be
// fast on CPUs and slow on GPUs
Kokkos::Impl::Timer time2;
Kokkos::Timer time2;
Kokkos::parallel_for(size,contraction<right_type,left_type>(a,r,l));
Kokkos::fence();
double sec2 = time2.seconds();

View File

@ -5,7 +5,7 @@ default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXX = ../../../../config/nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =

View File

@ -124,12 +124,12 @@ int main(int narg, char* arg[]) {
// Run the localsum functor using the RandomAccess trait. On CPUs there should
// not be any difference in performance compared to not using the RandomAccess trait.
// On GPUs there can be a dramatic difference.
Kokkos::Impl::Timer time1;
Kokkos::Timer time1;
Kokkos::parallel_for(size,localsum<view_type,view_type_rnd>(idx,dest,src));
Kokkos::fence();
double sec1 = time1.seconds();
Kokkos::Impl::Timer time2;
Kokkos::Timer time2;
Kokkos::parallel_for(size,localsum<view_type,view_type>(idx,dest,src));
Kokkos::fence();
double sec2 = time2.seconds();

View File

@ -5,7 +5,7 @@ default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXX = ../../../../config/nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =

View File

@ -5,7 +5,7 @@ default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXX = ../../../../config/nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =

View File

@ -87,9 +87,9 @@ struct localsum {
// For example, the const_data_type version of double** is const
// double**.
Kokkos::View<idx_type::const_data_type, idx_type::array_layout, memory_space> idx;
// "array_intrinsic_type" is a typedef in ViewTraits (and DualView) which is the
// "scalar_array_type" is a typedef in ViewTraits (and DualView) which is the
// array version of the value(s) stored in the View.
Kokkos::View<view_type::array_intrinsic_type, view_type::array_layout, memory_space> dest;
Kokkos::View<view_type::scalar_array_type, view_type::array_layout, memory_space> dest;
Kokkos::View<view_type::const_data_type, view_type::array_layout,
memory_space, Kokkos::MemoryRandomAccess> src;
@ -150,6 +150,9 @@ protected:
int main (int narg, char* arg[]) {
Kokkos::initialize (narg, arg);
// If View is a non-trivially constructible type, add braces so that it goes out of scope
// before the Kokkos::finalize() call.
{
ParticleTypes test("Test");
Kokkos::fence();
test.h_view(0) = ParticleType(-1e4,1);
@ -182,7 +185,7 @@ int main (int narg, char* arg[]) {
// Run on the device. This will cause a sync of idx to the device,
// since it was marked as modified on the host.
Kokkos::Impl::Timer timer;
Kokkos::Timer timer;
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
Kokkos::fence();
double sec1_dev = timer.seconds();
@ -208,6 +211,7 @@ int main (int narg, char* arg[]) {
printf("Device Time with Sync: %f without Sync: %f \n",sec1_dev,sec2_dev);
printf("Host Time with Sync: %f without Sync: %f \n",sec1_host,sec2_host);
}
Kokkos::finalize();
}

View File

@ -5,7 +5,7 @@ default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXX = ../../../../config/nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =

View File

@ -97,7 +97,7 @@ int main(int narg, char* arg[]) {
Kokkos::fence();
// Run on the device
// This will cause a sync of idx to the device since it was modified on the host
Kokkos::Impl::Timer timer;
Kokkos::Timer timer;
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
Kokkos::fence();
double sec1_dev = timer.seconds();

View File

@ -5,7 +5,7 @@ default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXX = ../../../../config/nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =

View File

@ -5,7 +5,7 @@ default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXX = ../../../../config/nvcc_wrapper
CXXFLAGS = -O3 --default-stream per-thread
LINK = ${CXX}
LINKFLAGS =

View File

@ -116,7 +116,7 @@ int main(int argc, char * argv[]) {
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),FillDevice(0.0,d_a));
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),FillDevice(1.3513,d_b));
Kokkos::fence();
Kokkos::Impl::Timer timer;
Kokkos::Timer timer;
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),ComputeADevice(20,d_a,d_b));
if(synch==1)