git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14370 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2015-12-15 22:26:37 +00:00
parent 31f22919ab
commit 06a217aa08
327 changed files with 0 additions and 95949 deletions

View File

@ -1,43 +0,0 @@
# Build the Kokkos tutorial example(s) in this directory.
#
# Every *.cpp file found here is compiled to an object file and linked
# against Kokkos.  The compiler, the executable suffix and the Kokkos
# settings depend on whether KOKKOS_DEVICES contains "Cuda".

KOKKOS_PATH = ../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files.  Declaring them phony keeps a stray
# file called "default", "build" or "clean" from masking them.
.PHONY: default build clean

# A recipe runs after its prerequisites, so this message is printed once
# "build" has been brought up to date.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so only a value
# supplied from outside (command line or environment) selects the CUDA branch.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# DEPFLAGS is not referenced by the rules in this file.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Expected to provide the KOKKOS_* variables and the kokkos-clean target
# used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable.  "$@" names the single target being
# built, so the command stays well-formed even if EXE holds several names.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,130 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <cstdio>
#include <typeinfo>
//
// "Hello world" parallel_for example:
// 1. Start up Kokkos
// 2. Execute a parallel for loop in the default execution space,
// using a functor to define the loop body
// 3. Shut down Kokkos
//
// If Kokkos was built with C++11 enabled, try comparing this example
// to 01_hello_world_lambda. The latter uses C++11 lambdas (anonymous
// functions) to define the loop body of the parallel_for. That makes
// the code much more concise and readable. On the other hand,
// breaking out the loop body into an explicit functor makes it easier
// to test the loop independently of the parallel pattern.
//
// Functor that defines the parallel_for's loop body.
//
// A "functor" is just a class or struct with a public operator()
// instance method.
// Loop-body functor for the "hello world" parallel_for.
//
// An optional public "execution_space" typedef would restrict parallel_*
// to that execution space.  This functor declares none, so parallel_for
// runs it in the default execution space unless told otherwise (see
// "execution policies" in the tutorial).
struct hello_world {
  // Loop body, called once per loop index.
  //
  // The single integer argument is the parallel for loop index; other
  // argument lists are possible (see the "hierarchical parallelism"
  // part of the tutorial).
  //
  // The method must be const and must carry KOKKOS_INLINE_FUNCTION.
  // With CUDA, that macro marks the method as runnable on the device as
  // well as on the host; without CUDA it is unnecessary but harmless.
  KOKKOS_INLINE_FUNCTION
  void operator() (const int index) const
  {
    printf ("Hello from i = %i\n", index);
  }
};
int main (int argc, char* argv[]) {
// You must call initialize() before you may call Kokkos.
//
// With no arguments, this initializes the default execution space
// (and potentially its host execution space) with default
// parameters. You may also pass in argc and argv, analogously to
// MPI_Init(). It reads and removes command-line arguments that
// start with "--kokkos-".
Kokkos::initialize (argc, argv);
// Print the name of Kokkos' default execution space. We're using
// typeid here, so the name might get a bit mangled by the linker,
// but you should still be able to figure out what it is.
printf ("Hello World on Kokkos execution space %s\n",
typeid (Kokkos::DefaultExecutionSpace).name ());
// Run the above functor on the default Kokkos execution space in
// parallel, with a parallel for loop count of 15.
//
// The Kokkos::DefaultExecutionSpace typedef gives the default
// execution space. Depending on how Kokkos was configured, this
// could be OpenMP, Threads, Cuda, Serial, or even some other
// execution space.
//
// The following line of code would look like this in OpenMP:
//
// #pragma omp parallel for
// for (int i = 0; i < 15; ++i) {
// printf ("Hello from i = %i\n", i);
// }
//
// You may notice that the printed numbers do not print out in
// order. Parallel for loops may execute in any order.
Kokkos::parallel_for ("HelloWorld",15, hello_world ());
// You must call finalize() after you are done using Kokkos.
Kokkos::finalize ();
}

View File

@ -1,44 +0,0 @@
# Build the Kokkos tutorial example(s) in this directory.
#
# Every *.cpp file found here is compiled to an object file and linked
# against Kokkos.  The compiler, the executable suffix and the Kokkos
# settings depend on whether KOKKOS_DEVICES contains "Cuda".

KOKKOS_PATH = ../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files.  Declaring them phony keeps a stray
# file called "default", "build" or "clean" from masking them.
.PHONY: default build clean

# A recipe runs after its prerequisites, so this message is printed once
# "build" has been brought up to date.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so only a value
# supplied from outside (command line or environment) selects the CUDA branch.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
# The sources in this directory use KOKKOS_LAMBDA, hence the lambda option
# for the CUDA build.
KOKKOS_CUDA_OPTIONS = "enable_lambda"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# DEPFLAGS is not referenced by the rules in this file.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Expected to provide the KOKKOS_* variables and the kokkos-clean target
# used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable.  "$@" names the single target being
# built, so the command stays well-formed even if EXE holds several names.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,109 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <cstdio>
#include <typeinfo>
//
// "Hello world" parallel_for example:
// 1. Start up Kokkos
// 2. Execute a parallel for loop in the default execution space,
// using a C++11 lambda to define the loop body
// 3. Shut down Kokkos
//
// This example only builds if C++11 is enabled. Compare this example
// to 01_hello_world, which uses functors (explicitly defined classes)
// to define the loop body of the parallel_for. Both functors and
// lambdas have their places.
//
// Start Kokkos, report the default execution space, run a lambda in a
// parallel_for of 15 iterations, then shut Kokkos down.
int main (int argc, char* argv[]) {
  // initialize() must come before any other use of Kokkos.  Called with
  // argc and argv (analogously to MPI_Init()), it reads and removes the
  // command-line arguments that start with "--kokkos-".  It sets up the
  // default execution space (and potentially its host execution space).
  Kokkos::initialize (argc, argv);

  // typeid yields an implementation-defined, possibly mangled name, but
  // it is normally still recognizable.
  const char* const space_name =
    typeid (Kokkos::DefaultExecutionSpace).name ();
  printf ("Hello World on Kokkos execution space %s\n", space_name);

  // The lambda receives the loop index as its integer argument; other
  // argument types exist for other kinds of parallelism.
  //
  // For a single level of parallelism, prefer the KOKKOS_LAMBDA macro.
  // Without CUDA it is just [=], i.e. capture by value -- do NOT capture
  // by reference.  With CUDA it may expand to a special definition that
  // makes the lambda work on the device (compare the
  // KOKKOS_INLINE_FUNCTION macro used for functors).
  //
  // With OpenMP alone, the loop below would read:
  //
  //   #pragma omp parallel for
  //   for (int i = 0; i < 15; ++i) {
  //     printf ("Hello from i = %i\n", i);
  //   }
  //
  // Iterations of a parallel for may run in any order, so the numbers
  // are not necessarily printed in sequence.
  Kokkos::parallel_for (15, KOKKOS_LAMBDA (const int index) {
      // Use printf here: it works inside a CUDA parallel kernel, whereas
      // std::ostream does not.
      printf ("Hello from i = %i\n", index);
    });

  // finalize() must be called once all Kokkos work is done.
  Kokkos::finalize ();
}

View File

@ -1,43 +0,0 @@
# Build the Kokkos tutorial example(s) in this directory.
#
# Every *.cpp file found here is compiled to an object file and linked
# against Kokkos.  The compiler, the executable suffix and the Kokkos
# settings depend on whether KOKKOS_DEVICES contains "Cuda".

KOKKOS_PATH = ../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files.  Declaring them phony keeps a stray
# file called "default", "build" or "clean" from masking them.
.PHONY: default build clean

# A recipe runs after its prerequisites, so this message is printed once
# "build" has been brought up to date.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so only a value
# supplied from outside (command line or environment) selects the CUDA branch.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# DEPFLAGS is not referenced by the rules in this file.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Expected to provide the KOKKOS_* variables and the kokkos-clean target
# used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable.  "$@" names the single target being
# built, so the command stays well-formed even if EXE holds several names.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,101 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <cstdio>
//
// First reduction (parallel_reduce) example:
// 1. Start up Kokkos
// 2. Execute a parallel_reduce loop in the default execution space,
// using a functor to define the loop body
// 3. Shut down Kokkos
//
// Compare this example to 02_simple_reduce_lambda, which uses a C++11
// lambda to define the loop body of the parallel_reduce.
//
// Reduction functor for computing the sum of squares.
//
// More advanced reduction examples will show how to control the
// reduction's "join" operator. If the join operator is not provided,
// it defaults to binary operator+ (adding numbers together).
// Reduction functor that accumulates the sum of squares of the loop
// indices.
struct squaresum {
  // A reduction functor names the type of its result through a public
  // "value_type" typedef; here the result is an int.
  typedef int value_type;

  // Loop body of the reduction.  Unlike a parallel_for body, it takes
  // the loop index i and the intermediate reduction value lsum, which
  // MUST be a nonconst reference.  (For an array reduction type such as
  // int[], the second argument would simply be int[].)
  KOKKOS_INLINE_FUNCTION
  void operator () (const int i, value_type& lsum) const
  {
    const int square = i * i;
    lsum += square;
  }
};
int main (int argc, char* argv[]) {
Kokkos::initialize (argc, argv);
const int n = 10;
// Compute the sum of squares of integers from 0 to n-1, in
// parallel, using Kokkos.
int sum = 0;
Kokkos::parallel_reduce (n, squaresum (), sum);
printf ("Sum of squares of integers from 0 to %i, "
"computed in parallel, is %i\n", n - 1, sum);
// Compare to a sequential loop.
int seqSum = 0;
for (int i = 0; i < n; ++i) {
seqSum += i*i;
}
printf ("Sum of squares of integers from 0 to %i, "
"computed sequentially, is %i\n", n - 1, seqSum);
Kokkos::finalize ();
return (sum == seqSum) ? 0 : -1;
}

View File

@ -1,44 +0,0 @@
# Build the Kokkos tutorial example(s) in this directory.
#
# Every *.cpp file found here is compiled to an object file and linked
# against Kokkos.  The compiler, the executable suffix and the Kokkos
# settings depend on whether KOKKOS_DEVICES contains "Cuda".

KOKKOS_PATH = ../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files.  Declaring them phony keeps a stray
# file called "default", "build" or "clean" from masking them.
.PHONY: default build clean

# A recipe runs after its prerequisites, so this message is printed once
# "build" has been brought up to date.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so only a value
# supplied from outside (command line or environment) selects the CUDA branch.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
# The sources in this directory use KOKKOS_LAMBDA, hence the lambda option
# for the CUDA build.
KOKKOS_CUDA_OPTIONS = "enable_lambda"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# DEPFLAGS is not referenced by the rules in this file.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Expected to provide the KOKKOS_* variables and the kokkos-clean target
# used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable.  "$@" names the single target being
# built, so the command stays well-formed even if EXE holds several names.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,86 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <cstdio>
//
// First reduction (parallel_reduce) example:
// 1. Start up Kokkos
// 2. Execute a parallel_reduce loop in the default execution space,
// using a C++11 lambda to define the loop body
// 3. Shut down Kokkos
//
// This example only builds if C++11 is enabled. Compare this example
// to 02_simple_reduce, which uses a functor to define the loop body
// of the parallel_reduce.
//
// Compute the sum of squares of 0..n-1 with a lambda-based Kokkos
// parallel_reduce and with a plain sequential loop, print both, and
// report through the exit status whether they agree (0 on match, -1
// otherwise).
int main (int argc, char* argv[]) {
  Kokkos::initialize (argc, argv);
  const int n = 10;

  // Parallel version.  The lambda takes the same arguments a reduction
  // functor's operator() would: the loop index and a nonconst reference
  // to the intermediate result.  KOKKOS_LAMBDA stands in for the
  // capture-by-value clause [=] and adds whatever extra syntax CUDA
  // needs.
  int parallel_sum = 0;
  Kokkos::parallel_reduce (n, KOKKOS_LAMBDA (const int i, int& partial) {
      partial += i*i;
    }, parallel_sum);
  printf ("Sum of squares of integers from 0 to %i, "
          "computed in parallel, is %i\n", n - 1, parallel_sum);

  // Sequential reference value.
  int serial_sum = 0;
  int k = 0;
  while (k < n) {
    serial_sum += k*k;
    ++k;
  }
  printf ("Sum of squares of integers from 0 to %i, "
          "computed sequentially, is %i\n", n - 1, serial_sum);

  Kokkos::finalize ();

  if (parallel_sum != serial_sum) {
    return -1;
  }
  return 0;
}

View File

@ -1,43 +0,0 @@
# Build the Kokkos tutorial example(s) in this directory.
#
# Every *.cpp file found here is compiled to an object file and linked
# against Kokkos.  The compiler, the executable suffix and the Kokkos
# settings depend on whether KOKKOS_DEVICES contains "Cuda".

KOKKOS_PATH = ../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files.  Declaring them phony keeps a stray
# file called "default", "build" or "clean" from masking them.
.PHONY: default build clean

# A recipe runs after its prerequisites, so this message is printed once
# "build" has been brought up to date.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so only a value
# supplied from outside (command line or environment) selects the CUDA branch.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# DEPFLAGS is not referenced by the rules in this file.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Expected to provide the KOKKOS_* variables and the kokkos-clean target
# used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable.  "$@" names the single target being
# built, so the command stays well-formed even if EXE holds several names.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,142 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
//
// First Kokkos::View (multidimensional array) example:
// 1. Start up Kokkos
// 2. Allocate a Kokkos::View
// 3. Execute a parallel_for and a parallel_reduce over that View's data
// 4. Shut down Kokkos
//
// Compare this example to 03_simple_view_lambda, which uses C++11
// lambdas to define the loop bodies of the parallel_for and
// parallel_reduce.
//
#include <Kokkos_Core.hpp>
#include <cstdio>
// View type used throughout this example.
//
// A Kokkos::View is an array of zero or more dimensions.  The number
// of dimensions is fixed at compile time, as part of the View's type.
// This View has two dimensions: the first (written as the asterisk)
// is a run-time dimension, and the second (written as [3]) is a
// compile-time dimension.  The type is therefore an N x 3 array of
// double, where N is supplied at run time to the View's constructor.
//
// The first dimension of the View is the dimension over which it is
// efficient for Kokkos to parallelize.
typedef Kokkos::View<double*[3]> view_type;
// parallel_for functor that fills the View given to its constructor.
// The View must already have been allocated.
// parallel_for functor that fills the rows of an already allocated View
// with i, i^2 and i^3.
struct InitView {
  // The View to fill.  Views have "view semantics": they behave like
  // pointers, not like std::vector.  Copy construction and operator=
  // are shallow, so a View can be passed around by "value" without a
  // deep copy ever happening unless one is requested explicitly.
  view_type a;

  // Store a shallow copy of the View that is to be filled.
  InitView (view_type a_) : a (a_) {}

  // Fill row i.  The parallel_for iterates over the View's first
  // dimension N.
  KOKKOS_INLINE_FUNCTION
  void operator () (const int i) const
  {
    // Access the View just like a Fortran array.  The layout depends on
    // the View's memory space, so don't rely on the View's physical
    // memory layout unless you know what you're doing.
    const double linear = 1.0*i;
    const double square = linear*i;
    const double cube = square*i;
    a(i,0) = linear;
    a(i,1) = square;
    a(i,2) = cube;
  }
};
// Reduction functor that reads the View given to its constructor.
// parallel_reduce functor that sums a(i,0)*a(i,1)/(a(i,2)+0.1) over the
// rows of the View handed to its constructor.
struct ReduceFunctor {
  // The View to read; held as a shallow copy.
  view_type a;

  // Taking the View by "value" only performs a shallow copy.
  ReduceFunctor (view_type a_) : a (a_) {}

  // A reduction functor must publish the type of its result through a
  // public 'value_type' typedef.
  typedef double value_type;

  // Add row i's contribution to the intermediate result lsum.
  KOKKOS_INLINE_FUNCTION
  void operator() (int i, double &lsum) const
  {
    const double numerator = a(i,0)*a(i,1);
    const double denominator = a(i,2)+0.1;
    lsum += numerator/denominator;
  }
};
// Allocate an N x 3 View, fill it with a parallel_for, reduce over it
// with a parallel_reduce, and print the result.
int main (int argc, char* argv[]) {
  Kokkos::initialize (argc, argv);
  const int N = 10;

  // The View lives in its own scope so that it is destroyed before
  // Kokkos::finalize() runs; Views must not outlive Kokkos.
  {
    // Allocate the View.  The first dimension is a run-time parameter
    // N.  We set N = 10 here.  The second dimension is a compile-time
    // parameter, 3.  We don't specify it here because we already set it
    // by declaring the type of the View.
    //
    // Views get initialized to zero by default.  This happens in
    // parallel, using the View's memory space's default execution
    // space.  Parallel initialization ensures first-touch allocation.
    // There is a way to shut off default initialization.
    //
    // You may NOT allocate a View inside of a parallel_{for, reduce,
    // scan}.  Treat View allocation as a "thread collective."
    //
    // The string "A" is just the label; it only matters for debugging.
    // Different Views may have the same label.
    view_type a ("A", N);

    // Fill the View; the functor holds only a shallow copy of it.
    Kokkos::parallel_for (N, InitView (a));

    // Reduce over the View's rows into sum.
    double sum = 0;
    Kokkos::parallel_reduce (N, ReduceFunctor (a), sum);
    printf ("Result: %f\n", sum);
  }

  Kokkos::finalize ();
}

View File

@ -1,44 +0,0 @@
# Build the Kokkos tutorial example(s) in this directory.
#
# Every *.cpp file found here is compiled to an object file and linked
# against Kokkos.  The compiler, the executable suffix and the Kokkos
# settings depend on whether KOKKOS_DEVICES contains "Cuda".

KOKKOS_PATH = ../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files.  Declaring them phony keeps a stray
# file called "default", "build" or "clean" from masking them.
.PHONY: default build clean

# A recipe runs after its prerequisites, so this message is printed once
# "build" has been brought up to date.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so only a value
# supplied from outside (command line or environment) selects the CUDA branch.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
# The sources in this directory use KOKKOS_LAMBDA, hence the lambda option
# for the CUDA build.
KOKKOS_CUDA_OPTIONS = "enable_lambda"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# DEPFLAGS is not referenced by the rules in this file.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Expected to provide the KOKKOS_* variables and the kokkos-clean target
# used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable.  "$@" names the single target being
# built, so the command stays well-formed even if EXE holds several names.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,116 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
//
// First Kokkos::View (multidimensional array) example:
// 1. Start up Kokkos
// 2. Allocate a Kokkos::View
// 3. Execute a parallel_for and a parallel_reduce over that View's data
// 4. Shut down Kokkos
//
// Compare this example to 03_simple_view, which uses functors to
// define the loop bodies of the parallel_for and parallel_reduce.
//
#include <Kokkos_Core.hpp>
#include <cstdio>
// View type used throughout this example.
//
// A Kokkos::View is an array of zero or more dimensions.  The number
// of dimensions is fixed at compile time, as part of the View's type.
// This View has two dimensions: the first (written as the asterisk)
// is a run-time dimension, and the second (written as [3]) is a
// compile-time dimension.  The type is therefore an N x 3 array of
// double, where N is supplied at run time to the View's constructor.
//
// The first dimension of the View is the dimension over which it is
// efficient for Kokkos to parallelize.
typedef Kokkos::View<double*[3]> view_type;
// Allocate an N x 3 View, fill it with a lambda-based parallel_for,
// reduce over it with a lambda-based parallel_reduce, and print the
// result.
int main (int argc, char* argv[]) {
  Kokkos::initialize (argc, argv);

  // Extent of the View's run-time dimension and trip count of both
  // parallel loops.
  const int N = 10;

  // The View lives in its own scope so that it (and the lambdas' copies
  // of it) are destroyed before Kokkos::finalize() runs; Views must not
  // outlive Kokkos.
  {
    // Allocate the View.  The first dimension is a run-time parameter
    // N.  We set N = 10 here.  The second dimension is a compile-time
    // parameter, 3.  We don't specify it here because we already set it
    // by declaring the type of the View.
    //
    // Views get initialized to zero by default.  This happens in
    // parallel, using the View's memory space's default execution
    // space.  Parallel initialization ensures first-touch allocation.
    // There is a way to shut off default initialization.
    //
    // You may NOT allocate a View inside of a parallel_{for, reduce,
    // scan}.  Treat View allocation as a "thread collective."
    //
    // The string "A" is just the label; it only matters for debugging.
    // Different Views may have the same label.
    view_type a ("A", N);

    // Fill the View with some data.  The parallel_for loop will iterate
    // over the View's first dimension N.
    //
    // Note that the View is passed by value into the lambda.  The macro
    // KOKKOS_LAMBDA includes the "capture by value" clause [=].  This
    // tells the lambda to "capture all variables in the enclosing scope
    // by value."  Views have "view semantics"; they behave like
    // pointers, not like std::vector.  Passing them by value does a
    // shallow copy.  A deep copy never happens unless you explicitly
    // ask for one.
    Kokkos::parallel_for (N, KOKKOS_LAMBDA (const int i) {
        // Access the View just like a Fortran array.  The layout depends
        // on the View's memory space, so don't rely on the View's
        // physical memory layout unless you know what you're doing.
        a(i,0) = 1.0*i;
        a(i,1) = 1.0*i*i;
        a(i,2) = 1.0*i*i*i;
      });

    // Reduction lambda that reads the captured View and accumulates
    // each row's contribution into sum.
    double sum = 0;
    Kokkos::parallel_reduce (N, KOKKOS_LAMBDA (const int i, double& lsum) {
        lsum += a(i,0)*a(i,1)/(a(i,2)+0.1);
      }, sum);
    printf ("Result: %f\n", sum);
  }

  Kokkos::finalize ();
}

View File

@ -1,43 +0,0 @@
# Builds the tutorial example in this directory against the Kokkos tree
# rooted at KOKKOS_PATH.  The KOKKOS_* variables used in the recipes and
# the 'kokkos-clean' target are not defined in this file; they are
# expected to come from the included Makefile.kokkos.
KOKKOS_PATH = ../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files; declaring them phony keeps a
# stray file called 'build' or 'clean' from silently disabling them.
.PHONY: default build clean

# First rule in the file, hence the default goal.  The echo runs only
# after the 'build' prerequisite has been brought up to date.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so the Cuda
# branch is taken only when the variable is supplied from outside
# (command line or environment).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step.  '-o $@' names exactly the target being built; the previous
# '-o $(EXE)' expanded to several words as soon as SRC matched more than
# one source file, producing a malformed link command.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,101 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <cstdio>
// The type of a two-dimensional N x 3 array of double.
// It lives in Kokkos' default memory space.
typedef Kokkos::View<double*[3]> view_type;
// The "HostMirror" type corresponding to view_type above is also a
// two-dimensional N x 3 array of double. However, it lives in the
// host memory space corresponding to view_type's memory space. For
// example, if view_type lives in CUDA device memory, host_view_type
// lives in host (CPU) memory. Furthermore, declaring host_view_type
// as the host mirror of view_type means that host_view_type has the
// same layout as view_type. This makes it easier to copy between the
// two Views.
// Advanced issues: If a memory space is accessible from the host without
// performance penalties then it is its own host_mirror_space. This is
// the case for HostSpace, CudaUVMSpace and CudaHostPinnedSpace.
typedef view_type::HostMirror host_view_type;
struct ReduceFunctor {
view_type a;
ReduceFunctor (view_type a_) : a (a_) {}
typedef int value_type; //Specify type for reduction value, lsum
KOKKOS_INLINE_FUNCTION
void operator() (int i, int &lsum) const {
lsum += a(i,0)-a(i,1)+a(i,2);
}
};
// Entry point of the host-mirror example: fills a View through its host
// mirror, copies it to the device, and reduces over it there.
int main() {
  Kokkos::initialize();

  // Number of rows of the N x 3 View; shared by the allocation, the
  // host fill loop and the reduction so they cannot drift apart.
  const int N = 10;

  // Every View must be destroyed before Kokkos::finalize() is called,
  // so all Views live inside this nested scope.
  {
    view_type a ("A", N);

    // If view_type and host_view_type live in the same memory space,
    // a "mirror view" is just an alias, and deep_copy does nothing.
    // Otherwise, a mirror view of a device View lives in host memory,
    // and deep_copy does a deep copy.
    host_view_type h_a = Kokkos::create_mirror_view (a);

    // The View h_a lives in host (CPU) memory, so it's legal to fill
    // the view sequentially using ordinary code, like this.
    for (int i = 0; i < N; i++) {
      for (int j = 0; j < 3; j++) {
        h_a(i,j) = i*10 + j;
      }
    }
    Kokkos::deep_copy (a, h_a); // Copy from host to device.

    int sum = 0;
    Kokkos::parallel_reduce (N, ReduceFunctor (a), sum);
    printf ("Result is %i\n",sum);
  } // 'a' and 'h_a' are released here, before finalize.

  Kokkos::finalize ();
  return 0;
}

View File

@ -1,43 +0,0 @@
# Builds the tutorial example in this directory against the Kokkos tree
# rooted at KOKKOS_PATH.  The KOKKOS_* variables used in the recipes and
# the 'kokkos-clean' target are not defined in this file; they are
# expected to come from the included Makefile.kokkos.
KOKKOS_PATH = ../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files; declaring them phony keeps a
# stray file called 'build' or 'clean' from silently disabling them.
.PHONY: default build clean

# First rule in the file, hence the default goal.  The echo runs only
# after the 'build' prerequisite has been brought up to date.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so the Cuda
# branch is taken only when the variable is supplied from outside
# (command line or environment).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step.  '-o $@' names exactly the target being built; the previous
# '-o $(EXE)' expanded to several words as soon as SRC matched more than
# one source file, producing a malformed link command.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,137 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <cstdio>
#include <cstdlib>
#include <cmath>
// Type of a one-dimensional length-N array of int.
typedef Kokkos::View<int*> view_type;
typedef view_type::HostMirror host_view_type;
// This is a "zero-dimensional" View, that is, a View of a single
// value (an int, in this case). Access the value using operator()
// with no arguments: e.g., 'count()'.
//
// Zero-dimensional Views are useful for reduction results that stay
// resident in device memory, as well as for irregularly updated
// shared state. We use it for the latter in this example.
typedef Kokkos::View<int> count_type;
typedef count_type::HostMirror host_count_type;
// Functor for finding a list of primes in a given set of numbers. If
// run in parallel, the order of results is nondeterministic, because
// hardware atomic updates do not guarantee an order of execution.
struct findprimes {
  view_type data;    // input numbers to test
  view_type result;  // output list of the primes that were found
  count_type count;  // number of primes written to 'result' so far

  findprimes (view_type data_, view_type result_, count_type count_) :
    data (data_), result (result_), count (count_)
  {}

  // Test if data(i) is prime.  If it is, increment the count of
  // primes (stored in the zero-dimensional View 'count') and add the
  // value to the current list of primes 'result'.
  KOKKOS_INLINE_FUNCTION
  void operator() (const int i) const {
    const int number = data(i); // the current number

    // Handle the small and even cases explicitly: 2 is the only even
    // prime, and nothing below 2 is prime.  (Seeding is_prime with a
    // bare parity test classified 1 as prime and 2 as composite.)
    bool is_prime = (number == 2) || (number > 2 && number%2 != 0);

    if (is_prime) {
      // Test all odd numbers from 3 to ceiling(sqrt(data(i))), to see
      // if they are factors of data(i).  It's not the most efficient
      // prime test, but it works.  The bound is computed only here, so
      // sqrt is never applied to a value below 2.
      const int upper_bound = sqrt(1.0*number)+1;
      for (int k = 3; k < upper_bound && is_prime; k += 2) {
        is_prime = !(number%k == 0);
      }
    }

    if (is_prime) {
      // Use an atomic update both to update the current count of
      // primes, and to find a place in the current list of primes for
      // the new result.
      //
      // atomic_fetch_add returns the _current_ count, but increments
      // it (by 1 in this case).  The current count of primes indexes
      // into the first unoccupied position of the 'result' array.
      const int idx = Kokkos::atomic_fetch_add (&count(), 1);
      result(idx) = number;
    }
  }
};
// Entry point of the atomics example: generates random numbers on the
// host, finds the primes among them in parallel, and prints the count.
int main () {
  Kokkos::initialize ();

  // Every View must be destroyed before Kokkos::finalize() is called,
  // so all Views live inside this nested scope.
  {
    srand (61391); // Set the random seed

    int nnumbers = 100000;
    view_type data ("RND", nnumbers);
    view_type result ("Prime", nnumbers);
    count_type count ("Count");

    host_view_type h_data = Kokkos::create_mirror_view (data);
    // Host mirror of the prime list; this example only reports the
    // count and never copies 'result' back into it.
    host_view_type h_result = Kokkos::create_mirror_view (result);
    host_count_type h_count = Kokkos::create_mirror_view (count);

    typedef view_type::size_type size_type;
    // Fill the 'data' array on the host with random numbers.  We assume
    // that they come from some process which is only implemented on the
    // host, via some library.  (That's true in this case.)
    for (size_type i = 0; i < data.dimension_0 (); ++i) {
      h_data(i) = rand () % nnumbers;
    }
    Kokkos::deep_copy (data, h_data); // copy from host to device

    Kokkos::parallel_for (data.dimension_0 (), findprimes (data, result, count));
    Kokkos::deep_copy (h_count, count); // copy from device to host

    printf ("Found %i prime numbers in %i random numbers\n", h_count(), nnumbers);
  } // all Views are released here, before finalize.

  Kokkos::finalize ();
  return 0;
}

View File

@ -1,43 +0,0 @@
# Builds the tutorial example in this directory against the Kokkos tree
# rooted at KOKKOS_PATH.  The KOKKOS_* variables used in the recipes and
# the 'kokkos-clean' target are not defined in this file; they are
# expected to come from the included Makefile.kokkos.
KOKKOS_PATH = ../../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files; declaring them phony keeps a
# stray file called 'build' or 'clean' from silently disabling them.
.PHONY: default build clean

# First rule in the file, hence the default goal.  The echo runs only
# after the 'build' prerequisite has been brought up to date.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so the Cuda
# branch is taken only when the variable is supplied from outside
# (command line or environment).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step.  '-o $@' names exactly the target being built; the previous
# '-o $(EXE)' expanded to several words as soon as SRC matched more than
# one source file, producing a malformed link command.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,171 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <cstdio>
// These two View types are both 2-D arrays of double. However, they
// have different layouts in memory. left_type has "layout left,"
// which means "column major," the same as in Fortran, the BLAS, or
// LAPACK. right_type has "layout right," which means "row major,"
// the same as in C, C++, or Java.
typedef Kokkos::View<double**, Kokkos::LayoutLeft> left_type;
typedef Kokkos::View<double**, Kokkos::LayoutRight> right_type;
// This is a one-dimensional View, so the layout matters less.
// However, it still has a layout! Since its layout is not specified
// explicitly in the type, its layout is a function of the memory
// space. For example, the default Cuda layout is LayoutLeft, and the
// default Host layout is LayoutRight.
typedef Kokkos::View<double*> view_type;
// parallel_for functor that fills the given View with some data. It
// expects to access the View by rows in parallel: each call i of
// operator() accesses a row.
template<class ViewType>
struct init_view {
  // The 2-D View to fill; copying a View is a shallow copy.
  ViewType a;

  init_view (ViewType view) : a (view) {}

  // Fill row i: entry (i,col) becomes dimension_0 * i + col.
  KOKKOS_INLINE_FUNCTION
  void operator() (const typename ViewType::size_type i) const {
    typedef typename ViewType::size_type size_type;

    // On CPUs this loop could be vectorized, so the column index should
    // be the stride-1 access on 'a' for optimal performance, i.e. 'a'
    // should be LayoutRight.  On GPUs threads should do coalesced loads
    // and stores, which means that i should be the stride-1 access.
    const size_type ncols = a.dimension_1 ();
    for (size_type col = 0; col < ncols; ++col) {
      a(i,col) = 1.0*a.dimension_0()*i + 1.0*col;
    }
  }
};
// Compute a contraction of v1 and v2 into a:
//
//   a(i) := sum_j (v1(i,j) * v2(j,i))
//
// Since the functor is templated on the ViewTypes themselves, it doesn't
// matter what their layouts are.  That means you can use different
// layouts on different architectures.
template<class ViewType1, class ViewType2>
struct contraction {
  view_type a;                           // output vector, one entry per i
  typename ViewType1::const_type v1;     // read-only left operand
  typename ViewType2::const_type v2;     // read-only right operand

  contraction (view_type a_, ViewType1 v1_, ViewType2 v2_) :
    a (a_), v1 (v1_), v2 (v2_)
  {}

  // As with the initialization functor the performance of this operator
  // depends on the architecture and the chosen data layouts.
  // On CPUs optimal would be to vectorize the inner loop, so j should be the
  // stride 1 access. That means v1 should be LayoutRight and v2 LayoutLeft.
  // In order to get coalesced access on GPUs where i corresponds closely to
  // the thread Index, i must be the stride 1 dimension. That means v1 should be
  // LayoutLeft and v2 LayoutRight.
  KOKKOS_INLINE_FUNCTION
  void operator() (const view_type::size_type i) const {
    // Accumulate in a local and store once.  Plain assignment inside the
    // loop kept only the last product rather than the sum over j, and
    // accumulating directly into a(i) would make the result depend on
    // whatever a(i) held from an earlier kernel.
    double sum = 0.0;
    for (view_type::size_type j = 0; j < v1.dimension_1 (); ++j) {
      sum += v1(i,j)*v2(j,i);
    }
    a(i) = sum;
  }
};
// Compute a dot product. This is used for result verification.
struct dot {
view_type a;
dot (view_type a_) : a (a_) {}
typedef double value_type; //Specify type for reduction target, lsum
KOKKOS_INLINE_FUNCTION
void operator() (const view_type::size_type i, double &lsum) const {
lsum += a(i)*a(i);
}
};
// Entry point of the data-layout example: times the contraction kernel
// with both layout combinations and checks that the results agree.
int main (int narg, char* arg[]) {
  // When initializing Kokkos, you may pass in command-line arguments,
  // just like with MPI_Init().  Kokkos reserves the right to remove
  // arguments from the list that start with '--kokkos-'.
  Kokkos::initialize (narg, arg);

  // Every View must be destroyed before Kokkos::finalize() is called,
  // so all Views live inside this nested scope.
  {
    // Both matrices are size x size; the contraction reads v1(i,j) and
    // v2(j,i), so the two extents have to match.
    const int size = 10000;
    view_type a("A",size);

    // Define two views with LayoutLeft and LayoutRight.
    left_type l("L",size,size);
    right_type r("R",size,size);

    // Initialize the data in the views.
    Kokkos::parallel_for(size,init_view<left_type>(l));
    Kokkos::parallel_for(size,init_view<right_type>(r));
    Kokkos::fence();

    // Measure time to execute the contraction kernel when giving it a
    // LayoutLeft view for v1 and a LayoutRight view for v2. This should be
    // fast on GPUs and slow on CPUs
    Kokkos::Impl::Timer time1;
    Kokkos::parallel_for(size,contraction<left_type,right_type>(a,l,r));
    Kokkos::fence();
    double sec1 = time1.seconds();

    double sum1 = 0;
    Kokkos::parallel_reduce(size,dot(a),sum1);
    Kokkos::fence();

    // Measure time to execute the contraction kernel when giving it a
    // LayoutRight view for v1 and a LayoutLeft view for v2. This should be
    // fast on CPUs and slow on GPUs
    Kokkos::Impl::Timer time2;
    Kokkos::parallel_for(size,contraction<right_type,left_type>(a,r,l));
    Kokkos::fence();
    double sec2 = time2.seconds();

    double sum2 = 0;
    Kokkos::parallel_reduce(size,dot(a),sum2);

    // Kokkos' reductions are deterministic.
    // The results should always be equal.
    printf("Result Left/Right %f Right/Left %f (equal result: %i)\n",sec1,sec2,sum2==sum1);
  } // all Views are released here, before finalize.

  Kokkos::finalize();
  return 0;
}

View File

@ -1,43 +0,0 @@
# Builds the tutorial example in this directory against the Kokkos tree
# rooted at KOKKOS_PATH.  The KOKKOS_* variables used in the recipes and
# the 'kokkos-clean' target are not defined in this file; they are
# expected to come from the included Makefile.kokkos.
KOKKOS_PATH = ../../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files; declaring them phony keeps a
# stray file called 'build' or 'clean' from silently disabling them.
.PHONY: default build clean

# First rule in the file, hence the default goal.  The echo runs only
# after the 'build' prerequisite has been brought up to date.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so the Cuda
# branch is taken only when the variable is supplied from outside
# (command line or environment).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step.  '-o $@' names exactly the target being built; the previous
# '-o $(EXE)' expanded to several words as soon as SRC matched more than
# one source file, producing a malformed link command.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,141 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <cstdio>
#include <cstdlib>
// A one-dimensional View of double with a run-time length and no
// explicit memory traits.
typedef Kokkos::View<double*> view_type;
// Kokkos::Views have a MemoryTraits template parameter which
// allows users to specify usage scenarios of a View.
// Some of those act simply as hints, which can be used to insert
// optimal load and store paths; others change the semantics of the
// access. The trait Kokkos::Atomic is one of the latter. A view with
// that MemoryTrait will perform any access atomically (read, write, update).
//
// In this example we use a view with a usage hint for RandomAccess.
// Kokkos::RandomAccess means that we expect to use this view
// with indirect indexing.
//
// In CUDA, RandomAccess allows accesses through the texture
// cache.  This only works if the View is read-only, which we enforce
// through the first template parameter.
//
// Note that we are still talking about views of the data; it's not a new allocation.
// For example you can have an atomic view of a default view. While you even
// could use both in the same kernel, this could lead to undefined behaviour because
// one of your access paths is not atomic. Think of it in the same way as you think of
// pointers to const data and pointers to non-const data (i.e. const double* and double*).
// While these pointers can point to the same data you should not use them together if that
// breaks the const guarantee of the first pointer.
typedef Kokkos::View<const double*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > view_type_rnd;
// Two-dimensional View of int holding, for each row i, the indices that
// are used to look up entries of the source View indirectly.
typedef Kokkos::View<int**> idx_type;
// Host mirror of idx_type; main() fills it on the host before copying
// it into the idx_type View with deep_copy.
typedef idx_type::HostMirror idx_type_host;
// The functor is templated on the destination and source View types so
// that the effect of the RandomAccess trait on the source can be shown.
template<class DestType, class SrcType>
struct localsum {
  idx_type::const_type idx;  // read-only table of indices into 'src'
  DestType dest;             // one result per row i
  SrcType src;               // values gathered through 'idx'

  localsum (idx_type idx_, DestType dest_, SrcType src_) :
    idx (idx_), dest (dest_), src (src_)
  {}

  // Calculate a local sum of values for row i and store it in dest(i).
  KOKKOS_INLINE_FUNCTION
  void operator() (const int i) const {
    const int ncols = (int) idx.dimension_1 ();
    double acc = 0.0;
    for (int col = 0; col < ncols; ++col) {
      // This is an indirect access on src
      const double val = src(idx(i,col));
      acc += val*val + 0.5*(idx.dimension_0()*val -idx.dimension_1()*val);
    }
    dest(i) = acc;
  }
};
// Entry point of the memory-traits example: times the localsum kernel
// with and without the RandomAccess trait on the source View.
int main(int narg, char* arg[]) {
  Kokkos::initialize (narg, arg);

  // Every View must be destroyed before Kokkos::finalize() is called,
  // so all Views live inside this nested scope.
  {
    int size = 1000000;

    idx_type idx("Idx",size,64);
    idx_type_host h_idx = Kokkos::create_mirror_view (idx);

    view_type dest ("Dest", size);
    view_type src ("Src", size);

    // Fill the index table on the host: each entry of row i is an index
    // within 250 of i, wrapped into [0, size).
    srand(134231);
    for (int i = 0; i < size; i++) {
      for (view_type::size_type j = 0; j < h_idx.dimension_1 (); ++j) {
        h_idx(i,j) = (size + i + (rand () % 500 - 250)) % size;
      }
    }

    // Deep copy the initial data to the device
    Kokkos::deep_copy(idx,h_idx);

    // Run the first kernel to warmup caches
    Kokkos::parallel_for(size,localsum<view_type,view_type_rnd>(idx,dest,src));
    Kokkos::fence();

    // Run the localsum functor using the RandomAccess trait. On CPUs there
    // should not be any difference in performance compared to not using the
    // RandomAccess trait. On GPUs there can be a dramatic difference.
    Kokkos::Impl::Timer time1;
    Kokkos::parallel_for(size,localsum<view_type,view_type_rnd>(idx,dest,src));
    Kokkos::fence();
    double sec1 = time1.seconds();

    // Same kernel, but reading the source through a plain View.
    Kokkos::Impl::Timer time2;
    Kokkos::parallel_for(size,localsum<view_type,view_type>(idx,dest,src));
    Kokkos::fence();
    double sec2 = time2.seconds();

    printf("Time with Trait RandomAccess: %f with Plain: %f \n",sec1,sec2);
  } // all Views are released here, before finalize.

  Kokkos::finalize();
  return 0;
}

View File

@ -1,43 +0,0 @@
# Builds the tutorial example in this directory against the Kokkos tree
# rooted at KOKKOS_PATH.  The KOKKOS_* variables used in the recipes and
# the 'kokkos-clean' target are not defined in this file; they are
# expected to come from the included Makefile.kokkos.
KOKKOS_PATH = ../../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files; declaring them phony keeps a
# stray file called 'build' or 'clean' from silently disabling them.
.PHONY: default build clean

# First rule in the file, hence the default goal.  The echo runs only
# after the 'build' prerequisite has been brought up to date.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so the Cuda
# branch is taken only when the variable is supplied from outside
# (command line or environment).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step.  '-o $@' names exactly the target being built; the previous
# '-o $(EXE)' expanded to several words as soon as SRC matched more than
# one source file, producing a malformed link command.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,190 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// This example simulates one timestep of an explicit
// finite-difference discretization of a time-dependent partial
// differential equation (PDE). It shows how to take subviews of the
// mesh in order to represent particular boundaries or the interior of
// the mesh.
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <cstdio>
// The full mesh: a three-dimensional View of double with LayoutRight
// (row-major) layout and run-time extents in every dimension.
typedef Kokkos::View<double***, Kokkos::LayoutRight> mesh_type;
// These View types represent subviews of the mesh.  Some of the Views
// have layout LayoutStride, meaning that they have run-time "strides"
// in each dimension which may differ from that dimension.  For
// example, inner_mesh_type (which represents the interior of the
// mesh) has to skip over the boundaries when computing its stride;
// the dimensions of the interior mesh differ from these strides.  You
// may safely always use a LayoutStride layout when taking a subview
// of a LayoutRight or LayoutLeft View, but strided accesses may
// cost a bit more, especially for 1-D Views.
//
// The plane types are named after the two mesh dimensions they keep;
// main() obtains each one by fixing the remaining index.
typedef Kokkos::View<double**, Kokkos::LayoutStride> xz_plane_type;
typedef Kokkos::View<double**, Kokkos::LayoutRight> yz_plane_type;
typedef Kokkos::View<double**, Kokkos::LayoutStride> xy_plane_type;
// The interior of the mesh, i.e. the mesh without its boundary planes.
typedef Kokkos::View<double***, Kokkos::LayoutStride> inner_mesh_type;
// Functor that assigns one constant to every entry of a 2-D boundary
// plane of the mesh.  It is templated on ViewType because different
// boundaries may have different layouts.
template<class ViewType>
struct set_boundary {
  ViewType a;    // the boundary plane to fill
  double value;  // the constant written to every entry

  set_boundary (ViewType a_, double value_) :
    a (a_), value (value_)
  {}

  // Fill row i of the plane with 'value'.
  KOKKOS_INLINE_FUNCTION
  void operator() (const typename ViewType::size_type i) const {
    typedef typename ViewType::size_type size_type;
    const size_type ncols = a.dimension_1 ();
    for (size_type col = 0; col < ncols; ++col) {
      a(i,col) = value;
    }
  }
};
// Functor that assigns one constant to every entry of a 3-D View; main()
// applies it to the interior of the mesh.  It is templated on ViewType
// so that it works with whatever layout the subview has.
template<class ViewType>
struct set_inner {
  ViewType a;    // the 3-D View to fill
  double value;  // the constant written to every entry

  set_inner (ViewType a_, double value_) :
    a (a_), value (value_)
  {}

  // Fill the 2-D slab a(i, :, :) with 'value'.
  KOKKOS_INLINE_FUNCTION
  void operator () (const typename ViewType::size_type i) const {
    typedef typename ViewType::size_type size_type;
    const size_type n1 = a.dimension_1 ();
    const size_type n2 = a.dimension_2 ();
    for (size_type row = 0; row < n1; ++row) {
      for (size_type col = 0; col < n2; ++col) {
        a(i,row,col) = value;
      }
    }
  }
};
// Functor that updates the interior of the mesh in place.  This
// simulates one timestep of a finite-difference method.
template<class ViewType>
struct update {
  ViewType a;       // the full mesh, boundaries included
  const double dt;  // timestep scaling the neighbour differences

  update (ViewType a_, const double dt_) :
    a (a_), dt (dt_)
  {}

  // Update the interior points of the slab with first index i+1.
  // NOTE(review): each call reads a(x-1,..) and a(x+1,..), which other
  // iterations update in place -- confirm that this is acceptable here.
  KOKKOS_INLINE_FUNCTION
  void operator() (typename ViewType::size_type i) const {
    typedef typename ViewType::size_type size_type;
    // Shift by one so that iteration 0 lands on the first interior slab
    // instead of the boundary at index 0.
    const size_type x = i + 1;
    for (size_type y = 1; y < a.dimension_1()-1; y++) {
      for (size_type z = 1; z < a.dimension_2()-1; z++) {
        a(x,y,z) += dt* (a(x,y,z+1) - a(x,y,z-1) +
                         a(x,y+1,z) - a(x,y-1,z) +
                         a(x+1,y,z) - a(x-1,y,z));
      }
    }
  }
};
// Driver for the subview example.
//
// Allocates a cubic mesh with a one-point boundary layer on every face,
// creates subviews for the interior and for each of the six boundary
// planes, initializes them, and performs a single update step.
int main (int narg, char* arg[]) {
  using Kokkos::ALL;
  using Kokkos::pair;
  using Kokkos::parallel_for;
  using Kokkos::subview;
  typedef mesh_type::size_type size_type;

  Kokkos::initialize (narg, arg);

  // The number of mesh points along each dimension of the mesh, not
  // including boundaries.
  const size_type size = 100;

  // Index of the boundary layer on the "positive" side of each dimension.
  // It is derived from size (instead of being the literal 101) so that the
  // halo subviews below stay correct when size is changed.
  const size_type last = size + 1;

  // A is the full cubic 3-D mesh, including the boundaries.
  mesh_type A ("A", size+2, size+2, size+2);

  // Ai is the "inner" part of A, _not_ including the boundaries.
  //
  // A pair of indices in a particular dimension means the contiguous
  // zero-based index range in that dimension, including the first
  // entry of the pair but _not_ including the second entry.
  inner_mesh_type Ai = subview(A, pair<size_type, size_type> (1, size+1),
                               pair<size_type, size_type> (1, size+1),
                               pair<size_type, size_type> (1, size+1));

  // A has six boundaries, one for each face of the cube.
  // Create a View of each of these boundaries.
  // ALL() means "select all indices in that dimension."
  xy_plane_type Zneg_halo = subview(A, ALL (), ALL (), 0);
  xy_plane_type Zpos_halo = subview(A, ALL (), ALL (), last);
  xz_plane_type Yneg_halo = subview(A, ALL (), 0, ALL ());
  xz_plane_type Ypos_halo = subview(A, ALL (), last, ALL ());
  yz_plane_type Xneg_halo = subview(A, 0, ALL (), ALL ());
  yz_plane_type Xpos_halo = subview(A, last, ALL (), ALL ());

  // Set the boundaries to their initial conditions.
  parallel_for (Zneg_halo.dimension_0 (), set_boundary<xy_plane_type> (Zneg_halo,  1));
  parallel_for (Zpos_halo.dimension_0 (), set_boundary<xy_plane_type> (Zpos_halo, -1));
  parallel_for (Yneg_halo.dimension_0 (), set_boundary<xz_plane_type> (Yneg_halo,  2));
  parallel_for (Ypos_halo.dimension_0 (), set_boundary<xz_plane_type> (Ypos_halo, -2));
  parallel_for (Xneg_halo.dimension_0 (), set_boundary<yz_plane_type> (Xneg_halo,  3));
  parallel_for (Xpos_halo.dimension_0 (), set_boundary<yz_plane_type> (Xpos_halo, -3));

  // Set the interior of the mesh to its initial condition.
  parallel_for (Ai.dimension_0 (), set_inner<inner_mesh_type> (Ai, 0));

  // Update the interior of the mesh.
  // This simulates one timestep with dt = 0.1.
  parallel_for (Ai.dimension_0 (), update<mesh_type> (A, 0.1));

  printf ("Done\n");
  Kokkos::finalize ();
}

View File

@ -1,43 +0,0 @@
# Builds the tutorial example in this directory against the Kokkos
# makefile fragment found under KOKKOS_PATH.
#
# The backend is chosen from the value KOKKOS_DEVICES has *before* this file
# assigns it (command line or environment): if it mentions Cuda the example
# is compiled with nvcc_wrapper into <name>.cuda, otherwise with g++ into
# <name>.host.
KOKKOS_PATH = ../../../..

# Expanded once; a recursive '=' would re-run the wildcard on every use.
SRC := $(wildcard *.cpp)

# None of these names a file, so keep a stray file called 'build' or
# 'clean' from making the target look up to date.
.PHONY: default build clean

default: build
	echo "Start Build"

ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ := $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step; $@ is the executable being produced.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is expected to come from the included Makefile.kokkos.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,214 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <cstdio>
#include <cstdlib>
// DualView helps you manage data and computations that take place on
// two different memory spaces. Examples include CUDA device memory
// and (CPU) host memory (currently implemented), or Intel Knights
// Landing MCDRAM and DRAM (not yet implemented). For example, if you
// have ported only some parts of your application to run in CUDA,
// DualView can help manage moving data between the parts of your
// application that work best with CUDA, and the parts that work
// better on the CPU.
//
// A DualView takes the same template parameters as a View, but
// contains two Views: One that lives in the DualView's memory space,
// and one that lives in that memory space's host mirror space. If
// both memory spaces are the same, then the two Views just alias one
// another. This means that you can use DualView all the time, even
// when not running in a memory space like CUDA. DualView's
// operations to help you manage memory take almost no time in that
// case. This makes your code even more performance portable.
// DualView aliases shared by the functor and main() below:
//   view_type - rank-1 DualView of double (used for "Dest" and "Src")
//   idx_type  - rank-2 DualView of int    (used for the "Idx" index table)
typedef Kokkos::DualView<double*> view_type;
typedef Kokkos::DualView<int**> idx_type;
// Functor that, for every row i of idx, gathers src at the indices stored in
// that row and accumulates a per-row sum into dest(i).  It is templated on
// the execution space so the same code can run on either side of a DualView.
template<class ExecutionSpace>
struct localsum {
  // A public 'execution_space' typedef tells Kokkos where this functor runs
  // in parallel; it takes precedence over Kokkos' default execution space.
  typedef ExecutionSpace execution_space;

  // Memory space of the Views held by this instantiation: the DualView's own
  // memory space when ExecutionSpace is the default execution space, and its
  // host mirror space otherwise.
  typedef typename Kokkos::Impl::if_c<
    Kokkos::Impl::is_same<ExecutionSpace,Kokkos::DefaultExecutionSpace>::value ,
    idx_type::memory_space,
    idx_type::host_mirror_space>::type memory_space;

  // View types for the selected memory space.
  //
  // "const_data_type" is a typedef in View (and DualView): the const version
  // of the View's first template parameter (for double** that is
  // const double**).
  Kokkos::View<idx_type::const_data_type, idx_type::array_layout, memory_space> idx;

  // "array_intrinsic_type" is a typedef in ViewTraits (and DualView): the
  // array version of the value(s) stored in the View.
  Kokkos::View<view_type::array_intrinsic_type, view_type::array_layout, memory_space> dest;

  // Read-only source values, accessed with the MemoryRandomAccess trait.
  Kokkos::View<view_type::const_data_type, view_type::array_layout,
               memory_space, Kokkos::MemoryRandomAccess> src;

  // Takes the three DualViews, grabs their Views for memory_space, syncs all
  // three to memory_space, and marks dest (only dest) as modified there.
  localsum (idx_type dv_idx, view_type dv_dest, view_type dv_src)
  {
    // DualView::view() is templated on the memory space and returns the
    // DualView's View that lives in that space.
    idx  = dv_idx.view<memory_space> ();
    dest = dv_dest.template view<memory_space> ();
    src  = dv_src.template view<memory_space> ();

    // DualView::sync() is templated on a memory space and performs a one-way
    // synchronization to it, i.e. a copy from the other memory space.  It
    // does nothing when the two sides are already in sync; DualView knows
    // that from the user's modify() calls (see below).
    dv_idx.sync<memory_space> ();
    dv_dest.template sync<memory_space> ();
    dv_src.template sync<memory_space> ();

    // The functor writes dest, so flag it as modified in memory_space.
    dv_dest.template modify<memory_space> ();
  }

  // Row kernel: dest(i) += sum over columns of the gathered-value expression.
  KOKKOS_INLINE_FUNCTION
  void operator() (const int i) const {
    const int ncols = (int) idx.dimension_1();
    double acc = 0.0;
    int col = 0;
    while (col < ncols) {
      const double v = src(idx(i,col));
      acc += v*v + 0.5*(idx.dimension_0()*v -idx.dimension_1()*v);
      ++col;
    }
    dest(i) += acc;
  }
};
// Small value type stored in a DualView by main().
// Field meanings are presumably charge (q) and mass (m) -- TODO confirm;
// q_over_m caches their ratio at construction time.
class ParticleType {
public:
  double q;
  double m;
  double q_over_m;

  // Defaults to q = -1, m = 1; q_over_m is derived from the two arguments.
  KOKKOS_INLINE_FUNCTION
  ParticleType(double q_ = -1, double m_ = 1)
    : q(q_),
      m(m_),
      q_over_m(q_/m_)
  {}
};
typedef Kokkos::DualView<ParticleType[10]> ParticleTypes;
int main (int narg, char* arg[]) {
Kokkos::initialize (narg, arg);
ParticleTypes test("Test");
Kokkos::fence();
test.h_view(0) = ParticleType(-1e4,1);
Kokkos::fence();
int size = 1000000;
// Create DualViews. This will allocate on both the device and its
// host_mirror_device.
idx_type idx ("Idx",size,64);
view_type dest ("Dest",size);
view_type src ("Src",size);
srand (134231);
// Get a reference to the host view of idx directly (equivalent to
// idx.view<idx_type::host_mirror_space>() )
idx_type::t_host h_idx = idx.h_view;
for (int i = 0; i < size; ++i) {
for (view_type::size_type j = 0; j < h_idx.dimension_1 (); ++j) {
h_idx(i,j) = (size + i + (rand () % 500 - 250)) % size;
}
}
// Mark idx as modified on the host_mirror_space so that a
// sync to the device will actually move data. The sync happens in
// the functor's constructor.
idx.modify<idx_type::host_mirror_space> ();
// Run on the device. This will cause a sync of idx to the device,
// since it was marked as modified on the host.
Kokkos::Impl::Timer timer;
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
Kokkos::fence();
double sec1_dev = timer.seconds();
timer.reset();
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
Kokkos::fence();
double sec2_dev = timer.seconds();
// Run on the host's default execution space (could be the same as device).
// This will cause a sync back to the host of dest. Note that if the Device is CUDA,
// the data layout will not be optimal on host, so performance is
// lower than what it would be for a pure host compilation.
timer.reset();
Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
Kokkos::fence();
double sec1_host = timer.seconds();
timer.reset();
Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
Kokkos::fence();
double sec2_host = timer.seconds();
printf("Device Time with Sync: %f without Sync: %f \n",sec1_dev,sec2_dev);
printf("Host Time with Sync: %f without Sync: %f \n",sec1_host,sec2_host);
Kokkos::finalize();
}

View File

@ -1,43 +0,0 @@
# Builds the tutorial example in this directory against the Kokkos
# makefile fragment found under KOKKOS_PATH.
#
# The backend is chosen from the value KOKKOS_DEVICES has *before* this file
# assigns it (command line or environment): if it mentions Cuda the example
# is compiled with nvcc_wrapper into <name>.cuda, otherwise with g++ into
# <name>.host.
KOKKOS_PATH = ../../../..

# Expanded once; a recursive '=' would re-run the wildcard on every use.
SRC := $(wildcard *.cpp)

# None of these names a file, so keep a stray file called 'build' or
# 'clean' from making the target look up to date.
.PHONY: default build clean

default: build
	echo "Start Build"

ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ := $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step; $@ is the executable being produced.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is expected to come from the included Makefile.kokkos.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,134 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <cstdio>
#include <cstdlib>
// View aliases shared by the functor and main() below:
//   view_type - rank-1 View of double (used for "Dest" and "Src")
//   idx_type  - rank-2 View of int    (used for the "Idx" index table)
typedef Kokkos::View<double*> view_type;
typedef Kokkos::View<int**> idx_type;
// Functor that, for every row i of idx, gathers src at the indices stored in
// that row and accumulates a per-row sum into dest(i).
template<class Device>
struct localsum {
  // Execution space of the functor (takes precedence over the
  // DefaultExecutionSpace).
  typedef Device execution_space;

  // Views used by the kernel: a const index table, the output, and the
  // source values read through the MemoryRandomAccess trait.
  idx_type::const_type idx;
  view_type dest;
  Kokkos::View<view_type::const_data_type, view_type::array_layout, view_type::execution_space, Kokkos::MemoryRandomAccess > src;

  // Stores the three Views handed in by the caller.
  localsum(idx_type idx_, view_type dest_, view_type src_)
    : idx(idx_),
      dest(dest_),
      src(src_)
  {}

  // Row kernel: dest(i) += sum over columns of the gathered-value expression.
  KOKKOS_INLINE_FUNCTION
  void operator() (int i) const {
    double acc = 0.0;
    int col = 0;
    while (col < idx.dimension_1()) {
      const double v = src(idx(i,col));
      acc += v*v + 0.5*(idx.dimension_0()*v -idx.dimension_1()*v);
      col++;
    }
    dest(i) += acc;
  }
};
int main(int narg, char* arg[]) {
Kokkos::initialize(narg,arg);
int size = 1000000;
// Create Views
idx_type idx("Idx",size,64);
view_type dest("Dest",size);
view_type src("Src",size);
srand(134231);
// When using UVM Cuda views can be accessed on the Host directly
for(int i=0; i<size; i++) {
for(int j=0; j<idx.dimension_1(); j++)
idx(i,j) = (size + i + (rand()%500 - 250))%size;
}
Kokkos::fence();
// Run on the device
// This will cause a sync of idx to the device since it was modified on the host
Kokkos::Impl::Timer timer;
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
Kokkos::fence();
double sec1_dev = timer.seconds();
// No data transfer will happen now, since nothing is accessed on the host
timer.reset();
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
Kokkos::fence();
double sec2_dev = timer.seconds();
// Run on the host
// This will cause a sync back to the host of dest which was changed on the device
// Compare runtime here with the dual_view example: dest will be copied back in 4k blocks
// when they are accessed the first time during the parallel_for. Due to the latency of a memcpy
// this gives lower effective bandwidth when doing a manual copy via dual views
timer.reset();
Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
Kokkos::fence();
double sec1_host = timer.seconds();
// No data transfers will happen now
timer.reset();
Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
Kokkos::fence();
double sec2_host = timer.seconds();
printf("Device Time with Sync: %lf without Sync: %lf \n",sec1_dev,sec2_dev);
printf("Host Time with Sync: %lf without Sync: %lf \n",sec1_host,sec2_host);
Kokkos::finalize();
}

View File

@ -1,43 +0,0 @@
# Builds the tutorial example in this directory against the Kokkos
# makefile fragment found under KOKKOS_PATH.
#
# The backend is chosen from the value KOKKOS_DEVICES has *before* this file
# assigns it (command line or environment): if it mentions Cuda the example
# is compiled with nvcc_wrapper into <name>.cuda, otherwise with g++ into
# <name>.host.
KOKKOS_PATH = ../../../..

# Expanded once; a recursive '=' would re-run the wildcard on every use.
SRC := $(wildcard *.cpp)

# None of these names a file, so keep a stray file called 'build' or
# 'clean' from making the target look up to date.
.PHONY: default build clean

default: build
	echo "Start Build"

ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ := $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step; $@ is the executable being produced.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is expected to come from the included Makefile.kokkos.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,43 +0,0 @@
# Builds the tutorial example in this directory against the Kokkos
# makefile fragment found under KOKKOS_PATH.
#
# The backend is chosen from the value KOKKOS_DEVICES has *before* this file
# assigns it (command line or environment): if it mentions Cuda the example
# is compiled with nvcc_wrapper into <name>.cuda, otherwise with g++ into
# <name>.host.  The Cuda build additionally passes
# "--default-stream per-thread" to the compiler.
KOKKOS_PATH = ../../../..

# Expanded once; a recursive '=' would re-run the wildcard on every use.
SRC := $(wildcard *.cpp)

# None of these names a file, so keep a stray file called 'build' or
# 'clean' from making the target look up to date.
.PHONY: default build clean

default: build
	echo "Start Build"

ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3 --default-stream per-thread
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ := $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step; $@ is the executable being produced.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is expected to come from the included Makefile.kokkos.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,148 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <cstdio>
#include <typeinfo>
#include <cmath>
#include <impl/Kokkos_Timer.hpp>
struct FillDevice {
double value;
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> a;
FillDevice(const double& val, const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_a):
value(val),a(d_a){}
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
a(i) = value;
}
};
struct ComputeADevice {
int iter;
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> a;
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> b;
ComputeADevice(const int& iter_,
const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_a,
const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_b):
iter(iter_),a(d_a),b(d_b){}
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
for(int j=1;j<iter;j++) {
a(i) += std::pow(b(i),1.0+1.0/iter);
}
}
};
struct ComputeAHost {
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace> a;
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace> b;
ComputeAHost( const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace>& d_a,
const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace>& d_b):
a(d_a),b(d_b){}
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
a(i) += b(i);
}
};
struct MergeDevice {
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> a;
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> b;
MergeDevice(
const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_a,
const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_b):
a(d_a),b(d_b){}
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
a(i) += b(i);
}
};
// Driver for the host/device overlap example.
//
// Usage: <exe> <synch>
//   synch == 1 : the two host<->device copies call deep_copy with a
//                Kokkos::OpenMP instance as first argument
//   synch == 2 : the two copies call the plain two-argument deep_copy
//   otherwise  : no copies are made in the timed region
//
// Returns 1 without touching Kokkos when the argument is missing.
int main(int argc, char * argv[]) {
  // argv[1] is read unconditionally below; bail out early instead of
  // dereferencing a missing argument.
  if (argc < 2) {
    fprintf(stderr, "Usage: %s <synch>\n", argc > 0 ? argv[0] : "overlapping");
    fprintf(stderr, "  synch = 1: deep_copy with a Kokkos::OpenMP instance\n");
    fprintf(stderr, "  synch = 2: plain deep_copy\n");
    return 1;
  }

  int size = 100000000;
  Kokkos::initialize();
  int synch = atoi(argv[1]);

  Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> d_a("Device A",size);
  Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> d_b("Device B",size);
  Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> d_tmp("Device tmp",size);
  Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace> h_a("Host A",size);
  Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace> h_b("Host B",size);

  // Initial values: d_a = 0, d_b = 1.3513.
  Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),FillDevice(0.0,d_a));
  Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),FillDevice(1.3513,d_b));
  Kokkos::fence();

  // Timed region: device kernel, device->host copy, host kernels,
  // host->device copy.
  Kokkos::Impl::Timer timer;
  Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),ComputeADevice(20,d_a,d_b));

  if(synch==1)
    Kokkos::deep_copy(Kokkos::OpenMP(),h_b,d_b);
  if(synch==2)
    Kokkos::deep_copy(h_b,d_b);

  // Host side: h_a = 0, then h_a += h_b.
  Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::OpenMP>(0,size),[=] (const int& i) {
    h_a(i) = 0.0;
  });
  Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::OpenMP>(0,size),ComputeAHost(h_a,h_b));
  Kokkos::OpenMP::fence();

  if(synch==1)
    Kokkos::deep_copy(Kokkos::OpenMP(), d_tmp,h_a);
  if(synch==2)
    Kokkos::deep_copy(d_tmp,h_a);
  Kokkos::fence();

  std::cout << "Time " << timer.seconds() << std::endl;

  // Combine the device result with the host result and bring it back.
  // NOTE(review): the printed reference value presumably assumes synch is
  // 1 or 2, since otherwise h_b and d_tmp are never filled by a copy.
  Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),MergeDevice(d_a,d_tmp));
  Kokkos::deep_copy(h_a,d_a);
  std::cout << "h_a(0): " << h_a(0) << " ( Correct: 27.4154 )" << std::endl;

  Kokkos::finalize();
}

View File

@ -1,84 +0,0 @@
# Driver Makefile: runs make in every tutorial subdirectory, in the order
# listed in SUBDIRS, stopping at the first failure.
#
#   default                       build with each example's own device setting
#   openmp/pthreads/serial/cuda   build with KOKKOS_DEVICES forced accordingly
#   clean                         run 'make clean' in every subdirectory
#
# Sub-makes are started with $(MAKE) -C <dir>, so a missing directory is an
# error instead of silently re-running make in this directory.
SUBDIRS := 01_data_layouts 02_memory_traits 03_subviews 04_dualviews \
           05_NVIDIA_UVM 06_AtomicViews

# -j value handed to each sub-make; override with e.g. 'make JOBS=8'.
JOBS ?= 4

# Extra argument for the sub-makes; empty for 'default', set per target below.
DEVICES_ARG :=

.PHONY: default openmp pthreads serial cuda clean

default openmp pthreads serial cuda:
	for dir in $(SUBDIRS); do \
	  $(MAKE) -C ./$$dir -j $(JOBS) $(DEVICES_ARG) || exit 1; \
	done

openmp:   DEVICES_ARG := KOKKOS_DEVICES=OpenMP
pthreads: DEVICES_ARG := KOKKOS_DEVICES=Pthreads
serial:   DEVICES_ARG := KOKKOS_DEVICES=Serial
cuda:     DEVICES_ARG := KOKKOS_DEVICES=Cuda,Serial

clean:
	for dir in $(SUBDIRS); do \
	  $(MAKE) -C ./$$dir clean || exit 1; \
	done

View File

@ -1,43 +0,0 @@
# Builds the tutorial example in this directory against the Kokkos
# makefile fragment found under KOKKOS_PATH.
#
# The backend is chosen from the value KOKKOS_DEVICES has *before* this file
# assigns it (command line or environment): if it mentions Cuda the example
# is compiled with nvcc_wrapper into <name>.cuda, otherwise with g++ into
# <name>.host.
KOKKOS_PATH = ../../../..

# Expanded once; a recursive '=' would re-run the wildcard on every use.
SRC := $(wildcard *.cpp)

# None of these names a file, so keep a stray file called 'build' or
# 'clean' from making the target look up to date.
.PHONY: default build clean

default: build
	echo "Start Build"

ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ := $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step; $@ is the executable being produced.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is expected to come from the included Makefile.kokkos.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,152 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <Kokkos_DualView.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <cstdlib>
// Alias for the execution space associated with Kokkos::HostSpace.
// NOTE(review): not referenced by the functor or main() below -- confirm
// whether it is still needed.
typedef Kokkos::HostSpace::execution_space DefaultHostType;
// Kokkos provides two different random number generators with a 64 bit and a 1024 bit state.
// These generators are based on Vigna, Sebastiano (2014). "An experimental exploration of Marsaglia's xorshift generators, scrambled"
// See: http://arxiv.org/abs/1402.6246
// The generators can be used fully independently on each thread and have been tested to
// produce good statistics for both inter and intra thread numbers.
// Note that within a kernel NO random number operations are (team) collective operations.
// Everything can be called within branches. This is a difference to the curand library where
// certain operations are required to be called by all threads in a block.
//
// In Kokkos you are required to create a pool of generator states, so that threads can
// grab their own. On CPU architectures the pool size is equal to the thread number,
// on CUDA about 128k states are generated (enough to give every potentially simultaneously
// running thread its own state). Within a kernel a thread is required to acquire a state from the
// pool and later return it.
// On CPUs the Random number generator is deterministic if using the same number of threads.
// On GPUs (i.e. using the CUDA backend) it is not deterministic because threads acquire states via
// atomics.
// A Functor for generating uint64_t random numbers templated on the GeneratorPool type
// Functor that fills a View with uint64_t random numbers; templated on the
// type of the generator pool it draws from.
template<class GeneratorPool>
struct generate_random {
  // Pool the generator states are taken from
  GeneratorPool rand_pool;

  // Destination for the random numbers
  Kokkos::View<uint64_t*> vals;

  // How many numbers each work item produces
  int samples;

  // Stores the output View, the pool and the per-item sample count
  generate_random(Kokkos::View<uint64_t*> vals_,
                  GeneratorPool rand_pool_,
                  int samples_)
    : rand_pool(rand_pool_),
      vals(vals_),
      samples(samples_)
  {}

  // Work item i writes entries [i*samples, i*samples + samples) of vals.
  KOKKOS_INLINE_FUNCTION
  void operator() (int i) const {
    // Take a generator state out of the pool for the active thread
    typename GeneratorPool::generator_type gen = rand_pool.get_state();

    // Draw 'samples' numbers as urand64, i.e. between 0 and
    // rand_pool.MAX_URAND64.  Other calls exist for other scalar types,
    // for explicit ranges, and for normally distributed floats.
    int k = 0;
    while (k < samples) {
      vals(i*samples+k) = gen.urand64();
      k++;
    }

    // Hand the state back so that another thread can acquire it
    rand_pool.free_state(gen);
  }
};
int main(int argc, char* args[]) {
if (argc != 3){
printf("Please pass two integers on the command line\n");
}
else {
// Initialize Kokkos
Kokkos::initialize(argc,args);
int size = atoi(args[1]);
int samples = atoi(args[2]);
// Create two random number generator pools one for 64bit states and one for 1024 bit states
// Both take an 64 bit unsigned integer seed to initialize a Random_XorShift64 generator which
// is used to fill the generators of the pool.
Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857);
Kokkos::Random_XorShift1024_Pool<> rand_pool1024(5374857);
Kokkos::DualView<uint64_t*> vals("Vals",size*samples);
// Run some performance comparisons
Kokkos::Impl::Timer timer;
Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift64_Pool<> >(vals.d_view,rand_pool64,samples));
Kokkos::fence();
timer.reset();
Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift64_Pool<> >(vals.d_view,rand_pool64,samples));
Kokkos::fence();
double time_64 = timer.seconds();
Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift1024_Pool<> >(vals.d_view,rand_pool1024,samples));
Kokkos::fence();
timer.reset();
Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift1024_Pool<> >(vals.d_view,rand_pool1024,samples));
Kokkos::fence();
double time_1024 = timer.seconds();
printf("#Time XorShift64*: %lf %lf\n",time_64,1.0e-9*samples*size/time_64 );
printf("#Time XorShift1024*: %lf %lf\n",time_1024,1.0e-9*samples*size/time_1024 );
Kokkos::deep_copy(vals.h_view,vals.d_view);
Kokkos::finalize();
}
return 0;
}

View File

@ -1,24 +0,0 @@
# Driver Makefile: runs make in every tutorial subdirectory listed in
# SUBDIRS, stopping at the first failure.
#
#   default                       build with each example's own device setting
#   openmp/pthreads/serial/cuda   build with KOKKOS_DEVICES forced accordingly
#   clean                         run 'make clean' in every subdirectory
#
# Sub-makes are started with $(MAKE) -C <dir>, so a missing directory is an
# error instead of silently re-running make in this directory.
SUBDIRS := 01_random_numbers

# -j value handed to each sub-make; override with e.g. 'make JOBS=8'.
JOBS ?= 4

# Extra argument for the sub-makes; empty for 'default', set per target below.
DEVICES_ARG :=

.PHONY: default openmp pthreads serial cuda clean

default openmp pthreads serial cuda:
	for dir in $(SUBDIRS); do \
	  $(MAKE) -C ./$$dir -j $(JOBS) $(DEVICES_ARG) || exit 1; \
	done

openmp:   DEVICES_ARG := KOKKOS_DEVICES=OpenMP
pthreads: DEVICES_ARG := KOKKOS_DEVICES=Pthreads
serial:   DEVICES_ARG := KOKKOS_DEVICES=Serial
cuda:     DEVICES_ARG := KOKKOS_DEVICES=Cuda,Serial

clean:
	for dir in $(SUBDIRS); do \
	  $(MAKE) -C ./$$dir clean || exit 1; \
	done

View File

@ -1,43 +0,0 @@
# Builds the tutorial example in this directory against the Kokkos
# makefile fragment found under KOKKOS_PATH.
#
# The backend is chosen from the value KOKKOS_DEVICES has *before* this file
# assigns it (command line or environment): if it mentions Cuda the example
# is compiled with nvcc_wrapper into <name>.cuda, otherwise with g++ into
# <name>.host.
KOKKOS_PATH = ../../../..

# Expanded once; a recursive '=' would re-run the wildcard on every use.
SRC := $(wildcard *.cpp)

# None of these names a file, so keep a stray file called 'build' or
# 'clean' from making the target look up to date.
.PHONY: default build clean

default: build
	echo "Start Build"

ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ := $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step; $@ is the executable being produced.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is expected to come from the included Makefile.kokkos.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,94 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <cstdio>
// Using the default execution space, define a TeamPolicy and its member_type.
// The member_type is what the operator of a functor or lambda gets; for
// a simple RangePolicy the member_type is simply an integer.
// For a TeamPolicy it's a much richer object, since it provides all information
// to identify a thread uniquely and some team related function calls such as a
// barrier (which will be used in a subsequent example).
// A ThreadTeam consists of 1 to n threads where the maximum value of n is
// determined by the hardware. On a dual socket CPU machine with 8 cores per socket
// the maximum size of a team is 8. The number of teams (i.e. the league_size) is
// not limited by physical constraints. It's a purely logical number.
typedef Kokkos::TeamPolicy<> team_policy ;
typedef team_policy::member_type team_member ;
// Reduction functor meant to be launched with the TeamPolicy.
struct hello_world {
  // Type of the reduction result (a plain sum).
  typedef int value_type;

  // Reduction operator. Its first argument is the TeamPolicy member_type
  // instead of an integer index. The body behaves like a parallel region for
  // the team: every team member executes it and adds one to the total.
  KOKKOS_INLINE_FUNCTION
  void operator() ( const team_member & thread, int& sum) const {
    ++sum;

    // The member handle reports where this thread sits in the launch:
    // its team (league_rank), its position inside the team (team_rank),
    // the number of teams (league_size) and the threads per team (team_size).
    const int league_rank = thread.league_rank();
    const int team_rank   = thread.team_rank();
    const int league_size = thread.league_size();
    const int team_size   = thread.team_size();
    printf("Hello World: %i %i // %i %i\n",league_rank,team_rank,league_size,team_size);
  }
};
int main(int narg, char* args[]) {
Kokkos::initialize(narg,args);
// Launch 12 teams of the maximum number of threads per team
const team_policy policy( 12 , team_policy::team_size_max( hello_world() ) );
int sum = 0;
Kokkos::parallel_reduce( policy , hello_world() , sum );
// The result will be 12*team_policy::team_size_max( hello_world())
printf("Result %i\n",sum);
Kokkos::finalize();
}

View File

@ -1,43 +0,0 @@
# Builds the example in this directory from every *.cpp file found here,
# using the Kokkos build fragment at $(KOKKOS_PATH)/Makefile.kokkos.
KOKKOS_PATH = ../../../..
# Expanded once at parse time; the source list does not change during a run.
SRC := $(wildcard *.cpp)

# These goals name actions, not files.
.PHONY: default build clean

# Note: the echo runs after `build` has finished, because prerequisites are
# brought up to date before a target's own recipe is executed.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so the Cuda branch is
# taken only when the caller supplies a value containing "Cuda" (command line
# or environment). A command-line value also overrides the assignments below.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
  CXX = nvcc_wrapper
  CXXFLAGS = -O3
  LINK = ${CXX}
  LINKFLAGS =
  EXE = $(SRC:.cpp=.cuda)
  KOKKOS_DEVICES = "Cuda,OpenMP"
  KOKKOS_ARCH = "SNB,Kepler35"
else
  CXX = g++
  CXXFLAGS = -O3
  LINK = ${CXX}
  LINKFLAGS =
  EXE = $(SRC:.cpp=.host)
  KOKKOS_DEVICES = "OpenMP"
  KOKKOS_ARCH = "SNB"
endif

# Not referenced in this file; presumably consumed by Makefile.kokkos -- TODO confirm.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step. The output is named with $@ (the target being built) rather than
# $(EXE), which would expand to several words if more than one source exists.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is expected to come from the included Makefile.kokkos.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,93 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <cstdio>
// Parallel reduction over thread teams (TeamPolicy), written with a lambda.
//
// A thread team holds between 1 and n threads, where the hardware fixes the
// maximum n. On a dual-socket CPU machine with 8 cores per socket, a team can
// have at most 8 threads. The number of teams (the league_size) has no
// physical limit, apart from some reasonable bound that ultimately depends on
// the hardware and on the programming model implementation.
int main (int narg, char* args[]) {
  typedef Kokkos::TeamPolicy<> team_policy;
  typedef typename team_policy::member_type team_member;

  Kokkos::initialize (narg, args);

  // Launch configuration: 12 teams, each with the largest permitted team size.
  const int threads_per_team = team_policy::team_size_max ( [=]{} );
  const team_policy policy (12, threads_per_team);

  // With a team policy the lambda's first argument is no longer an integer
  // index (as with RangePolicy) but a TeamPolicy::member_type. That object
  // carries everything needed to identify a thread uniquely and offers
  // team-level operations such as a team barrier (used in a later example).
  //
  // The lambda body acts as a "team parallel region": every member of every
  // team runs it, and each one contributes to the total.
  int sum = 0;
  Kokkos::parallel_reduce (policy, KOKKOS_LAMBDA (const team_member& member, int& contribution) {
      ++contribution;
      // The member handle exposes the thread's multidimensional index plus
      // the number of teams and the size of each team.
      printf ("Hello World: %i %i // %i %i\n", member.league_rank (),
              member.team_rank (), member.league_size (), member.team_size ());
    }, sum);

  // Expected value: 12 * threads_per_team.
  printf ("Result %i\n",sum);

  Kokkos::finalize ();
}

View File

@ -1,43 +0,0 @@
# Builds the example in this directory from every *.cpp file found here,
# using the Kokkos build fragment at $(KOKKOS_PATH)/Makefile.kokkos.
KOKKOS_PATH = ../../../..
# Expanded once at parse time; the source list does not change during a run.
SRC := $(wildcard *.cpp)

# These goals name actions, not files.
.PHONY: default build clean

# Note: the echo runs after `build` has finished, because prerequisites are
# brought up to date before a target's own recipe is executed.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so the Cuda branch is
# taken only when the caller supplies a value containing "Cuda" (command line
# or environment). A command-line value also overrides the assignments below.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
  CXX = nvcc_wrapper
  CXXFLAGS = -O3
  LINK = ${CXX}
  LINKFLAGS =
  EXE = $(SRC:.cpp=.cuda)
  KOKKOS_DEVICES = "Cuda,OpenMP"
  KOKKOS_ARCH = "SNB,Kepler35"
else
  CXX = g++
  CXXFLAGS = -O3
  LINK = ${CXX}
  LINKFLAGS =
  EXE = $(SRC:.cpp=.host)
  KOKKOS_DEVICES = "OpenMP"
  KOKKOS_ARCH = "SNB"
endif

# Not referenced in this file; presumably consumed by Makefile.kokkos -- TODO confirm.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step. The output is named with $@ (the target being built) rather than
# $(EXE), which would expand to several words if more than one source exists.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is expected to come from the included Makefile.kokkos.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,89 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <cstdio>
// See 01_thread_teams for an explanation of a basic TeamPolicy.
// team_policy uses the default execution space; team_member is the handle
// type passed as first argument to the functor's operator().
typedef Kokkos::TeamPolicy<> team_policy ;
typedef typename team_policy::member_type team_member ;
// Reduction functor that also runs a nested, team-level parallel loop.
struct hello_world {
  // Type of the reduction result (a plain sum).
  typedef int value_type;

  KOKKOS_INLINE_FUNCTION
  void operator() ( const team_member & thread, int& sum) const {
    // Each team member contributes one to the reduction.
    ++sum;

    // Under a TeamPolicy, Kokkos permits nested parallel loops. All three
    // patterns (for, reduce, scan) are available and their syntax largely
    // mirrors the global-level calls.
    //
    // Thread-level nesting uses the Kokkos::TeamThreadRange policy (vector-level
    // nesting is covered by the next tutorial example). It takes the team_member
    // and a loop count; the loop is executed by all members of the team, with
    // the iterations divided among the team's threads.
    //
    // The division is not necessarily even. For example, on a GPU with a
    // team_size() larger than 31 only the first 31 threads would do any work,
    // whereas on a CPU with 8 threads, 7 would run 4 iterations and 1 would
    // run 3. How the count is split depends on the architecture, much like
    // RangePolicy at the global level.
    //
    // The nested call is not guaranteed to be synchronous. Remember as well
    // that the operator is a parallel region for the team: everything outside
    // the nested parallel_for is executed by every thread of the team too.
    Kokkos::parallel_for(Kokkos::TeamThreadRange(thread,31), [&] (const int& iter) {
      printf("Hello World: (%i , %i) executed loop %i \n",thread.league_rank(),thread.team_rank(),iter);
    });
  }
};
int main(int narg, char* args[]) {
Kokkos::initialize(narg,args);
// Launch 3 teams of the maximum number of threads per team
const team_policy policy( 3 , team_policy::team_size_max( hello_world() ) );
int sum = 0;
Kokkos::parallel_reduce( policy , hello_world() , sum );
printf("Result %i\n",sum);
Kokkos::finalize();
}

View File

@ -1,43 +0,0 @@
# Builds the example in this directory from every *.cpp file found here,
# using the Kokkos build fragment at $(KOKKOS_PATH)/Makefile.kokkos.
KOKKOS_PATH = ../../../..
# Expanded once at parse time; the source list does not change during a run.
SRC := $(wildcard *.cpp)

# These goals name actions, not files.
.PHONY: default build clean

# Note: the echo runs after `build` has finished, because prerequisites are
# brought up to date before a target's own recipe is executed.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so the Cuda branch is
# taken only when the caller supplies a value containing "Cuda" (command line
# or environment). A command-line value also overrides the assignments below.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
  CXX = nvcc_wrapper
  CXXFLAGS = -O3
  LINK = ${CXX}
  LINKFLAGS =
  EXE = $(SRC:.cpp=.cuda)
  KOKKOS_DEVICES = "Cuda,OpenMP"
  KOKKOS_ARCH = "SNB,Kepler35"
else
  CXX = g++
  CXXFLAGS = -O3
  LINK = ${CXX}
  LINKFLAGS =
  EXE = $(SRC:.cpp=.host)
  KOKKOS_DEVICES = "OpenMP"
  KOKKOS_ARCH = "SNB"
endif

# Not referenced in this file; presumably consumed by Makefile.kokkos -- TODO confirm.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step. The output is named with $@ (the target being built) rather than
# $(EXE), which would expand to several words if more than one source exists.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is expected to come from the included Makefile.kokkos.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,162 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <cstdio>
#ifdef KOKKOS_HAVE_CXX11
// The TeamPolicy actually supports 3D parallelism: Teams, Threads, Vector.
// Kokkos::parallel_{for/reduce/scan} calls can be nested completely freely.
// The execution policies for the nested layers are TeamThreadRange and
// ThreadVectorRange.
// The only restriction on nesting is that a given level can only be nested in a
// higher one. e.g. a ThreadVectorRange can be nested inside a TeamPolicy operator
// and inside a TeamThreadRange, but you cannot nest a ThreadVectorRange or a
// TeamThreadRange inside another ThreadVectorRange.
// As with the 2D execution of TeamPolicy the operator has to be considered as
// a parallel region even with respect to VectorLanes. That means even outside
// a TeamThread or VectorThread loop all threads of a team and all vector lanes
// of a thread execute every line of the operator as long as there are no restrictions
// on them.
// Code lines can be restricted using Kokkos::single to either execute once PerThread
// or execute once PerTeam.
typedef typename Kokkos::TeamPolicy<>::member_type team_member ;
// Team functor: for slice i = league_rank of the 3D view `data`, it counts per
// row j how many entries are multiples of 4, then counts how many adjacent
// rows (j, j+1) have equal counts and atomically adds that number to gsum.
struct SomeCorrelation {
typedef int value_type; //Specify value type for reduction target, sum
// Team scratch memory space and an unmanaged 1D int view placed in it.
typedef Kokkos::DefaultExecutionSpace::scratch_memory_space shared_space;
typedef Kokkos::View<int*,shared_space,Kokkos::MemoryUnmanaged> shared_1d_int;
// Read-only input data and the zero-dimensional view receiving the global sum.
Kokkos::View<const int***,Kokkos::LayoutRight> data;
Kokkos::View<int> gsum;
// Stores (shallow) copies of the input view and the result view.
SomeCorrelation(Kokkos::View<int***,Kokkos::LayoutRight> data_in,
Kokkos::View<int> sum):data(data_in),gsum(sum){}
KOKKOS_INLINE_FUNCTION
void operator() ( const team_member & thread) const {
// Each team works on the slice selected by its league rank.
int i = thread.league_rank();
// Allocate a shared array for the team.
shared_1d_int count(thread.team_shmem(),data.dimension_1());
// With each team run a parallel_for with its threads
Kokkos::parallel_for(Kokkos::TeamThreadRange(thread,data.dimension_1()), [=] (const int& j) {
// NOTE(review): tsum is not initialized here; this relies on the nested
// parallel_reduce below assigning it -- confirm.
int tsum;
// Run a vector loop reduction over the inner dimension of data
// Count how many values are multiples of 4
// Every vector lane gets the same reduction value (tsum) back, it is broadcast to all vector lanes
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(thread,data.dimension_2()), [=] (const int& k, int & vsum) {
vsum+= (data(i,j,k) % 4 == 0)?1:0;
},tsum);
// Make sure only one vector lane adds the reduction value to the shared array, i.e. execute
// the next line only once PerThread
Kokkos::single(Kokkos::PerThread(thread),[=] () {
count(j) = tsum;
});
});
// Wait for all threads to finish the parallel_for so that all shared memory writes are done
thread.team_barrier();
// Check with one vector lane from each thread how many consecutive
// data segments have the same number of values divisible by 4
// The team reduction value is again broadcast to every team member (and every vector lane)
int team_sum = 0;
// The range stops at dimension_1()-1 because the body reads count(j+1).
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(thread, data.dimension_1()-1), [=] (const int& j, int& thread_sum) {
// It is not valid to directly add to thread_sum
// Use a single function with broadcast instead
// team_sum will be used as input to the operator (i.e. it is used to initialize sum)
// the end value of sum will be broadcast to all vector lanes in the thread.
Kokkos::single(Kokkos::PerThread(thread),[=] (int& sum) {
if(count(j)==count(j+1)) sum++;
},thread_sum);
},team_sum);
// Add with one thread and vectorlane of the team the team_sum to the global value
Kokkos::single(Kokkos::PerTeam(thread),[=] () {
Kokkos::atomic_add(&gsum(),team_sum);
});
}
// The functor needs to define how much shared memory it requests given a team_size.
// Here team_size is not used: the request covers one int per row of data
// (data.dimension_1() entries), matching the `count` array allocated in operator().
size_t team_shmem_size( int team_size ) const {
return shared_1d_int::shmem_size(data.dimension_1());
}
};
int main(int narg, char* args[]) {
Kokkos::initialize(narg,args);
// Produce some 3D random data (see Algorithms/01_random_numbers for more info)
Kokkos::View<int***,Kokkos::LayoutRight> data("Data",512,512,32);
Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857);
Kokkos::fill_random(data,rand_pool64,100);
// A global value to put the result in
Kokkos::View<int> gsum("Sum");
// Each team handles a slice of the data
// Set up TeamPolicy with 512 teams with maximum number of threads per team and 16 vector lanes.
// The team_size_max function will determine the maximum number of threads taking into account
// shared memory requirements of the Functor.
// The maximum vector length is hardware dependent but can always be smaller than the hardware allows.
// The vector length must be a power of 2.
const Kokkos::TeamPolicy<> policy( 512 , Kokkos::TeamPolicy<>::team_size_max(SomeCorrelation(data,gsum)) , 16);
Kokkos::parallel_for( policy , SomeCorrelation(data,gsum) );
Kokkos::fence();
// Copy result value back
int sum = 0;
Kokkos::deep_copy(sum,gsum);
printf("Result %i\n",sum);
Kokkos::finalize();
}
#endif //KOKKOS_HAVE_CXX11

View File

@ -1,43 +0,0 @@
# Builds the example in this directory from every *.cpp file found here,
# using the Kokkos build fragment at $(KOKKOS_PATH)/Makefile.kokkos.
KOKKOS_PATH = ../../../..
# Expanded once at parse time; the source list does not change during a run.
SRC := $(wildcard *.cpp)

# These goals name actions, not files.
.PHONY: default build clean

# Note: the echo runs after `build` has finished, because prerequisites are
# brought up to date before a target's own recipe is executed.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so the Cuda branch is
# taken only when the caller supplies a value containing "Cuda" (command line
# or environment). A command-line value also overrides the assignments below.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
  CXX = nvcc_wrapper
  CXXFLAGS = -O3
  LINK = ${CXX}
  LINKFLAGS =
  EXE = $(SRC:.cpp=.cuda)
  KOKKOS_DEVICES = "Cuda,OpenMP"
  KOKKOS_ARCH = "SNB,Kepler35"
else
  CXX = g++
  CXXFLAGS = -O3
  LINK = ${CXX}
  LINKFLAGS =
  EXE = $(SRC:.cpp=.host)
  KOKKOS_DEVICES = "OpenMP"
  KOKKOS_ARCH = "SNB"
endif

# Not referenced in this file; presumably consumed by Makefile.kokkos -- TODO confirm.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step. The output is named with $@ (the target being built) rather than
# $(EXE), which would expand to several words if more than one source exists.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is expected to come from the included Makefile.kokkos.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,141 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <cstdio>
#include <cstdlib>
// Device: default execution space, where the kernel runs.
// Host: execution space of HostSpace, used for the host side of the DualViews.
typedef Kokkos::DefaultExecutionSpace Device ;
typedef Kokkos::HostSpace::execution_space Host ;
// Team policy on Device and the member handle passed to the functor.
typedef Kokkos::TeamPolicy< Device > team_policy ;
typedef team_policy::member_type team_member ;
// Used as the requested threads per team, as the histogram extent in both
// dimensions, and as the modulus for the generated data values.
static const int TEAM_SIZE = 16 ;
struct find_2_tuples {
int chunk_size;
Kokkos::View<const int*> data;
Kokkos::View<int**> histogram;
find_2_tuples(int chunk_size_, Kokkos::DualView<int*> data_,
Kokkos::DualView<int**> histogram_):chunk_size(chunk_size_),
data(data_.d_view),histogram(histogram_.d_view) {
data_.sync<Device>();
histogram_.sync<Device>();
histogram_.modify<Device>();
}
KOKKOS_INLINE_FUNCTION
void operator() ( const team_member & dev) const {
Kokkos::View<int**,Kokkos::MemoryUnmanaged> l_histogram(dev.team_shmem(),TEAM_SIZE,TEAM_SIZE);
Kokkos::View<int*,Kokkos::MemoryUnmanaged> l_data(dev.team_shmem(),chunk_size+1);
const int i = dev.league_rank() * chunk_size;
for(int j = dev.team_rank(); j<chunk_size+1; j+=dev.team_size())
l_data(j) = data(i+j);
for(int k = dev.team_rank(); k < TEAM_SIZE; k+=dev.team_size())
for(int l = 0; l < TEAM_SIZE; l++)
l_histogram(k,l) = 0;
dev.team_barrier();
for(int j = 0; j<chunk_size; j++) {
for(int k = dev.team_rank(); k < TEAM_SIZE; k+=dev.team_size())
for(int l = 0; l < TEAM_SIZE; l++) {
if((l_data(j) == k) && (l_data(j+1)==l))
l_histogram(k,l)++;
}
}
for(int k = dev.team_rank(); k < TEAM_SIZE; k+=dev.team_size())
for(int l = 0; l < TEAM_SIZE; l++) {
Kokkos::atomic_fetch_add(&histogram(k,l),l_histogram(k,l));
}
dev.team_barrier();
}
size_t team_shmem_size( int team_size ) const { return sizeof(int)*(chunk_size+2 + team_size * team_size ); }
};
int main(int narg, char* args[]) {
Kokkos::initialize(narg,args);
int chunk_size = 1024;
int nchunks = 100000; //1024*1024;
Kokkos::DualView<int*> data("data",nchunks*chunk_size+1);
srand(1231093);
for(int i = 0; i < (int) data.dimension_0(); i++) {
data.h_view(i) = rand()%TEAM_SIZE;
}
data.modify<Host>();
data.sync<Device>();
Kokkos::DualView<int**> histogram("histogram",TEAM_SIZE,TEAM_SIZE);
Kokkos::Impl::Timer timer;
// threads/team is automatically limited to maximum supported by the device.
Kokkos::parallel_for( team_policy( nchunks , TEAM_SIZE )
, find_2_tuples(chunk_size,data,histogram) );
Kokkos::fence();
double time = timer.seconds();
histogram.sync<Host>();
printf("Time: %f \n\n",time);
int sum = 0;
for(int k=0; k<TEAM_SIZE; k++) {
for(int l=0; l<TEAM_SIZE; l++) {
printf("%i ",histogram.h_view(k,l));
sum += histogram.h_view(k,l);
}
printf("\n");
}
printf("Result: %i %i\n",sum,chunk_size*nchunks);
Kokkos::finalize();
}

View File

@ -1,72 +0,0 @@
# Driver makefile: forwards each goal to every example directory listed in
# SUBDIRS, optionally selecting the Kokkos backend through KOKKOS_DEVICES.
SUBDIRS := 01_thread_teams \
           01_thread_teams_lambda \
           02_nested_parallel_for \
           03_vectorization \
           04_team_scan

# None of these goals produces a file of the same name.
.PHONY: default openmp pthreads serial cuda clean

# `default` is listed first so it remains the default goal.
# $(MAKE) -C replaces "cd dir; make": with the old form a failing `cd` still
# ran `make` in *this* directory, and a literal `make` ignores -n and friends.
# The loop stops at the first failing directory, as the separate recipe lines
# did before. -j 4 is kept from the original recipes.
# NOTE(review): consider dropping -j 4 so the caller's -j setting is used.
default openmp pthreads serial cuda:
	for dir in $(SUBDIRS); do $(MAKE) -C ./$$dir -j 4 $(DEVICE_ARG) || exit $$?; done

clean:
	for dir in $(SUBDIRS); do $(MAKE) -C ./$$dir clean || exit $$?; done

# Extra argument handed to each sub-make. It is empty for `default`, so each
# sub-directory makefile chooses its own backend.
DEVICE_ARG :=
openmp:   DEVICE_ARG := KOKKOS_DEVICES=OpenMP
pthreads: DEVICE_ARG := KOKKOS_DEVICES=Pthreads
serial:   DEVICE_ARG := KOKKOS_DEVICES=Serial
cuda:     DEVICE_ARG := KOKKOS_DEVICES=Cuda,Serial

View File

@ -1,144 +0,0 @@
# Top-level tutorial driver: forwards each goal to every example directory
# (and nested driver makefile) listed in SUBDIRS, optionally selecting the
# Kokkos backend through KOKKOS_DEVICES.
SUBDIRS := 01_hello_world \
           01_hello_world_lambda \
           02_simple_reduce \
           02_simple_reduce_lambda \
           03_simple_view \
           03_simple_view_lambda \
           04_simple_memoryspaces \
           05_simple_atomics \
           Advanced_Views \
           Algorithms \
           Hierarchical_Parallelism

# None of these goals produces a file of the same name.
.PHONY: default openmp pthreads serial cuda clean

# `default` is listed first so it remains the default goal.
# $(MAKE) -C replaces "cd dir; make": with the old form a failing `cd` still
# ran `make` in *this* directory, and a literal `make` ignores -n and friends.
# The loop stops at the first failing directory, as the separate recipe lines
# did before. -j 4 is kept from the original recipes.
# NOTE(review): consider dropping -j 4 so the caller's -j setting is used.
default openmp pthreads serial cuda:
	for dir in $(SUBDIRS); do $(MAKE) -C ./$$dir -j 4 $(DEVICE_ARG) || exit $$?; done

clean:
	for dir in $(SUBDIRS); do $(MAKE) -C ./$$dir clean || exit $$?; done

# Extra argument handed to each sub-make. It is empty for `default`, so each
# sub-directory makefile chooses its own backend.
DEVICE_ARG :=
openmp:   DEVICE_ARG := KOKKOS_DEVICES=OpenMP
pthreads: DEVICE_ARG := KOKKOS_DEVICES=Pthreads
serial:   DEVICE_ARG := KOKKOS_DEVICES=Serial
cuda:     DEVICE_ARG := KOKKOS_DEVICES=Cuda,Serial

View File

@ -1,17 +0,0 @@
Build the examples by typing in each directory:
make -j 16
To specify a target device:
make openmp -j 16
make pthreads -j 16
make serial -j 16
make cuda -j 16
The lambda variants cannot be built with CUDA=yes at the moment, since
CUDA does not support lambdas from the host.
Some of the advanced topics try to highlight performance impacts by timing
different variants of doing the same thing.
Also, some of the advanced topics (in particular hierarchical parallelism)
require C++11 even without using host-side lambdas. CUDA 6.5 can be used
to compile those.