git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14370 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -1,43 +0,0 @@
|
||||
# Build file for one Kokkos tutorial example directory.
#
# Every *.cpp file in this directory is compiled to an object file and the
# objects are linked into the example executable:
#   make                        host build  -> <source>.host
#   make KOKKOS_DEVICES=Cuda    CUDA build  -> <source>.cuda
#   make clean                  remove build products

KOKKOS_PATH = ../../..
# Simple expansion: glob the directory once instead of on every reference.
SRC := $(wildcard *.cpp)

# These names are commands, not files; never treat them as up to date.
.PHONY: default build clean

# First ordinary rule in the file, hence the default goal. Prerequisites run
# before the recipe, so the message is printed after `build` has completed.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so the CUDA branch is
# only selected when the caller supplies a value containing "Cuda"
# (command line or environment).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# NOTE(review): not referenced anywhere else in this file.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# The KOKKOS_* variables and the kokkos-clean target used below are not
# defined in this file; they are expected to come from this include.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable. $@ names the target being built.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

# Compile one source file; the output is named explicitly so the recipe
# produces exactly its target.
%.o: %.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,130 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
#include <typeinfo>
|
||||
|
||||
//
|
||||
// "Hello world" parallel_for example:
|
||||
// 1. Start up Kokkos
|
||||
// 2. Execute a parallel for loop in the default execution space,
|
||||
// using a functor to define the loop body
|
||||
// 3. Shut down Kokkos
|
||||
//
|
||||
// If Kokkos was built with C++11 enabled, try comparing this example
|
||||
// to 01_hello_world_lambda. The latter uses C++11 lambdas (anonymous
|
||||
// functions) to define the loop body of the parallel_for. That makes
|
||||
// the code much more concise and readable. On the other hand,
|
||||
// breaking out the loop body into an explicit functor makes it easier
|
||||
// to test the loop independently of the parallel pattern.
|
||||
//
|
||||
|
||||
// Functor that defines the parallel_for's loop body.
|
||||
//
|
||||
// A "functor" is just a class or struct with a public operator()
|
||||
// instance method.
|
||||
struct hello_world {
|
||||
// If a functor has an "execution_space" (or "execution_space", for
|
||||
// backwards compatibility) public typedef, parallel_* will only run
|
||||
// the functor in that execution space. That's a good way to mark a
|
||||
// functor as specific to an execution space. If the functor lacks
|
||||
// this typedef, parallel_for will run it in the default execution
|
||||
// space, unless you tell it otherwise (that's an advanced topic;
|
||||
// see "execution policies").
|
||||
|
||||
// The functor's operator() defines the loop body. It takes an
|
||||
// integer argument which is the parallel for loop index. Other
|
||||
// arguments are possible; see the "hierarchical parallelism" part
|
||||
// of the tutorial.
|
||||
//
|
||||
// The operator() method must be const, and must be marked with the
|
||||
// KOKKOS_INLINE_FUNCTION macro. If building with CUDA, this macro
|
||||
// will mark your method as suitable for running on the CUDA device
|
||||
// (as well as on the host). If not building with CUDA, the macro
|
||||
// is unnecessary but harmless.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int i) const {
|
||||
printf ("Hello from i = %i\n", i);
|
||||
}
|
||||
};
|
||||
|
||||
int main (int argc, char* argv[]) {
|
||||
// You must call initialize() before you may call Kokkos.
|
||||
//
|
||||
// With no arguments, this initializes the default execution space
|
||||
// (and potentially its host execution space) with default
|
||||
// parameters. You may also pass in argc and argv, analogously to
|
||||
// MPI_Init(). It reads and removes command-line arguments that
|
||||
// start with "--kokkos-".
|
||||
Kokkos::initialize (argc, argv);
|
||||
|
||||
// Print the name of Kokkos' default execution space. We're using
|
||||
// typeid here, so the name might get a bit mangled by the linker,
|
||||
// but you should still be able to figure out what it is.
|
||||
printf ("Hello World on Kokkos execution space %s\n",
|
||||
typeid (Kokkos::DefaultExecutionSpace).name ());
|
||||
|
||||
// Run the above functor on the default Kokkos execution space in
|
||||
// parallel, with a parallel for loop count of 15.
|
||||
//
|
||||
// The Kokkos::DefaultExecutionSpace typedef gives the default
|
||||
// execution space. Depending on how Kokkos was configured, this
|
||||
// could be OpenMP, Threads, Cuda, Serial, or even some other
|
||||
// execution space.
|
||||
//
|
||||
// The following line of code would look like this in OpenMP:
|
||||
//
|
||||
// #pragma omp parallel for
|
||||
// for (int i = 0; i < 15; ++i) {
|
||||
// printf ("Hello from i = %i\n", i);
|
||||
// }
|
||||
//
|
||||
// You may notice that the printed numbers do not print out in
|
||||
// order. Parallel for loops may execute in any order.
|
||||
Kokkos::parallel_for ("HelloWorld",15, hello_world ());
|
||||
|
||||
// You must call finalize() after you are done using Kokkos.
|
||||
Kokkos::finalize ();
|
||||
}
|
||||
|
||||
@ -1,44 +0,0 @@
|
||||
# Build file for one Kokkos tutorial example directory (lambda variant).
#
# Every *.cpp file in this directory is compiled to an object file and the
# objects are linked into the example executable:
#   make                        host build  -> <source>.host
#   make KOKKOS_DEVICES=Cuda    CUDA build  -> <source>.cuda
#   make clean                  remove build products

KOKKOS_PATH = ../../..
# Simple expansion: glob the directory once instead of on every reference.
SRC := $(wildcard *.cpp)

# These names are commands, not files; never treat them as up to date.
.PHONY: default build clean

# First ordinary rule in the file, hence the default goal. Prerequisites run
# before the recipe, so the message is printed after `build` has completed.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so the CUDA branch is
# only selected when the caller supplies a value containing "Cuda"
# (command line or environment).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
# Only set for CUDA builds; the example in this directory uses lambdas.
KOKKOS_CUDA_OPTIONS = "enable_lambda"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# NOTE(review): not referenced anywhere else in this file.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# The KOKKOS_* variables and the kokkos-clean target used below are not
# defined in this file; they are expected to come from this include.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable. $@ names the target being built.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

# Compile one source file; the output is named explicitly so the recipe
# produces exactly its target.
%.o: %.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,109 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
#include <typeinfo>
|
||||
|
||||
//
|
||||
// "Hello world" parallel_for example:
|
||||
// 1. Start up Kokkos
|
||||
// 2. Execute a parallel for loop in the default execution space,
|
||||
// using a C++11 lambda to define the loop body
|
||||
// 3. Shut down Kokkos
|
||||
//
|
||||
// This example only builds if C++11 is enabled. Compare this example
|
||||
// to 01_hello_world, which uses functors (explicitly defined classes)
|
||||
// to define the loop body of the parallel_for. Both functors and
|
||||
// lambdas have their places.
|
||||
//
|
||||
|
||||
int main (int argc, char* argv[]) {
|
||||
// You must call initialize() before you may call Kokkos.
|
||||
//
|
||||
// With no arguments, this initializes the default execution space
|
||||
// (and potentially its host execution space) with default
|
||||
// parameters. You may also pass in argc and argv, analogously to
|
||||
// MPI_Init(). It reads and removes command-line arguments that
|
||||
// start with "--kokkos-".
|
||||
Kokkos::initialize (argc, argv);
|
||||
|
||||
// Print the name of Kokkos' default execution space. We're using
|
||||
// typeid here, so the name might get a bit mangled by the linker,
|
||||
// but you should still be able to figure out what it is.
|
||||
printf ("Hello World on Kokkos execution space %s\n",
|
||||
typeid (Kokkos::DefaultExecutionSpace).name ());
|
||||
|
||||
// Run lambda on the default Kokkos execution space in parallel,
|
||||
// with a parallel for loop count of 15. The lambda's argument is
|
||||
// an integer which is the parallel for's loop index. As you learn
|
||||
// about different kinds of parallelism, you will find out that
|
||||
// there are other valid argument types as well.
|
||||
//
|
||||
// For a single level of parallelism, we prefer that you use the
|
||||
// KOKKOS_LAMBDA macro. If CUDA is disabled, this just turns into
|
||||
// [=]. That captures variables from the surrounding scope by
|
||||
// value. Do NOT capture them by reference! If CUDA is enabled,
|
||||
// this macro may have a special definition that makes the lambda
|
||||
// work correctly with CUDA. Compare to the KOKKOS_INLINE_FUNCTION
|
||||
// macro, which has a special meaning if CUDA is enabled.
|
||||
//
|
||||
// The following parallel_for would look like this if we were using
|
||||
// OpenMP by itself, instead of Kokkos:
|
||||
//
|
||||
// #pragma omp parallel for
|
||||
// for (int i = 0; i < 15; ++i) {
|
||||
// printf ("Hello from i = %i\n", i);
|
||||
// }
|
||||
//
|
||||
// You may notice that the printed numbers do not print out in
|
||||
// order. Parallel for loops may execute in any order.
|
||||
Kokkos::parallel_for (15, KOKKOS_LAMBDA (const int i) {
|
||||
// printf works in a CUDA parallel kernel; std::ostream does not.
|
||||
printf ("Hello from i = %i\n", i);
|
||||
});
|
||||
|
||||
// You must call finalize() after you are done using Kokkos.
|
||||
Kokkos::finalize ();
|
||||
}
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Build file for one Kokkos tutorial example directory.
#
# Every *.cpp file in this directory is compiled to an object file and the
# objects are linked into the example executable:
#   make                        host build  -> <source>.host
#   make KOKKOS_DEVICES=Cuda    CUDA build  -> <source>.cuda
#   make clean                  remove build products

KOKKOS_PATH = ../../..
# Simple expansion: glob the directory once instead of on every reference.
SRC := $(wildcard *.cpp)

# These names are commands, not files; never treat them as up to date.
.PHONY: default build clean

# First ordinary rule in the file, hence the default goal. Prerequisites run
# before the recipe, so the message is printed after `build` has completed.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so the CUDA branch is
# only selected when the caller supplies a value containing "Cuda"
# (command line or environment).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# NOTE(review): not referenced anywhere else in this file.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# The KOKKOS_* variables and the kokkos-clean target used below are not
# defined in this file; they are expected to come from this include.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable. $@ names the target being built.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

# Compile one source file; the output is named explicitly so the recipe
# produces exactly its target.
%.o: %.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,101 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
//
|
||||
// First reduction (parallel_reduce) example:
|
||||
// 1. Start up Kokkos
|
||||
// 2. Execute a parallel_reduce loop in the default execution space,
|
||||
// using a functor to define the loop body
|
||||
// 3. Shut down Kokkos
|
||||
//
|
||||
// Compare this example to 02_simple_reduce_lambda, which uses a C++11
|
||||
// lambda to define the loop body of the parallel_reduce.
|
||||
//
|
||||
|
||||
// Reduction functor for computing the sum of squares.
|
||||
//
|
||||
// More advanced reduction examples will show how to control the
|
||||
// reduction's "join" operator. If the join operator is not provided,
|
||||
// it defaults to binary operator+ (adding numbers together).
|
||||
struct squaresum {
|
||||
// Specify the type of the reduction value with a "value_type"
|
||||
// typedef. In this case, the reduction value has type int.
|
||||
typedef int value_type;
|
||||
|
||||
// The reduction functor's operator() looks a little different than
|
||||
// the parallel_for functor's operator(). For the reduction, we
|
||||
// pass in both the loop index i, and the intermediate reduction
|
||||
// value lsum. The latter MUST be passed in by nonconst reference.
|
||||
// (If the reduction type is an array like int[], indicating an
|
||||
// array reduction result, then the second argument is just int[].)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator () (const int i, int& lsum) const {
|
||||
lsum += i*i; // compute the sum of squares
|
||||
}
|
||||
};
|
||||
|
||||
// Computes the sum of squares 0^2 + ... + (n-1)^2 twice, once with a Kokkos
// parallel_reduce over the squaresum functor and once with a plain loop,
// prints both results, and returns 0 when they agree and -1 otherwise.
int main (int argc, char* argv[]) {
  Kokkos::initialize (argc, argv);
  const int n = 10;

  // Compute the sum of squares of integers from 0 to n-1, in
  // parallel, using Kokkos.
  int sum = 0;
  Kokkos::parallel_reduce (n, squaresum (), sum);
  printf ("Sum of squares of integers from 0 to %i, "
          "computed in parallel, is %i\n", n - 1, sum);

  // Compare to a sequential loop.
  int seqSum = 0;
  for (int i = 0; i < n; ++i) {
    seqSum += i*i;
  }
  printf ("Sum of squares of integers from 0 to %i, "
          "computed sequentially, is %i\n", n - 1, seqSum);
  Kokkos::finalize ();
  // Exit status reports whether the parallel result matched the reference.
  return (sum == seqSum) ? 0 : -1;
}
|
||||
|
||||
@ -1,44 +0,0 @@
|
||||
# Build file for one Kokkos tutorial example directory (lambda variant).
#
# Every *.cpp file in this directory is compiled to an object file and the
# objects are linked into the example executable:
#   make                        host build  -> <source>.host
#   make KOKKOS_DEVICES=Cuda    CUDA build  -> <source>.cuda
#   make clean                  remove build products

KOKKOS_PATH = ../../..
# Simple expansion: glob the directory once instead of on every reference.
SRC := $(wildcard *.cpp)

# These names are commands, not files; never treat them as up to date.
.PHONY: default build clean

# First ordinary rule in the file, hence the default goal. Prerequisites run
# before the recipe, so the message is printed after `build` has completed.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so the CUDA branch is
# only selected when the caller supplies a value containing "Cuda"
# (command line or environment).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
# Only set for CUDA builds; the example in this directory uses lambdas.
KOKKOS_CUDA_OPTIONS = "enable_lambda"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# NOTE(review): not referenced anywhere else in this file.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# The KOKKOS_* variables and the kokkos-clean target used below are not
# defined in this file; they are expected to come from this include.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable. $@ names the target being built.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

# Compile one source file; the output is named explicitly so the recipe
# produces exactly its target.
%.o: %.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,86 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
//
|
||||
// First reduction (parallel_reduce) example:
|
||||
// 1. Start up Kokkos
|
||||
// 2. Execute a parallel_reduce loop in the default execution space,
|
||||
// using a C++11 lambda to define the loop body
|
||||
// 3. Shut down Kokkos
|
||||
//
|
||||
// This example only builds if C++11 is enabled. Compare this example
|
||||
// to 02_simple_reduce, which uses a functor to define the loop body
|
||||
// of the parallel_reduce.
|
||||
//
|
||||
|
||||
// Computes the sum of squares 0^2 + ... + (n-1)^2 twice, once with a Kokkos
// parallel_reduce over a lambda and once with a plain loop, prints both
// results, and returns 0 when they agree and -1 otherwise.
int main (int argc, char* argv[]) {
  Kokkos::initialize (argc, argv);
  const int n = 10;

  // Compute the sum of squares of integers from 0 to n-1, in
  // parallel, using Kokkos.  This time, use a lambda instead of a
  // functor.  The lambda takes the same arguments as the functor's
  // operator().
  int sum = 0;
  // The KOKKOS_LAMBDA macro replaces the capture-by-value clause [=].
  // It also handles any other syntax needed for CUDA.
  Kokkos::parallel_reduce (n, KOKKOS_LAMBDA (const int i, int& lsum) {
      lsum += i*i;
    }, sum);
  printf ("Sum of squares of integers from 0 to %i, "
          "computed in parallel, is %i\n", n - 1, sum);

  // Compare to a sequential loop.
  int seqSum = 0;
  for (int i = 0; i < n; ++i) {
    seqSum += i*i;
  }
  printf ("Sum of squares of integers from 0 to %i, "
          "computed sequentially, is %i\n", n - 1, seqSum);
  Kokkos::finalize ();
  // Exit status reports whether the parallel result matched the reference.
  return (sum == seqSum) ? 0 : -1;
}
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Build file for one Kokkos tutorial example directory.
#
# Every *.cpp file in this directory is compiled to an object file and the
# objects are linked into the example executable:
#   make                        host build  -> <source>.host
#   make KOKKOS_DEVICES=Cuda    CUDA build  -> <source>.cuda
#   make clean                  remove build products

KOKKOS_PATH = ../../..
# Simple expansion: glob the directory once instead of on every reference.
SRC := $(wildcard *.cpp)

# These names are commands, not files; never treat them as up to date.
.PHONY: default build clean

# First ordinary rule in the file, hence the default goal. Prerequisites run
# before the recipe, so the message is printed after `build` has completed.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so the CUDA branch is
# only selected when the caller supplies a value containing "Cuda"
# (command line or environment).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# NOTE(review): not referenced anywhere else in this file.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# The KOKKOS_* variables and the kokkos-clean target used below are not
# defined in this file; they are expected to come from this include.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable. $@ names the target being built.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

# Compile one source file; the output is named explicitly so the recipe
# produces exactly its target.
%.o: %.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,142 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
//
|
||||
// First Kokkos::View (multidimensional array) example:
|
||||
// 1. Start up Kokkos
|
||||
// 2. Allocate a Kokkos::View
|
||||
// 3. Execute a parallel_for and a parallel_reduce over that View's data
|
||||
// 4. Shut down Kokkos
|
||||
//
|
||||
// Compare this example to 03_simple_view_lambda, which uses C++11
|
||||
// lambdas to define the loop bodies of the parallel_for and
|
||||
// parallel_reduce.
|
||||
//
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
// A Kokkos::View is an array of zero or more dimensions. The number
|
||||
// of dimensions is specified at compile time, as part of the type of
|
||||
// the View. This array has two dimensions. The first one
|
||||
// (represented by the asterisk) is a run-time dimension, and the
|
||||
// second (represented by [3]) is a compile-time dimension. Thus,
|
||||
// this View type is an N x 3 array of type double, where N is
|
||||
// specified at run time in the View's constructor.
|
||||
//
|
||||
// The first dimension of the View is the dimension over which it is
|
||||
// efficient for Kokkos to parallelize.
|
||||
typedef Kokkos::View<double*[3]> view_type;
|
||||
|
||||
// parallel_for functor that fills the View given to its constructor.
|
||||
// The View must already have been allocated.
|
||||
struct InitView {
|
||||
view_type a;
|
||||
|
||||
// Views have "view semantics." This means that they behave like
|
||||
// pointers, not like std::vector. Their copy constructor and
|
||||
// operator= only do shallow copies. Thus, you can pass View
|
||||
// objects around by "value"; they won't do a deep copy unless you
|
||||
// explicitly ask for a deep copy.
|
||||
InitView (view_type a_) :
|
||||
a (a_)
|
||||
{}
|
||||
|
||||
// Fill the View with some data. The parallel_for loop will iterate
|
||||
// over the View's first dimension N.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator () (const int i) const {
|
||||
// Acesss the View just like a Fortran array. The layout depends
|
||||
// on the View's memory space, so don't rely on the View's
|
||||
// physical memory layout unless you know what you're doing.
|
||||
a(i,0) = 1.0*i;
|
||||
a(i,1) = 1.0*i*i;
|
||||
a(i,2) = 1.0*i*i*i;
|
||||
}
|
||||
};
|
||||
|
||||
// Reduction functor that reads the View given to its constructor.
|
||||
struct ReduceFunctor {
|
||||
view_type a;
|
||||
|
||||
// Constructor takes View by "value"; this does a shallow copy.
|
||||
ReduceFunctor (view_type a_) : a (a_) {}
|
||||
|
||||
// If you write a functor to do a reduction, you must specify the
|
||||
// type of the reduction result via a public 'value_type' typedef.
|
||||
typedef double value_type;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (int i, double &lsum) const {
|
||||
lsum += a(i,0)*a(i,1)/(a(i,2)+0.1);
|
||||
}
|
||||
};
|
||||
|
||||
// Allocates an N x 3 View, fills it with InitView, reduces it with
// ReduceFunctor, and prints the resulting sum.
int main (int argc, char* argv[]) {
  Kokkos::initialize (argc, argv);
  const int N = 10;

  // Allocate the View.  The first dimension is a run-time parameter
  // N.  We set N = 10 here.  The second dimension is a compile-time
  // parameter, 3.  We don't specify it here because we already set it
  // by declaring the type of the View.
  //
  // Views get initialized to zero by default.  This happens in
  // parallel, using the View's memory space's default execution
  // space.  Parallel initialization ensures first-touch allocation.
  // There is a way to shut off default initialization.
  //
  // You may NOT allocate a View inside of a parallel_{for, reduce,
  // scan}.  Treat View allocation as a "thread collective."
  //
  // The string "A" is just the label; it only matters for debugging.
  // Different Views may have the same label.
  view_type a ("A", N);

  Kokkos::parallel_for (N, InitView (a));
  double sum = 0;
  Kokkos::parallel_reduce (N, ReduceFunctor (a), sum);
  printf ("Result: %f\n", sum);
  Kokkos::finalize ();
}
|
||||
|
||||
@ -1,44 +0,0 @@
|
||||
# Builds the example in this directory: every *.cpp is compiled and the
# objects are linked with the flags supplied by $(KOKKOS_PATH)/Makefile.kokkos.
KOKKOS_PATH = ../../..
# Simple assignment so the directory is globbed once, not on every reference.
SRC := $(wildcard *.cpp)

# NOTE: the echo runs only after the 'build' prerequisite has finished.
default: build
	echo "Start Build"

# These targets name actions, not files.
.PHONY: default build clean

# KOKKOS_DEVICES is tested before this file assigns it, so the CUDA branch is
# taken only when the value is supplied from outside
# (for example: make KOKKOS_DEVICES=Cuda).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
KOKKOS_CUDA_OPTIONS = "enable_lambda"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Write to $@ so the output file is always the target being built.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is not defined in this file; presumably Makefile.kokkos provides it.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,116 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
//
|
||||
// First Kokkos::View (multidimensional array) example:
|
||||
// 1. Start up Kokkos
|
||||
// 2. Allocate a Kokkos::View
|
||||
// 3. Execute a parallel_for and a parallel_reduce over that View's data
|
||||
// 4. Shut down Kokkos
|
||||
//
|
||||
// Compare this example to 03_simple_view, which uses functors to
|
||||
// define the loop bodies of the parallel_for and parallel_reduce.
|
||||
//
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
// A Kokkos::View is an array of zero or more dimensions. The number
|
||||
// of dimensions is specified at compile time, as part of the type of
|
||||
// the View. This array has two dimensions. The first one
|
||||
// (represented by the asterisk) is a run-time dimension, and the
|
||||
// second (represented by [3]) is a compile-time dimension. Thus,
|
||||
// this View type is an N x 3 array of type double, where N is
|
||||
// specified at run time in the View's constructor.
|
||||
//
|
||||
// The first dimension of the View is the dimension over which it is
|
||||
// efficient for Kokkos to parallelize.
|
||||
typedef Kokkos::View<double*[3]> view_type;
|
||||
|
||||
int main (int argc, char* argv[]) {
|
||||
Kokkos::initialize (argc, argv);
|
||||
|
||||
// Allocate the View. The first dimension is a run-time parameter
|
||||
// N. We set N = 10 here. The second dimension is a compile-time
|
||||
// parameter, 3. We don't specify it here because we already set it
|
||||
// by declaring the type of the View.
|
||||
//
|
||||
// Views get initialized to zero by default. This happens in
|
||||
// parallel, using the View's memory space's default execution
|
||||
// space. Parallel initialization ensures first-touch allocation.
|
||||
// There is a way to shut off default initialization.
|
||||
//
|
||||
// You may NOT allocate a View inside of a parallel_{for, reduce,
|
||||
// scan}. Treat View allocation as a "thread collective."
|
||||
//
|
||||
// The string "A" is just the label; it only matters for debugging.
|
||||
// Different Views may have the same label.
|
||||
view_type a ("A", 10);
|
||||
|
||||
// Fill the View with some data. The parallel_for loop will iterate
|
||||
// over the View's first dimension N.
|
||||
//
|
||||
// Note that the View is passed by value into the lambda. The macro
|
||||
// KOKKOS_LAMBDA includes the "capture by value" clause [=]. This
|
||||
// tells the lambda to "capture all variables in the enclosing scope
|
||||
// by value." Views have "view semantics"; they behave like
|
||||
// pointers, not like std::vector. Passing them by value does a
|
||||
// shallow copy. A deep copy never happens unless you explicitly
|
||||
// ask for one.
|
||||
Kokkos::parallel_for (10, KOKKOS_LAMBDA (const int i) {
|
||||
// Acesss the View just like a Fortran array. The layout depends
|
||||
// on the View's memory space, so don't rely on the View's
|
||||
// physical memory layout unless you know what you're doing.
|
||||
a(i,0) = 1.0*i;
|
||||
a(i,1) = 1.0*i*i;
|
||||
a(i,2) = 1.0*i*i*i;
|
||||
});
|
||||
// Reduction functor that reads the View given to its constructor.
|
||||
double sum = 0;
|
||||
Kokkos::parallel_reduce (10, KOKKOS_LAMBDA (const int i, double& lsum) {
|
||||
lsum += a(i,0)*a(i,1)/(a(i,2)+0.1);
|
||||
}, sum);
|
||||
printf ("Result: %f\n", sum);
|
||||
Kokkos::finalize ();
|
||||
}
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Builds the example in this directory: every *.cpp is compiled and the
# objects are linked with the flags supplied by $(KOKKOS_PATH)/Makefile.kokkos.
KOKKOS_PATH = ../../..
# Simple assignment so the directory is globbed once, not on every reference.
SRC := $(wildcard *.cpp)

# NOTE: the echo runs only after the 'build' prerequisite has finished.
default: build
	echo "Start Build"

# These targets name actions, not files.
.PHONY: default build clean

# KOKKOS_DEVICES is tested before this file assigns it, so the CUDA branch is
# taken only when the value is supplied from outside
# (for example: make KOKKOS_DEVICES=Cuda).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Write to $@ so the output file is always the target being built.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is not defined in this file; presumably Makefile.kokkos provides it.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,101 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
// The type of a two-dimensional N x 3 array of double.
|
||||
// It lives in Kokkos' default memory space.
|
||||
typedef Kokkos::View<double*[3]> view_type;
|
||||
|
||||
// The "HostMirror" type corresponding to view_type above is also a
|
||||
// two-dimensional N x 3 array of double. However, it lives in the
|
||||
// host memory space corresponding to view_type's memory space. For
|
||||
// example, if view_type lives in CUDA device memory, host_view_type
|
||||
// lives in host (CPU) memory. Furthermore, declaring host_view_type
|
||||
// as the host mirror of view_type means that host_view_type has the
|
||||
// same layout as view_type. This makes it easier to copy between the
|
||||
// two Views.
|
||||
// Advanced issues: If a memory space is accessible from the host without
|
||||
// performance penalties then it is its own host_mirror_space. This is
|
||||
// the case for HostSpace, CudaUVMSpace and CudaHostPinnedSpace.
|
||||
|
||||
typedef view_type::HostMirror host_view_type;
|
||||
|
||||
struct ReduceFunctor {
|
||||
view_type a;
|
||||
ReduceFunctor (view_type a_) : a (a_) {}
|
||||
typedef int value_type; //Specify type for reduction value, lsum
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (int i, int &lsum) const {
|
||||
lsum += a(i,0)-a(i,1)+a(i,2);
|
||||
}
|
||||
};
|
||||
|
||||
// Example driver: fill a host mirror sequentially, copy it to the device
// View, reduce over the device View, and print the result.
int main() {
  Kokkos::initialize();
  {
    // Everything that owns Kokkos memory lives in this inner scope so
    // that it is released before Kokkos::finalize() runs below.
    const int N = 10;

    view_type a ("A", N);
    // If view_type and host_view_type live in the same memory space,
    // a "mirror view" is just an alias, and deep_copy does nothing.
    // Otherwise, a mirror view of a device View lives in host memory,
    // and deep_copy does a deep copy.
    host_view_type h_a = Kokkos::create_mirror_view (a);

    // The View h_a lives in host (CPU) memory, so it's legal to fill
    // the view sequentially using ordinary code, like this.
    for (int i = 0; i < N; i++) {
      for (int j = 0; j < 3; j++) {
        h_a(i,j) = i*10 + j;
      }
    }
    Kokkos::deep_copy (a, h_a); // Copy from host to device.

    int sum = 0;
    Kokkos::parallel_reduce (N, ReduceFunctor (a), sum);
    printf ("Result is %i\n",sum);
  } // 'a' and 'h_a' go out of scope here, while Kokkos is still initialized.

  Kokkos::finalize ();
}
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Builds the example in this directory: every *.cpp is compiled and the
# objects are linked with the flags supplied by $(KOKKOS_PATH)/Makefile.kokkos.
KOKKOS_PATH = ../../..
# Simple assignment so the directory is globbed once, not on every reference.
SRC := $(wildcard *.cpp)

# NOTE: the echo runs only after the 'build' prerequisite has finished.
default: build
	echo "Start Build"

# These targets name actions, not files.
.PHONY: default build clean

# KOKKOS_DEVICES is tested before this file assigns it, so the CUDA branch is
# taken only when the value is supplied from outside
# (for example: make KOKKOS_DEVICES=Cuda).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Write to $@ so the output file is always the target being built.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is not defined in this file; presumably Makefile.kokkos provides it.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,137 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
|
||||
// Type of a one-dimensional length-N array of int.
|
||||
typedef Kokkos::View<int*> view_type;
|
||||
// HostMirror counterpart of view_type, used for the host-side copies.
typedef view_type::HostMirror host_view_type;
|
||||
// This is a "zero-dimensional" View, that is, a View of a single
|
||||
// value (an int, in this case). Access the value using operator()
|
||||
// with no arguments: e.g., 'count()'.
|
||||
//
|
||||
// Zero-dimensional Views are useful for reduction results that stay
|
||||
// resident in device memory, as well as for irregularly updated
|
||||
// shared state. We use it for the latter in this example.
|
||||
typedef Kokkos::View<int> count_type;
|
||||
// HostMirror counterpart of count_type; main() deep-copies the count into it.
typedef count_type::HostMirror host_count_type;
|
||||
|
||||
|
||||
// Functor for finding a list of primes in a given set of numbers. If
|
||||
// run in parallel, the order of results is nondeterministic, because
|
||||
// hardware atomic updates do not guarantee an order of execution.
|
||||
struct findprimes {
|
||||
view_type data;
|
||||
view_type result;
|
||||
count_type count;
|
||||
|
||||
findprimes (view_type data_, view_type result_, count_type count_) :
|
||||
data (data_), result (result_), count (count_)
|
||||
{}
|
||||
|
||||
// Test if data(i) is prime. If it is, increment the count of
|
||||
// primes (stored in the zero-dimensional View 'count') and add the
|
||||
// value to the current list of primes 'result'.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int i) const {
|
||||
const int number = data(i); // the current number
|
||||
|
||||
// Test all numbers from 3 to ceiling(sqrt(data(i))), to see if
|
||||
// they are factors of data(i). It's not the most efficient prime
|
||||
// test, but it works.
|
||||
const int upper_bound = sqrt(1.0*number)+1;
|
||||
bool is_prime = !(number%2 == 0);
|
||||
int k = 3;
|
||||
while (k < upper_bound && is_prime) {
|
||||
is_prime = !(number%k == 0);
|
||||
k += 2; // don't have to test even numbers
|
||||
}
|
||||
|
||||
if (is_prime) {
|
||||
// Use an atomic update both to update the current count of
|
||||
// primes, and to find a place in the current list of primes for
|
||||
// the new result.
|
||||
//
|
||||
// atomic_fetch_add results the _current_ count, but increments
|
||||
// it (by 1 in this case). The current count of primes indexes
|
||||
// into the first unoccupied position of the 'result' array.
|
||||
const int idx = Kokkos::atomic_fetch_add (&count(), 1);
|
||||
result(idx) = number;
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
int main () {
|
||||
Kokkos::initialize ();
|
||||
|
||||
srand (61391); // Set the random seed
|
||||
|
||||
int nnumbers = 100000;
|
||||
view_type data ("RND", nnumbers);
|
||||
view_type result ("Prime", nnumbers);
|
||||
count_type count ("Count");
|
||||
|
||||
host_view_type h_data = Kokkos::create_mirror_view (data);
|
||||
host_view_type h_result = Kokkos::create_mirror_view (result);
|
||||
host_count_type h_count = Kokkos::create_mirror_view (count);
|
||||
|
||||
typedef view_type::size_type size_type;
|
||||
// Fill the 'data' array on the host with random numbers. We assume
|
||||
// that they come from some process which is only implemented on the
|
||||
// host, via some library. (That's true in this case.)
|
||||
for (size_type i = 0; i < data.dimension_0 (); ++i) {
|
||||
h_data(i) = rand () % nnumbers;
|
||||
}
|
||||
Kokkos::deep_copy (data, h_data); // copy from host to device
|
||||
|
||||
Kokkos::parallel_for (data.dimension_0 (), findprimes (data, result, count));
|
||||
Kokkos::deep_copy (h_count, count); // copy from device to host
|
||||
|
||||
printf ("Found %i prime numbers in %i random numbers\n", h_count(), nnumbers);
|
||||
Kokkos::finalize ();
|
||||
}
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Builds the example in this directory: every *.cpp is compiled and the
# objects are linked with the flags supplied by $(KOKKOS_PATH)/Makefile.kokkos.
KOKKOS_PATH = ../../../..
# Simple assignment so the directory is globbed once, not on every reference.
SRC := $(wildcard *.cpp)

# NOTE: the echo runs only after the 'build' prerequisite has finished.
default: build
	echo "Start Build"

# These targets name actions, not files.
.PHONY: default build clean

# KOKKOS_DEVICES is tested before this file assigns it, so the CUDA branch is
# taken only when the value is supplied from outside
# (for example: make KOKKOS_DEVICES=Cuda).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Write to $@ so the output file is always the target being built.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is not defined in this file; presumably Makefile.kokkos provides it.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,171 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
// These two View types are both 2-D arrays of double. However, they
|
||||
// have different layouts in memory. left_type has "layout left,"
|
||||
// which means "column major," the same as in Fortran, the BLAS, or
|
||||
// LAPACK. right_type has "layout right," which means "row major,"
|
||||
// the same as in C, C++, or Java.
|
||||
typedef Kokkos::View<double**, Kokkos::LayoutLeft> left_type;
|
||||
typedef Kokkos::View<double**, Kokkos::LayoutRight> right_type;
|
||||
// This is a one-dimensional View, so the layout matters less.
|
||||
// However, it still has a layout! Since its layout is not specified
|
||||
// explicitly in the type, its layout is a function of the memory
|
||||
// space. For example, the default Cuda layout is LayoutLeft, and the
|
||||
// default Host layout is LayoutRight.
|
||||
typedef Kokkos::View<double*> view_type;
|
||||
|
||||
// parallel_for functor that fills the given View with some data. It
|
||||
// expects to access the View by rows in parallel: each call i of
|
||||
// operator() accesses a row.
|
||||
template<class ViewType>
|
||||
struct init_view {
|
||||
ViewType a;
|
||||
init_view (ViewType a_) : a (a_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const typename ViewType::size_type i) const {
|
||||
// On CPUs this loop could be vectorized so j should do stride 1
|
||||
// access on a for optimal performance. I.e. a should be LayoutRight.
|
||||
// On GPUs threads should do coalesced loads and stores. That means
|
||||
// that i should be the stride one access for optimal performance.
|
||||
for (typename ViewType::size_type j = 0; j < a.dimension_1 (); ++j) {
|
||||
a(i,j) = 1.0*a.dimension_0()*i + 1.0*j;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Compute a contraction of v1 and v2 into a:
|
||||
//
|
||||
// a(i) := sum_j (v1(i,j) * v2(j,i))
|
||||
//
|
||||
// Since the functor is templated on the ViewTypes itself it doesn't matter what
|
||||
// their layouts are. That means you can use different layouts on different
|
||||
// architectures.
|
||||
template<class ViewType1, class ViewType2>
|
||||
struct contraction {
|
||||
view_type a;
|
||||
typename ViewType1::const_type v1;
|
||||
typename ViewType2::const_type v2;
|
||||
contraction (view_type a_, ViewType1 v1_, ViewType2 v2_) :
|
||||
a (a_), v1 (v1_), v2 (v2_)
|
||||
{}
|
||||
|
||||
// As with the initialization functor the performance of this operator
|
||||
// depends on the architecture and the chosen data layouts.
|
||||
// On CPUs optimal would be to vectorize the inner loop, so j should be the
|
||||
// stride 1 access. That means v1 should be LayoutRight and v2 LayoutLeft.
|
||||
// In order to get coalesced access on GPUs where i corresponds closely to
|
||||
// the thread Index, i must be the stride 1 dimension. That means v1 should be
|
||||
// LayoutLeft and v2 LayoutRight.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const view_type::size_type i) const {
|
||||
for (view_type::size_type j = 0; j < v1.dimension_1 (); ++j) {
|
||||
a(i) = v1(i,j)*v2(j,i);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Compute a dot product. This is used for result verification.
|
||||
struct dot {
|
||||
view_type a;
|
||||
dot (view_type a_) : a (a_) {}
|
||||
typedef double value_type; //Specify type for reduction target, lsum
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const view_type::size_type i, double &lsum) const {
|
||||
lsum += a(i)*a(i);
|
||||
}
|
||||
};
|
||||
|
||||
int main (int narg, char* arg[]) {
|
||||
// When initializing Kokkos, you may pass in command-line arguments,
|
||||
// just like with MPI_Init(). Kokkos reserves the right to remove
|
||||
// arguments from the list that start with '--kokkos-'.
|
||||
Kokkos::initialize (narg, arg);
|
||||
|
||||
int size = 10000;
|
||||
view_type a("A",size);
|
||||
|
||||
// Define two views with LayoutLeft and LayoutRight.
|
||||
left_type l("L",size,10000);
|
||||
right_type r("R",size,10000);
|
||||
|
||||
// Initialize the data in the views.
|
||||
Kokkos::parallel_for(size,init_view<left_type>(l));
|
||||
Kokkos::parallel_for(size,init_view<right_type>(r));
|
||||
Kokkos::fence();
|
||||
|
||||
// Measure time to execute the contraction kernel when giving it a
|
||||
// LayoutLeft view for v1 and a LayoutRight view for v2. This should be
|
||||
// fast on GPUs and slow on CPUs
|
||||
Kokkos::Impl::Timer time1;
|
||||
Kokkos::parallel_for(size,contraction<left_type,right_type>(a,l,r));
|
||||
Kokkos::fence();
|
||||
double sec1 = time1.seconds();
|
||||
|
||||
double sum1 = 0;
|
||||
Kokkos::parallel_reduce(size,dot(a),sum1);
|
||||
Kokkos::fence();
|
||||
|
||||
// Measure time to execute the contraction kernel when giving it a
|
||||
// LayoutRight view for v1 and a LayoutLeft view for v2. This should be
|
||||
// fast on CPUs and slow on GPUs
|
||||
Kokkos::Impl::Timer time2;
|
||||
Kokkos::parallel_for(size,contraction<right_type,left_type>(a,r,l));
|
||||
Kokkos::fence();
|
||||
double sec2 = time2.seconds();
|
||||
|
||||
double sum2 = 0;
|
||||
Kokkos::parallel_reduce(size,dot(a),sum2);
|
||||
|
||||
// Kokkos' reductions are deterministic.
|
||||
// The results should always be equal.
|
||||
printf("Result Left/Right %f Right/Left %f (equal result: %i)\n",sec1,sec2,sum2==sum1);
|
||||
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Builds the example in this directory: every *.cpp is compiled and the
# objects are linked with the flags supplied by $(KOKKOS_PATH)/Makefile.kokkos.
KOKKOS_PATH = ../../../..
# Simple assignment so the directory is globbed once, not on every reference.
SRC := $(wildcard *.cpp)

# NOTE: the echo runs only after the 'build' prerequisite has finished.
default: build
	echo "Start Build"

# These targets name actions, not files.
.PHONY: default build clean

# KOKKOS_DEVICES is tested before this file assigns it, so the CUDA branch is
# taken only when the value is supplied from outside
# (for example: make KOKKOS_DEVICES=Cuda).
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Write to $@ so the output file is always the target being built.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is not defined in this file; presumably Makefile.kokkos provides it.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,141 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
typedef Kokkos::View<double*> view_type;
|
||||
// Kokkos::Views have a MemoryTraits template parameter which
|
||||
// allows users to specify usage scenarios of a View.
|
||||
// Some of those act simply as hints, which can be used to insert
|
||||
// optimal load and store paths, others change the semantics of the
|
||||
// access. The trait Kokkos::Atomic is one of the latter. A view with
|
||||
// that MemoryTrait will perform any access atomically (read, write, update).
|
||||
//
|
||||
// In this example we use a view with a usage hint for RandomAccess.
|
||||
// Kokkos::RandomAccess means that we expect to use this view
|
||||
// with indirect indexing.
|
||||
//
|
||||
// In CUDA, RandomAccess allows accesses through the texture
|
||||
// cache. This only works if the View is read-only, which we enforce
|
||||
// through the first template parameter.
|
||||
//
|
||||
// Note that we are still talking about views of the data, it's not a new allocation.
|
||||
// For example you can have an atomic view of a default view. While you even
|
||||
// could use both in the same kernel, this could lead to undefined behaviour because
|
||||
// one of your access paths is not atomic. Think of it in the same way as you think of
|
||||
// pointers to const data and pointers to non-const data (i.e. const double* and double*).
|
||||
// While these pointers can point to the same data you should not use them together if that
|
||||
// breaks the const guarantee of the first pointer.
|
||||
typedef Kokkos::View<const double*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > view_type_rnd;
|
||||
typedef Kokkos::View<int**> idx_type;
|
||||
typedef idx_type::HostMirror idx_type_host;
|
||||
|
||||
// We template this functor on the ViewTypes to show the effect of the RandomAccess trait.
|
||||
template<class DestType, class SrcType>
|
||||
struct localsum {
|
||||
idx_type::const_type idx;
|
||||
DestType dest;
|
||||
SrcType src;
|
||||
localsum (idx_type idx_, DestType dest_, SrcType src_) :
|
||||
idx (idx_), dest (dest_), src (src_)
|
||||
{}
|
||||
|
||||
// Calculate a local sum of values
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int i) const {
|
||||
double tmp = 0.0;
|
||||
for (int j = 0; j < (int) idx.dimension_1 (); ++j) {
|
||||
// This is an indirect access on src
|
||||
const double val = src(idx(i,j));
|
||||
tmp += val*val + 0.5*(idx.dimension_0()*val -idx.dimension_1()*val);
|
||||
}
|
||||
dest(i) = tmp;
|
||||
}
|
||||
};
|
||||
|
||||
int main(int narg, char* arg[]) {
|
||||
Kokkos::initialize (narg, arg);
|
||||
|
||||
int size = 1000000;
|
||||
|
||||
idx_type idx("Idx",size,64);
|
||||
idx_type_host h_idx = Kokkos::create_mirror_view (idx);
|
||||
|
||||
view_type dest ("Dest", size);
|
||||
view_type src ("Src", size);
|
||||
|
||||
srand(134231);
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
for (view_type::size_type j = 0; j < h_idx.dimension_1 (); ++j) {
|
||||
h_idx(i,j) = (size + i + (rand () % 500 - 250)) % size;
|
||||
}
|
||||
}
|
||||
|
||||
// Deep copy the initial data to the device
|
||||
Kokkos::deep_copy(idx,h_idx);
|
||||
// Run the first kernel to warmup caches
|
||||
Kokkos::parallel_for(size,localsum<view_type,view_type_rnd>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
|
||||
// Run the localsum functor using the RandomAccess trait. On CPUs there should
|
||||
// not be any different in performance to not using the RandomAccess trait.
|
||||
// On GPUs where can be a dramatic difference
|
||||
Kokkos::Impl::Timer time1;
|
||||
Kokkos::parallel_for(size,localsum<view_type,view_type_rnd>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
double sec1 = time1.seconds();
|
||||
|
||||
Kokkos::Impl::Timer time2;
|
||||
Kokkos::parallel_for(size,localsum<view_type,view_type>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
double sec2 = time2.seconds();
|
||||
|
||||
printf("Time with Trait RandomAccess: %f with Plain: %f \n",sec1,sec2);
|
||||
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Makefile for one Kokkos example directory: compiles every *.cpp in the
# current directory and links the objects into a single executable whose
# suffix (.cuda or .host) depends on the selected backend.

# Location of the Kokkos source tree relative to this directory.
KOKKOS_PATH = ../../../..
# Expanded once (:=) so the directory is not re-globbed on every reference.
SRC := $(wildcard *.cpp)

# NOTE: prerequisites are brought up to date before a recipe runs, so this
# message is printed after `build` has completed.
default: build
	echo "Start Build"

# None of these names correspond to files produced by the build.
.PHONY: default build clean

# The branch is selected from the value KOKKOS_DEVICES has at this point
# (e.g. from the command line or the environment). The assignments inside the
# selected branch then provide the device and architecture lists; values given
# on the make command line take precedence over them.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# Not referenced in this file; presumably consumed elsewhere -- TODO confirm.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Supplies the KOKKOS_* variables and the kokkos-clean target used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable. `$@` names the target being built, so
# the command stays well-formed even if $(EXE) expands to more than one word.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

# The output file is named explicitly so the recipe creates exactly its target.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,190 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
// This example simulates one timestep of an explicit
|
||||
// finite-difference discretization of a time-dependent partial
|
||||
// differential equation (PDE). It shows how to take subviews of the
|
||||
// mesh in order to represent particular boundaries or the interior of
|
||||
// the mesh.
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
typedef Kokkos::View<double***, Kokkos::LayoutRight> mesh_type;
|
||||
|
||||
// These View types represent subviews of the mesh. Some of the Views
|
||||
// have layout LayoutStride, meaning that they have run-time "strides"
|
||||
// in each dimension which may differ from that dimension. For
|
||||
// example, inner_mesh_type (which represents the interior of the
|
||||
// mesh) has to skip over the boundaries when computing its stride;
|
||||
// the dimensions of the interior mesh differ from these strides. You
|
||||
// may safely always use a LayoutStride layout when taking a subview
|
||||
// of a LayoutRight or LayoutLeft View, but strided accesses may
|
||||
// cost a bit more, especially for 1-D Views.
|
||||
typedef Kokkos::View<double**, Kokkos::LayoutStride> xz_plane_type;
|
||||
typedef Kokkos::View<double**, Kokkos::LayoutRight> yz_plane_type;
|
||||
typedef Kokkos::View<double**, Kokkos::LayoutStride> xy_plane_type;
|
||||
typedef Kokkos::View<double***, Kokkos::LayoutStride> inner_mesh_type;
|
||||
|
||||
// Functor to set all entries of a boundary of the mesh to a constant
|
||||
// value. The functor is templated on ViewType because different
|
||||
// boundaries may have different layouts.
|
||||
template<class ViewType>
|
||||
struct set_boundary {
|
||||
ViewType a;
|
||||
double value;
|
||||
|
||||
set_boundary (ViewType a_, double value_) :
|
||||
a (a_), value (value_)
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const typename ViewType::size_type i) const {
|
||||
for (typename ViewType::size_type j = 0; j < a.dimension_1 (); ++j) {
|
||||
a(i,j) = value;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Functor to set all entries of a boundary of the mesh to a constant
|
||||
// value. The functor is templated on ViewType because different
|
||||
// boundaries may have different layouts.
|
||||
template<class ViewType>
|
||||
struct set_inner {
|
||||
ViewType a;
|
||||
double value;
|
||||
|
||||
set_inner (ViewType a_, double value_) :
|
||||
a (a_), value (value_)
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator () (const typename ViewType::size_type i) const {
|
||||
typedef typename ViewType::size_type size_type;
|
||||
for (size_type j = 0; j < a.dimension_1 (); ++j) {
|
||||
for (size_type k = 0; k < a.dimension_2 (); ++k) {
|
||||
a(i,j,k) = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Update the interior of the mesh. This simulates one timestep of a
|
||||
// finite-difference method.
|
||||
template<class ViewType>
|
||||
struct update {
|
||||
ViewType a;
|
||||
const double dt;
|
||||
|
||||
update (ViewType a_, const double dt_) :
|
||||
a (a_), dt (dt_)
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (typename ViewType::size_type i) const {
|
||||
typedef typename ViewType::size_type size_type;
|
||||
i++;
|
||||
for (size_type j = 1; j < a.dimension_1()-1; j++) {
|
||||
for (size_type k = 1; k < a.dimension_2()-1; k++) {
|
||||
a(i,j,k) += dt* (a(i,j,k+1) - a(i,j,k-1) +
|
||||
a(i,j+1,k) - a(i,j-1,k) +
|
||||
a(i+1,j,k) - a(i-1,j,k));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
int main (int narg, char* arg[]) {
|
||||
using Kokkos::ALL;
|
||||
using Kokkos::pair;
|
||||
using Kokkos::parallel_for;
|
||||
using Kokkos::subview;
|
||||
typedef mesh_type::size_type size_type;
|
||||
|
||||
Kokkos::initialize (narg, arg);
|
||||
|
||||
// The number of mesh points along each dimension of the mesh, not
|
||||
// including boundaries.
|
||||
const size_type size = 100;
|
||||
|
||||
// A is the full cubic 3-D mesh, including the boundaries.
|
||||
mesh_type A ("A", size+2, size+2, size+2);
|
||||
// Ai is the "inner" part of A, _not_ including the boundaries.
|
||||
//
|
||||
// A pair of indices in a particular dimension means the contiguous
|
||||
// zero-based index range in that dimension, including the first
|
||||
// entry of the pair but _not_ including the second entry.
|
||||
inner_mesh_type Ai = subview(A, pair<size_type, size_type> (1, size+1),
|
||||
pair<size_type, size_type> (1, size+1),
|
||||
pair<size_type, size_type> (1, size+1));
|
||||
// A has six boundaries, one for each face of the cube.
|
||||
// Create a View of each of these boundaries.
|
||||
// ALL() means "select all indices in that dimension."
|
||||
xy_plane_type Zneg_halo = subview(A, ALL (), ALL (), 0);
|
||||
xy_plane_type Zpos_halo = subview(A, ALL (), ALL (), 101);
|
||||
xz_plane_type Yneg_halo = subview(A, ALL (), 0, ALL ());
|
||||
xz_plane_type Ypos_halo = subview(A, ALL (), 101, ALL ());
|
||||
yz_plane_type Xneg_halo = subview(A, 0, ALL (), ALL ());
|
||||
yz_plane_type Xpos_halo = subview(A, 101, ALL (), ALL ());
|
||||
|
||||
// Set the boundaries to their initial conditions.
|
||||
parallel_for (Zneg_halo.dimension_0 (), set_boundary<xy_plane_type> (Zneg_halo, 1));
|
||||
parallel_for (Zpos_halo.dimension_0 (), set_boundary<xy_plane_type> (Zpos_halo, -1));
|
||||
parallel_for (Yneg_halo.dimension_0 (), set_boundary<xz_plane_type> (Yneg_halo, 2));
|
||||
parallel_for (Ypos_halo.dimension_0 (), set_boundary<xz_plane_type> (Ypos_halo, -2));
|
||||
parallel_for (Xneg_halo.dimension_0 (), set_boundary<yz_plane_type> (Xneg_halo, 3));
|
||||
parallel_for (Xpos_halo.dimension_0 (), set_boundary<yz_plane_type> (Xpos_halo, -3));
|
||||
|
||||
// Set the interior of the mesh to its initial condition.
|
||||
parallel_for (Ai.dimension_0 (), set_inner<inner_mesh_type> (Ai, 0));
|
||||
|
||||
// Update the interior of the mesh.
|
||||
// This simulates one timestep with dt = 0.1.
|
||||
parallel_for (Ai.dimension_0 (), update<mesh_type> (A, 0.1));
|
||||
|
||||
printf ("Done\n");
|
||||
Kokkos::finalize ();
|
||||
}
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Makefile for one Kokkos example directory: compiles every *.cpp in the
# current directory and links the objects into a single executable whose
# suffix (.cuda or .host) depends on the selected backend.

# Location of the Kokkos source tree relative to this directory.
KOKKOS_PATH = ../../../..
# Expanded once (:=) so the directory is not re-globbed on every reference.
SRC := $(wildcard *.cpp)

# NOTE: prerequisites are brought up to date before a recipe runs, so this
# message is printed after `build` has completed.
default: build
	echo "Start Build"

# None of these names correspond to files produced by the build.
.PHONY: default build clean

# The branch is selected from the value KOKKOS_DEVICES has at this point
# (e.g. from the command line or the environment). The assignments inside the
# selected branch then provide the device and architecture lists; values given
# on the make command line take precedence over them.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# Not referenced in this file; presumably consumed elsewhere -- TODO confirm.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Supplies the KOKKOS_* variables and the kokkos-clean target used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable. `$@` names the target being built, so
# the command stays well-formed even if $(EXE) expands to more than one word.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

# The output file is named explicitly so the recipe creates exactly its target.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,214 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_DualView.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
// DualView helps you manage data and computations that take place on
|
||||
// two different memory spaces. Examples include CUDA device memory
|
||||
// and (CPU) host memory (currently implemented), or Intel Knights
|
||||
// Landing MCDRAM and DRAM (not yet implemented). For example, if you
|
||||
// have ported only some parts of your application to run in CUDA,
|
||||
// DualView can help manage moving data between the parts of your
|
||||
// application that work best with CUDA, and the parts that work
|
||||
// better on the CPU.
|
||||
//
|
||||
// A DualView takes the same template parameters as a View, but
|
||||
// contains two Views: One that lives in the DualView's memory space,
|
||||
// and one that lives in that memory space's host mirror space. If
|
||||
// both memory spaces are the same, then the two Views just alias one
|
||||
// another. This means that you can use DualView all the time, even
|
||||
// when not running in a memory space like CUDA. DualView's
|
||||
// operations to help you manage memory take almost no time in that
|
||||
// case. This makes your code even more performance portable.
|
||||
|
||||
typedef Kokkos::DualView<double*> view_type;
|
||||
typedef Kokkos::DualView<int**> idx_type;
|
||||
|
||||
|
||||
template<class ExecutionSpace>
|
||||
struct localsum {
|
||||
// If the functor has a public 'execution_space' typedef, that defines
|
||||
// the functor's execution space (where it runs in parallel). This
|
||||
// overrides Kokkos' default execution space.
|
||||
typedef ExecutionSpace execution_space;
|
||||
|
||||
typedef typename Kokkos::Impl::if_c<Kokkos::Impl::is_same<ExecutionSpace,Kokkos::DefaultExecutionSpace>::value ,
|
||||
idx_type::memory_space, idx_type::host_mirror_space>::type memory_space;
|
||||
|
||||
// Get the view types on the particular device for which the functor
|
||||
// is instantiated.
|
||||
//
|
||||
// "const_data_type" is a typedef in View (and DualView) which is
|
||||
// the const version of the first template parameter of the View.
|
||||
// For example, the const_data_type version of double** is const
|
||||
// double**.
|
||||
Kokkos::View<idx_type::const_data_type, idx_type::array_layout, memory_space> idx;
|
||||
// "array_intrinsic_type" is a typedef in ViewTraits (and DualView) which is the
|
||||
// array version of the value(s) stored in the View.
|
||||
Kokkos::View<view_type::array_intrinsic_type, view_type::array_layout, memory_space> dest;
|
||||
Kokkos::View<view_type::const_data_type, view_type::array_layout,
|
||||
memory_space, Kokkos::MemoryRandomAccess> src;
|
||||
|
||||
// Constructor takes DualViews, synchronizes them to the device,
|
||||
// then marks them as modified on the device.
|
||||
localsum (idx_type dv_idx, view_type dv_dest, view_type dv_src)
|
||||
{
|
||||
// Extract the view on the correct Device (i.e., the correct
|
||||
// memory space) from the DualView. DualView has a template
|
||||
// method, view(), which is templated on the memory space. If the
|
||||
// DualView has a View from that memory space, view() returns the
|
||||
// View in that space.
|
||||
idx = dv_idx.view<memory_space> ();
|
||||
dest = dv_dest.template view<memory_space> ();
|
||||
src = dv_src.template view<memory_space> ();
|
||||
|
||||
// Synchronize the DualView to the correct Device.
|
||||
//
|
||||
// DualView's sync() method is templated on a memory space, and
|
||||
// synchronizes the DualView in a one-way fashion to that memory
|
||||
// space. "Synchronizing" means copying, from the other memory
|
||||
// space to the Device memory space. sync() does _nothing_ if the
|
||||
// Views on the two memory spaces are in sync. DualView
|
||||
// determines this by the user manually marking one side or the
|
||||
// other as modified; see the modify() call below.
|
||||
|
||||
dv_idx.sync<memory_space> ();
|
||||
dv_dest.template sync<memory_space> ();
|
||||
dv_src.template sync<memory_space> ();
|
||||
|
||||
// Mark dest as modified on Device.
|
||||
dv_dest.template modify<memory_space> ();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int i) const {
|
||||
double tmp = 0.0;
|
||||
for (int j = 0; j < (int) idx.dimension_1(); ++j) {
|
||||
const double val = src(idx(i,j));
|
||||
tmp += val*val + 0.5*(idx.dimension_0()*val -idx.dimension_1()*val);
|
||||
}
|
||||
dest(i) += tmp;
|
||||
}
|
||||
};
|
||||
|
||||
class ParticleType {
|
||||
public:
|
||||
double q;
|
||||
double m;
|
||||
double q_over_m;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
ParticleType(double q_ = -1, double m_ = 1):
|
||||
q(q_), m(m_), q_over_m(q/m) {}
|
||||
protected:
|
||||
};
|
||||
|
||||
typedef Kokkos::DualView<ParticleType[10]> ParticleTypes;
|
||||
int main (int narg, char* arg[]) {
|
||||
Kokkos::initialize (narg, arg);
|
||||
|
||||
ParticleTypes test("Test");
|
||||
Kokkos::fence();
|
||||
test.h_view(0) = ParticleType(-1e4,1);
|
||||
Kokkos::fence();
|
||||
|
||||
int size = 1000000;
|
||||
|
||||
// Create DualViews. This will allocate on both the device and its
|
||||
// host_mirror_device.
|
||||
idx_type idx ("Idx",size,64);
|
||||
view_type dest ("Dest",size);
|
||||
view_type src ("Src",size);
|
||||
|
||||
|
||||
srand (134231);
|
||||
|
||||
// Get a reference to the host view of idx directly (equivalent to
|
||||
// idx.view<idx_type::host_mirror_space>() )
|
||||
idx_type::t_host h_idx = idx.h_view;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
for (view_type::size_type j = 0; j < h_idx.dimension_1 (); ++j) {
|
||||
h_idx(i,j) = (size + i + (rand () % 500 - 250)) % size;
|
||||
}
|
||||
}
|
||||
|
||||
// Mark idx as modified on the host_mirror_space so that a
|
||||
// sync to the device will actually move data. The sync happens in
|
||||
// the functor's constructor.
|
||||
idx.modify<idx_type::host_mirror_space> ();
|
||||
|
||||
// Run on the device. This will cause a sync of idx to the device,
|
||||
// since it was marked as modified on the host.
|
||||
Kokkos::Impl::Timer timer;
|
||||
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
double sec1_dev = timer.seconds();
|
||||
|
||||
timer.reset();
|
||||
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
double sec2_dev = timer.seconds();
|
||||
|
||||
// Run on the host's default execution space (could be the same as device).
|
||||
// This will cause a sync back to the host of dest. Note that if the Device is CUDA,
|
||||
// the data layout will not be optimal on host, so performance is
|
||||
// lower than what it would be for a pure host compilation.
|
||||
timer.reset();
|
||||
Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
double sec1_host = timer.seconds();
|
||||
|
||||
timer.reset();
|
||||
Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
double sec2_host = timer.seconds();
|
||||
|
||||
printf("Device Time with Sync: %f without Sync: %f \n",sec1_dev,sec2_dev);
|
||||
printf("Host Time with Sync: %f without Sync: %f \n",sec1_host,sec2_host);
|
||||
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Makefile for one Kokkos example directory: compiles every *.cpp in the
# current directory and links the objects into a single executable whose
# suffix (.cuda or .host) depends on the selected backend.

# Location of the Kokkos source tree relative to this directory.
KOKKOS_PATH = ../../../..
# Expanded once (:=) so the directory is not re-globbed on every reference.
SRC := $(wildcard *.cpp)

# NOTE: prerequisites are brought up to date before a recipe runs, so this
# message is printed after `build` has completed.
default: build
	echo "Start Build"

# None of these names correspond to files produced by the build.
.PHONY: default build clean

# The branch is selected from the value KOKKOS_DEVICES has at this point
# (e.g. from the command line or the environment). The assignments inside the
# selected branch then provide the device and architecture lists; values given
# on the make command line take precedence over them.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# Not referenced in this file; presumably consumed elsewhere -- TODO confirm.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Supplies the KOKKOS_* variables and the kokkos-clean target used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable. `$@` names the target being built, so
# the command stays well-formed even if $(EXE) expands to more than one word.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

# The output file is named explicitly so the recipe creates exactly its target.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,134 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_DualView.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
typedef Kokkos::View<double*> view_type;
|
||||
typedef Kokkos::View<int**> idx_type;
|
||||
|
||||
|
||||
template<class Device>
|
||||
struct localsum {
|
||||
// Define the execution space for the functor (overrides the DefaultExecutionSpace)
|
||||
typedef Device execution_space;
|
||||
|
||||
// Get the view types on the particular device the functor is instantiated for
|
||||
idx_type::const_type idx;
|
||||
view_type dest;
|
||||
Kokkos::View<view_type::const_data_type, view_type::array_layout, view_type::execution_space, Kokkos::MemoryRandomAccess > src;
|
||||
|
||||
localsum(idx_type idx_, view_type dest_,
|
||||
view_type src_):idx(idx_),dest(dest_),src(src_) {
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (int i) const {
|
||||
double tmp = 0.0;
|
||||
for(int j = 0; j < idx.dimension_1(); j++) {
|
||||
const double val = src(idx(i,j));
|
||||
tmp += val*val + 0.5*(idx.dimension_0()*val -idx.dimension_1()*val);
|
||||
}
|
||||
dest(i) += tmp;
|
||||
}
|
||||
};
|
||||
|
||||
int main(int narg, char* arg[]) {
|
||||
Kokkos::initialize(narg,arg);
|
||||
|
||||
int size = 1000000;
|
||||
|
||||
// Create Views
|
||||
idx_type idx("Idx",size,64);
|
||||
view_type dest("Dest",size);
|
||||
view_type src("Src",size);
|
||||
|
||||
srand(134231);
|
||||
|
||||
// When using UVM Cuda views can be accessed on the Host directly
|
||||
for(int i=0; i<size; i++) {
|
||||
for(int j=0; j<idx.dimension_1(); j++)
|
||||
idx(i,j) = (size + i + (rand()%500 - 250))%size;
|
||||
}
|
||||
|
||||
Kokkos::fence();
|
||||
// Run on the device
|
||||
// This will cause a sync of idx to the device since it was modified on the host
|
||||
Kokkos::Impl::Timer timer;
|
||||
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
double sec1_dev = timer.seconds();
|
||||
|
||||
// No data transfer will happen now, since nothing is accessed on the host
|
||||
timer.reset();
|
||||
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
double sec2_dev = timer.seconds();
|
||||
|
||||
// Run on the host
|
||||
// This will cause a sync back to the host of dest which was changed on the device
|
||||
// Compare runtime here with the dual_view example: dest will be copied back in 4k blocks
|
||||
// when they are accessed the first time during the parallel_for. Due to the latency of a memcpy
|
||||
// this gives lower effective bandwidth when doing a manual copy via dual views
|
||||
timer.reset();
|
||||
Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
double sec1_host = timer.seconds();
|
||||
|
||||
// No data transfers will happen now
|
||||
timer.reset();
|
||||
Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
|
||||
Kokkos::fence();
|
||||
double sec2_host = timer.seconds();
|
||||
|
||||
|
||||
|
||||
printf("Device Time with Sync: %lf without Sync: %lf \n",sec1_dev,sec2_dev);
|
||||
printf("Host Time with Sync: %lf without Sync: %lf \n",sec1_host,sec2_host);
|
||||
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Makefile for one Kokkos example directory: compiles every *.cpp in the
# current directory and links the objects into a single executable whose
# suffix (.cuda or .host) depends on the selected backend.

# Location of the Kokkos source tree relative to this directory.
KOKKOS_PATH = ../../../..
# Expanded once (:=) so the directory is not re-globbed on every reference.
SRC := $(wildcard *.cpp)

# NOTE: prerequisites are brought up to date before a recipe runs, so this
# message is printed after `build` has completed.
default: build
	echo "Start Build"

# None of these names correspond to files produced by the build.
.PHONY: default build clean

# The branch is selected from the value KOKKOS_DEVICES has at this point
# (e.g. from the command line or the environment). The assignments inside the
# selected branch then provide the device and architecture lists; values given
# on the make command line take precedence over them.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# Not referenced in this file; presumably consumed elsewhere -- TODO confirm.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Supplies the KOKKOS_* variables and the kokkos-clean target used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link all objects into the executable. `$@` names the target being built, so
# the command stays well-formed even if $(EXE) expands to more than one word.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

# The output file is named explicitly so the recipe creates exactly its target.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,43 +0,0 @@
|
||||
# Builds the example in this directory against the Kokkos tree at KOKKOS_PATH.
# The executable takes its name from the .cpp file found here, with a .cuda or
# .host suffix; the link rule assumes the directory holds a single .cpp file.
KOKKOS_PATH = ../../../..
SRC = $(wildcard *.cpp)

# None of these goals names a file on disk, so mark them phony; a stray file
# called "clean" or "build" would otherwise make the goal look up to date.
.PHONY: default build clean

# Declared ahead of the Makefile.kokkos include so that it remains the default
# goal. The echo runs only after the `build` prerequisite has finished.
default: build
	echo "Start Build"

# Toolchain selection. KOKKOS_DEVICES is tested as it stands at this point
# (command line or environment); when it does not mention Cuda the host branch
# is taken. A value given on the make command line takes precedence over the
# assignments made inside either branch.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
# The Cuda branch of this example additionally requests per-thread default streams.
CXXFLAGS = -O3 --default-stream per-thread
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Supplies the KOKKOS_* variables and the kokkos-clean goal used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step: $@ is the executable named by EXE.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# $(RM) is make's predefined `rm -f`.
clean: kokkos-clean
	$(RM) *.o *.cuda *.host

# Compilation rules

# The object file is named explicitly so the recipe produces exactly its target.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,148 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
#include <typeinfo>
|
||||
#include <cmath>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
struct FillDevice {
|
||||
double value;
|
||||
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> a;
|
||||
FillDevice(const double& val, const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_a):
|
||||
value(val),a(d_a){}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& i) const {
|
||||
a(i) = value;
|
||||
}
|
||||
};
|
||||
|
||||
struct ComputeADevice {
|
||||
int iter;
|
||||
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> a;
|
||||
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> b;
|
||||
ComputeADevice(const int& iter_,
|
||||
const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_a,
|
||||
const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_b):
|
||||
iter(iter_),a(d_a),b(d_b){}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& i) const {
|
||||
for(int j=1;j<iter;j++) {
|
||||
a(i) += std::pow(b(i),1.0+1.0/iter);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct ComputeAHost {
|
||||
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace> a;
|
||||
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace> b;
|
||||
ComputeAHost( const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace>& d_a,
|
||||
const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace>& d_b):
|
||||
a(d_a),b(d_b){}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& i) const {
|
||||
a(i) += b(i);
|
||||
}
|
||||
};
|
||||
|
||||
struct MergeDevice {
|
||||
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> a;
|
||||
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> b;
|
||||
MergeDevice(
|
||||
const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_a,
|
||||
const Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace>& d_b):
|
||||
a(d_a),b(d_b){}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& i) const {
|
||||
a(i) += b(i);
|
||||
}
|
||||
};
|
||||
|
||||
int main(int argc, char * argv[]) {
|
||||
int size = 100000000;
|
||||
Kokkos::initialize();
|
||||
int synch = atoi(argv[1]);
|
||||
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> d_a("Device A",size);
|
||||
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> d_b("Device B",size);
|
||||
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaSpace> d_tmp("Device tmp",size);
|
||||
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace> h_a("Host A",size);
|
||||
Kokkos::View<double*,Kokkos::LayoutLeft,Kokkos::CudaHostPinnedSpace> h_b("Host B",size);
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),FillDevice(0.0,d_a));
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),FillDevice(1.3513,d_b));
|
||||
Kokkos::fence();
|
||||
Kokkos::Impl::Timer timer;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),ComputeADevice(20,d_a,d_b));
|
||||
|
||||
if(synch==1)
|
||||
Kokkos::deep_copy(Kokkos::OpenMP(),h_b,d_b);
|
||||
if(synch==2)
|
||||
Kokkos::deep_copy(h_b,d_b);
|
||||
|
||||
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::OpenMP>(0,size),[=] (const int& i) {
|
||||
h_a(i) = 0.0;
|
||||
});
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::OpenMP>(0,size),ComputeAHost(h_a,h_b));
|
||||
Kokkos::OpenMP::fence();
|
||||
if(synch==1)
|
||||
Kokkos::deep_copy(Kokkos::OpenMP(), d_tmp,h_a);
|
||||
if(synch==2)
|
||||
Kokkos::deep_copy(d_tmp,h_a);
|
||||
Kokkos::fence();
|
||||
|
||||
std::cout << "Time " << timer.seconds() << std::endl;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::Cuda>(0,size),MergeDevice(d_a,d_tmp));
|
||||
|
||||
Kokkos::deep_copy(h_a,d_a);
|
||||
std::cout << "h_a(0): " << h_a(0) << " ( Correct: 27.4154 )" << std::endl;
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -1,84 +0,0 @@
|
||||
# Driver for the example sub-directories: each goal runs the sub-makes one
# after another, and the first failing sub-make aborts the goal.
#
# $(MAKE) is used instead of a literal `make` so the same make program and its
# flags (such as -n) reach the sub-makes, and -C replaces `cd dir; make`, which
# would have re-run make in this directory if the cd failed.

# All goals are commands, not files.
.PHONY: default openmp pthreads serial cuda clean

default:
	$(MAKE) -C ./01_data_layouts -j 4
	$(MAKE) -C ./02_memory_traits -j 4
	$(MAKE) -C ./03_subviews -j 4
	$(MAKE) -C ./04_dualviews -j 4
	$(MAKE) -C ./05_NVIDIA_UVM -j 4
	$(MAKE) -C ./06_AtomicViews -j 4

openmp:
	$(MAKE) -C ./01_data_layouts -j 4 KOKKOS_DEVICES=OpenMP
	$(MAKE) -C ./02_memory_traits -j 4 KOKKOS_DEVICES=OpenMP
	$(MAKE) -C ./03_subviews -j 4 KOKKOS_DEVICES=OpenMP
	$(MAKE) -C ./04_dualviews -j 4 KOKKOS_DEVICES=OpenMP
	$(MAKE) -C ./05_NVIDIA_UVM -j 4 KOKKOS_DEVICES=OpenMP
	$(MAKE) -C ./06_AtomicViews -j 4 KOKKOS_DEVICES=OpenMP

pthreads:
	$(MAKE) -C ./01_data_layouts -j 4 KOKKOS_DEVICES=Pthreads
	$(MAKE) -C ./02_memory_traits -j 4 KOKKOS_DEVICES=Pthreads
	$(MAKE) -C ./03_subviews -j 4 KOKKOS_DEVICES=Pthreads
	$(MAKE) -C ./04_dualviews -j 4 KOKKOS_DEVICES=Pthreads
	$(MAKE) -C ./05_NVIDIA_UVM -j 4 KOKKOS_DEVICES=Pthreads
	$(MAKE) -C ./06_AtomicViews -j 4 KOKKOS_DEVICES=Pthreads

serial:
	$(MAKE) -C ./01_data_layouts -j 4 KOKKOS_DEVICES=Serial
	$(MAKE) -C ./02_memory_traits -j 4 KOKKOS_DEVICES=Serial
	$(MAKE) -C ./03_subviews -j 4 KOKKOS_DEVICES=Serial
	$(MAKE) -C ./04_dualviews -j 4 KOKKOS_DEVICES=Serial
	$(MAKE) -C ./05_NVIDIA_UVM -j 4 KOKKOS_DEVICES=Serial
	$(MAKE) -C ./06_AtomicViews -j 4 KOKKOS_DEVICES=Serial

cuda:
	$(MAKE) -C ./01_data_layouts -j 4 KOKKOS_DEVICES=Cuda,Serial
	$(MAKE) -C ./02_memory_traits -j 4 KOKKOS_DEVICES=Cuda,Serial
	$(MAKE) -C ./03_subviews -j 4 KOKKOS_DEVICES=Cuda,Serial
	$(MAKE) -C ./04_dualviews -j 4 KOKKOS_DEVICES=Cuda,Serial
	$(MAKE) -C ./05_NVIDIA_UVM -j 4 KOKKOS_DEVICES=Cuda,Serial
	$(MAKE) -C ./06_AtomicViews -j 4 KOKKOS_DEVICES=Cuda,Serial

clean:
	$(MAKE) -C ./01_data_layouts clean
	$(MAKE) -C ./02_memory_traits clean
	$(MAKE) -C ./03_subviews clean
	$(MAKE) -C ./04_dualviews clean
	$(MAKE) -C ./05_NVIDIA_UVM clean
	$(MAKE) -C ./06_AtomicViews clean
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Builds the example in this directory against the Kokkos tree at KOKKOS_PATH.
# The executable takes its name from the .cpp file found here, with a .cuda or
# .host suffix; the link rule assumes the directory holds a single .cpp file.
KOKKOS_PATH = ../../../..
SRC = $(wildcard *.cpp)

# None of these goals names a file on disk, so mark them phony; a stray file
# called "clean" or "build" would otherwise make the goal look up to date.
.PHONY: default build clean

# Declared ahead of the Makefile.kokkos include so that it remains the default
# goal. The echo runs only after the `build` prerequisite has finished.
default: build
	echo "Start Build"

# Toolchain selection. KOKKOS_DEVICES is tested as it stands at this point
# (command line or environment); when it does not mention Cuda the host branch
# is taken. A value given on the make command line takes precedence over the
# assignments made inside either branch.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Supplies the KOKKOS_* variables and the kokkos-clean goal used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step: $@ is the executable named by EXE.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# $(RM) is make's predefined `rm -f`.
clean: kokkos-clean
	$(RM) *.o *.cuda *.host

# Compilation rules

# The object file is named explicitly so the recipe produces exactly its target.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,152 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Random.hpp>
|
||||
#include <Kokkos_DualView.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
#include <cstdlib>
|
||||
|
||||
typedef Kokkos::HostSpace::execution_space DefaultHostType;
|
||||
|
||||
// Kokkos provides two different random number generators with a 64 bit and a 1024 bit state.
|
||||
// These generators are based on Vigna, Sebastiano (2014). "An experimental exploration of Marsaglia's xorshift generators, scrambled"
|
||||
// See: http://arxiv.org/abs/1402.6246
|
||||
// The generators can be used fully independently on each thread and have been tested to
|
||||
// produce good statistics for both inter and intra thread numbers.
|
||||
// Note that within a kernel NO random number operations are (team) collective operations.
|
||||
// Everything can be called within branches. This is a difference to the curand library where
|
||||
// certain operations are required to be called by all threads in a block.
|
||||
//
|
||||
// In Kokkos you are required to create a pool of generator states, so that threads can
|
||||
// grab their own. On CPU architectures the pool size is equal to the thread number,
|
||||
// on CUDA about 128k states are generated (enough to give every potentially simultaneously
|
||||
// running thread its own state). Within a kernel a thread is required to acquire a state from the
|
||||
// pool and later return it.
|
||||
// On CPUs the Random number generator is deterministic if using the same number of threads.
|
||||
// On GPUs (i.e. using the CUDA backend) it is not deterministic because threads acquire states via
|
||||
// atomics.
|
||||
|
||||
// A Functor for generating uint64_t random numbers templated on the GeneratorPool type
|
||||
template<class GeneratorPool>
|
||||
struct generate_random {
|
||||
|
||||
// The GeneratorPool
|
||||
GeneratorPool rand_pool;
|
||||
|
||||
// Output View for the random numbers
|
||||
Kokkos::View<uint64_t*> vals;
|
||||
int samples;
|
||||
|
||||
// Initialize all members
|
||||
generate_random(Kokkos::View<uint64_t*> vals_,
|
||||
GeneratorPool rand_pool_,
|
||||
int samples_):
|
||||
vals(vals_),rand_pool(rand_pool_),samples(samples_) {}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (int i) const {
|
||||
// Get a random number state from the pool for the active thread
|
||||
typename GeneratorPool::generator_type rand_gen = rand_pool.get_state();
|
||||
|
||||
// Draw samples numbers from the pool as urand64 between 0 and rand_pool.MAX_URAND64
|
||||
// Note there are function calls to get other type of scalars, and also to specify
|
||||
// Ranges or get a normal distributed float.
|
||||
for(int k = 0;k<samples;k++)
|
||||
vals(i*samples+k) = rand_gen.urand64();
|
||||
|
||||
// Give the state back, which will allow another thread to aquire it
|
||||
rand_pool.free_state(rand_gen);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
int main(int argc, char* args[]) {
|
||||
if (argc != 3){
|
||||
printf("Please pass two integers on the command line\n");
|
||||
}
|
||||
else {
|
||||
|
||||
// Initialize Kokkos
|
||||
Kokkos::initialize(argc,args);
|
||||
int size = atoi(args[1]);
|
||||
int samples = atoi(args[2]);
|
||||
|
||||
// Create two random number generator pools one for 64bit states and one for 1024 bit states
|
||||
// Both take an 64 bit unsigned integer seed to initialize a Random_XorShift64 generator which
|
||||
// is used to fill the generators of the pool.
|
||||
Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857);
|
||||
Kokkos::Random_XorShift1024_Pool<> rand_pool1024(5374857);
|
||||
Kokkos::DualView<uint64_t*> vals("Vals",size*samples);
|
||||
|
||||
// Run some performance comparisons
|
||||
Kokkos::Impl::Timer timer;
|
||||
Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift64_Pool<> >(vals.d_view,rand_pool64,samples));
|
||||
Kokkos::fence();
|
||||
|
||||
timer.reset();
|
||||
Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift64_Pool<> >(vals.d_view,rand_pool64,samples));
|
||||
Kokkos::fence();
|
||||
double time_64 = timer.seconds();
|
||||
|
||||
Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift1024_Pool<> >(vals.d_view,rand_pool1024,samples));
|
||||
Kokkos::fence();
|
||||
|
||||
timer.reset();
|
||||
Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift1024_Pool<> >(vals.d_view,rand_pool1024,samples));
|
||||
Kokkos::fence();
|
||||
double time_1024 = timer.seconds();
|
||||
|
||||
printf("#Time XorShift64*: %lf %lf\n",time_64,1.0e-9*samples*size/time_64 );
|
||||
printf("#Time XorShift1024*: %lf %lf\n",time_1024,1.0e-9*samples*size/time_1024 );
|
||||
|
||||
Kokkos::deep_copy(vals.h_view,vals.d_view);
|
||||
|
||||
Kokkos::finalize();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@ -1,24 +0,0 @@
|
||||
# Driver for the example sub-directory: each goal runs a sub-make in it.
#
# $(MAKE) is used instead of a literal `make` so the same make program and its
# flags (such as -n) reach the sub-make, and -C replaces `cd dir; make`, which
# would have re-run make in this directory if the cd failed.

# All goals are commands, not files.
.PHONY: default openmp pthreads serial cuda clean

default:
	$(MAKE) -C ./01_random_numbers -j 4

openmp:
	$(MAKE) -C ./01_random_numbers -j 4 KOKKOS_DEVICES=OpenMP

pthreads:
	$(MAKE) -C ./01_random_numbers -j 4 KOKKOS_DEVICES=Pthreads

serial:
	$(MAKE) -C ./01_random_numbers -j 4 KOKKOS_DEVICES=Serial

cuda:
	$(MAKE) -C ./01_random_numbers -j 4 KOKKOS_DEVICES=Cuda,Serial

clean:
	$(MAKE) -C ./01_random_numbers clean
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Builds the example in this directory against the Kokkos tree at KOKKOS_PATH.
# The executable takes its name from the .cpp file found here, with a .cuda or
# .host suffix; the link rule assumes the directory holds a single .cpp file.
KOKKOS_PATH = ../../../..
SRC = $(wildcard *.cpp)

# None of these goals names a file on disk, so mark them phony; a stray file
# called "clean" or "build" would otherwise make the goal look up to date.
.PHONY: default build clean

# Declared ahead of the Makefile.kokkos include so that it remains the default
# goal. The echo runs only after the `build` prerequisite has finished.
default: build
	echo "Start Build"

# Toolchain selection. KOKKOS_DEVICES is tested as it stands at this point
# (command line or environment); when it does not mention Cuda the host branch
# is taken. A value given on the make command line takes precedence over the
# assignments made inside either branch.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Supplies the KOKKOS_* variables and the kokkos-clean goal used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step: $@ is the executable named by EXE.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# $(RM) is make's predefined `rm -f`.
clean: kokkos-clean
	$(RM) *.o *.cuda *.host

# Compilation rules

# The object file is named explicitly so the recipe produces exactly its target.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,94 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
// Using default execution space define a TeamPolicy and its member_type
|
||||
// The member_type is what the operator of a functor or Lambda gets, for
|
||||
// a simple RangePolicy the member_type is simply an integer
|
||||
// For a TeamPolicy it's a much richer object, since it provides all information
|
||||
// to identify a thread uniquely and some team related function calls such as a
|
||||
// barrier (which will be used in a subsequent example).
|
||||
// A ThreadTeam consists of 1 to n threads where the maximum value of n is
|
||||
// determined by the hardware. On a dual socket CPU machine with 8 cores per socket
|
||||
// the maximum size of a team is 8. The number of teams (i.e. the league_size) is
|
||||
// not limited by physical constraints. It is a purely logical number.
|
||||
|
||||
typedef Kokkos::TeamPolicy<> team_policy ;
|
||||
typedef team_policy::member_type team_member ;
|
||||
|
||||
// Define a functor which can be launched using the TeamPolicy
|
||||
struct hello_world {
|
||||
typedef int value_type; //Specify value type for reduction target, sum
|
||||
|
||||
// This is a reduction operator which now takes as first argument the
|
||||
// TeamPolicy member_type. Every member of the team contributes to the
|
||||
// total sum.
|
||||
// It is helpful to think of this operator as a parallel region for a team
|
||||
// (i.e. every team member is active and will execute the code).
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() ( const team_member & thread, int& sum) const {
|
||||
sum+=1;
|
||||
// The TeamPolicy<>::member_type provides functions to query the multi
|
||||
// dimensional index of a thread as well as the number of thread-teams and the size
|
||||
// of each team.
|
||||
printf("Hello World: %i %i // %i %i\n",thread.league_rank(),thread.team_rank(),thread.league_size(),thread.team_size());
|
||||
}
|
||||
};
|
||||
|
||||
int main(int narg, char* args[]) {
|
||||
Kokkos::initialize(narg,args);
|
||||
|
||||
// Launch 12 teams of the maximum number of threads per team
|
||||
const team_policy policy( 12 , team_policy::team_size_max( hello_world() ) );
|
||||
|
||||
int sum = 0;
|
||||
Kokkos::parallel_reduce( policy , hello_world() , sum );
|
||||
|
||||
// The result will be 12*team_policy::team_size_max( hello_world())
|
||||
printf("Result %i\n",sum);
|
||||
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Builds the example in this directory against the Kokkos tree at KOKKOS_PATH.
# The executable takes its name from the .cpp file found here, with a .cuda or
# .host suffix; the link rule assumes the directory holds a single .cpp file.
KOKKOS_PATH = ../../../..
SRC = $(wildcard *.cpp)

# None of these goals names a file on disk, so mark them phony; a stray file
# called "clean" or "build" would otherwise make the goal look up to date.
.PHONY: default build clean

# Declared ahead of the Makefile.kokkos include so that it remains the default
# goal. The echo runs only after the `build` prerequisite has finished.
default: build
	echo "Start Build"

# Toolchain selection. KOKKOS_DEVICES is tested as it stands at this point
# (command line or environment); when it does not mention Cuda the host branch
# is taken. A value given on the make command line takes precedence over the
# assignments made inside either branch.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Supplies the KOKKOS_* variables and the kokkos-clean goal used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step: $@ is the executable named by EXE.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# $(RM) is make's predefined `rm -f`.
clean: kokkos-clean
	$(RM) *.o *.cuda *.host

# Compilation rules

# The object file is named explicitly so the recipe produces exactly its target.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,93 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
// Demonstrate a parallel reduction using thread teams (TeamPolicy).
|
||||
//
|
||||
// A thread team consists of 1 to n threads. The hardware determines
|
||||
// the maximum value of n. On a dual-socket CPU machine with 8 cores
|
||||
// per socket, the maximum size of a team is 8. The number of teams
|
||||
// (the league_size) is not limited by physical constraints (up to
|
||||
// some reasonable bound, which eventually depends upon the hardware
|
||||
// and programming model implementation).
|
||||
|
||||
int main (int narg, char* args[]) {
|
||||
using Kokkos::parallel_reduce;
|
||||
typedef Kokkos::TeamPolicy<> team_policy;
|
||||
typedef typename team_policy::member_type team_member;
|
||||
|
||||
Kokkos::initialize (narg, args);
|
||||
|
||||
// Set up a policy that launches 12 teams, with the maximum number
|
||||
// of threads per team.
|
||||
const team_policy policy (12, team_policy::team_size_max ( [=]{} ));
|
||||
|
||||
// This is a reduction with a team policy. The team policy changes
|
||||
// the first argument of the lambda. Rather than an integer index
|
||||
// (as with RangePolicy), it's now TeamPolicy::member_type. This
|
||||
// object provides all information to identify a thread uniquely.
|
||||
// It also provides some team-related function calls such as a team
|
||||
// barrier (which a subsequent example will use).
|
||||
//
|
||||
// Every member of the team contributes to the total sum. It is
|
||||
// helpful to think of the lambda's body as a "team parallel
|
||||
// region." That is, every team member is active and will execute
|
||||
// the body of the lambda.
|
||||
int sum = 0;
|
||||
parallel_reduce (policy, KOKKOS_LAMBDA (const team_member& thread, int& lsum) {
|
||||
lsum += 1;
|
||||
// TeamPolicy<>::member_type provides functions to query the
|
||||
// multidimensional index of a thread, as well as the number of
|
||||
// thread teams and the size of each team.
|
||||
printf ("Hello World: %i %i // %i %i\n", thread.league_rank (),
|
||||
thread.team_rank (), thread.league_size (), thread.team_size ());
|
||||
}, sum);
|
||||
|
||||
// The result will be 12*team_policy::team_size_max([=]{})
|
||||
printf ("Result %i\n",sum);
|
||||
|
||||
Kokkos::finalize ();
|
||||
}
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Builds the example in this directory against the Kokkos tree at KOKKOS_PATH.
# The executable takes its name from the .cpp file found here, with a .cuda or
# .host suffix; the link rule assumes the directory holds a single .cpp file.
KOKKOS_PATH = ../../../..
SRC = $(wildcard *.cpp)

# None of these goals names a file on disk, so mark them phony; a stray file
# called "clean" or "build" would otherwise make the goal look up to date.
.PHONY: default build clean

# Declared ahead of the Makefile.kokkos include so that it remains the default
# goal. The echo runs only after the `build` prerequisite has finished.
default: build
	echo "Start Build"

# Toolchain selection. KOKKOS_DEVICES is tested as it stands at this point
# (command line or environment); when it does not mention Cuda the host branch
# is taken. A value given on the make command line takes precedence over the
# assignments made inside either branch.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

# Supplies the KOKKOS_* variables and the kokkos-clean goal used below.
include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step: $@ is the executable named by EXE.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# $(RM) is make's predefined `rm -f`.
clean: kokkos-clean
	$(RM) *.o *.cuda *.host

# Compilation rules

# The object file is named explicitly so the recipe produces exactly its target.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $@
|
||||
@ -1,89 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
// See 01_thread_teams for an explanation of a basic TeamPolicy
|
||||
typedef Kokkos::TeamPolicy<> team_policy ;
|
||||
typedef typename team_policy::member_type team_member ;
|
||||
|
||||
struct hello_world {
|
||||
typedef int value_type; //Specify value type for reduction target, sum
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() ( const team_member & thread, int& sum) const {
|
||||
sum+=1;
|
||||
// When using the TeamPolicy Kokkos allows for nested parallel loops.
|
||||
// All three Kokkos parallel patterns are allowed (for, reduce, scan) and they
|
||||
// largely follow the same syntax as on the global level.
|
||||
// The execution policy for the Thread level nesting (the Vector level is in the next
|
||||
// tutorial example) is Kokkos::TeamThreadRange. This means the loop will be executed
|
||||
// by all members of the team and the loop count will be split between threads of the
|
||||
// team. Its arguments are the team_member, and a loop count.
|
||||
// Not every thread will do the same amount of iterations. On a GPU for example with
|
||||
// a team_size() larger than 31 only the first 31 threads would actually do anything.
|
||||
// On a CPU with 8 threads 7 would execute 4 loop iterations, and 1 thread would do
|
||||
// 3. Note also that the mode of splitting the count is architecture dependent similar
|
||||
// to what the RangePolicy on a global level does.
|
||||
// The call itself is not guaranteed to be synchronous. Also keep in mind that the
|
||||
// operator using a team_policy acts like a parallel region for the team. That means
|
||||
// that everything outside of the nested parallel_for is also executed by all threads
|
||||
// of the team.
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(thread,31), [&] (const int& i) {
|
||||
printf("Hello World: (%i , %i) executed loop %i \n",thread.league_rank(),thread.team_rank(),i);
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
int main(int narg, char* args[]) {
|
||||
Kokkos::initialize(narg,args);
|
||||
|
||||
// Launch 3 teams of the maximum number of threads per team
|
||||
const team_policy policy( 3 , team_policy::team_size_max( hello_world() ) );
|
||||
|
||||
int sum = 0;
|
||||
Kokkos::parallel_reduce( policy , hello_world() , sum );
|
||||
printf("Result %i\n",sum);
|
||||
|
||||
Kokkos::finalize();
|
||||
}
|
||||
@ -1,43 +0,0 @@
|
||||
# Build file for a single Kokkos tutorial example.
#
# Every *.cpp file in this directory is compiled and linked into one
# executable. Kokkos-specific flags, libraries and the kokkos-clean target are
# expected to come from $(KOKKOS_PATH)/Makefile.kokkos (included below).
#
# Backend selection: if KOKKOS_DEVICES (normally given on the command line)
# contains "Cuda", the example is built with nvcc_wrapper into a .cuda
# executable; otherwise it is built with g++ into a .host executable.

KOKKOS_PATH = ../../../..
SRC := $(wildcard *.cpp)

# These goals are commands, not files; keep them working even if a file with
# the same name ever appears in the directory.
.PHONY: default build clean

# First rule of the file, hence the default goal.
default: build
	echo "Start Build"

ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# Not referenced by any rule in this file; kept for Makefile.kokkos / callers.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step. The output is named with $@ so the recipe always writes exactly
# the target make asked for, even if $(EXE) expands to more than one word.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
@ -1,162 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_Random.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
#ifdef KOKKOS_HAVE_CXX11
|
||||
|
||||
// The TeamPolicy actually supports 3D parallelism: Teams, Threads, Vector
|
||||
// Kokkos::parallel_{for/reduce/scan} calls can be completely free nested.
|
||||
// The execution policies for the nested layers are TeamThreadRange and
|
||||
// ThreadVectorRange.
|
||||
// The only restriction on nesting is that a given level can only be nested in a
|
||||
// higher one. e.g. a ThreadVectorRange can be nested inside a TeamPolicy operator
|
||||
// and inside a TeamThreadRange, but you can not nest a ThreadVectorRange or a
|
||||
// TeamThreadRange inside another ThreadVectorRange.
|
||||
// As with the 2D execution of TeamPolicy the operator has to be considered as
|
||||
// a parallel region even with respect to VectorLanes. That means even outside
|
||||
// a TeamThread or VectorThread loop all threads of a team and all vector lanes
|
||||
// of a thread execute every line of the operator as long as there are no restrictions
|
||||
// on them.
|
||||
// Code lines can be restricted using Kokkos::single to either execute once PerThread
|
||||
// or execute once PerTeam.
|
||||
typedef typename Kokkos::TeamPolicy<>::member_type team_member ;
|
||||
|
||||
struct SomeCorrelation {
|
||||
typedef int value_type; //Specify value type for reduction target, sum
|
||||
typedef Kokkos::DefaultExecutionSpace::scratch_memory_space shared_space;
|
||||
typedef Kokkos::View<int*,shared_space,Kokkos::MemoryUnmanaged> shared_1d_int;
|
||||
|
||||
Kokkos::View<const int***,Kokkos::LayoutRight> data;
|
||||
Kokkos::View<int> gsum;
|
||||
|
||||
SomeCorrelation(Kokkos::View<int***,Kokkos::LayoutRight> data_in,
|
||||
Kokkos::View<int> sum):data(data_in),gsum(sum){}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() ( const team_member & thread) const {
|
||||
int i = thread.league_rank();
|
||||
|
||||
// Allocate a shared array for the team.
|
||||
shared_1d_int count(thread.team_shmem(),data.dimension_1());
|
||||
|
||||
// With each team run a parallel_for with its threads
|
||||
Kokkos::parallel_for(Kokkos::TeamThreadRange(thread,data.dimension_1()), [=] (const int& j) {
|
||||
int tsum;
|
||||
// Run a vector loop reduction over the inner dimension of data
|
||||
// Count how many values are multiples of 4
|
||||
// Every vector lane gets the same reduction value (tsum) back, it is broadcast to all vector lanes
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(thread,data.dimension_2()), [=] (const int& k, int & vsum) {
|
||||
vsum+= (data(i,j,k) % 4 == 0)?1:0;
|
||||
},tsum);
|
||||
|
||||
// Make sure only one vector lane adds the reduction value to the shared array, i.e. execute
|
||||
// the next line only once PerThread
|
||||
Kokkos::single(Kokkos::PerThread(thread),[=] () {
|
||||
count(j) = tsum;
|
||||
});
|
||||
});
|
||||
|
||||
// Wait for all threads to finish the parallel_for so that all shared memory writes are done
|
||||
thread.team_barrier();
|
||||
|
||||
// Check with one vector lane from each thread how many consecutive
|
||||
// data segments have the same number of values divisible by 4
|
||||
// The team reduction value is again broadcast to every team member (and every vector lane)
|
||||
int team_sum = 0;
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(thread, data.dimension_1()-1), [=] (const int& j, int& thread_sum) {
|
||||
// It is not valid to directly add to thread_sum
|
||||
// Use a single function with broadcast instead
|
||||
// team_sum will be used as input to the operator (i.e. it is used to initialize sum)
|
||||
// the end value of sum will be broadcast to all vector lanes in the thread.
|
||||
Kokkos::single(Kokkos::PerThread(thread),[=] (int& sum) {
|
||||
if(count(j)==count(j+1)) sum++;
|
||||
},thread_sum);
|
||||
},team_sum);
|
||||
|
||||
// Add with one thread and vectorlane of the team the team_sum to the global value
|
||||
Kokkos::single(Kokkos::PerTeam(thread),[=] () {
|
||||
Kokkos::atomic_add(&gsum(),team_sum);
|
||||
});
|
||||
}
|
||||
|
||||
// The functor needs to define how much shared memory it requests given a team_size.
|
||||
size_t team_shmem_size( int team_size ) const {
|
||||
return shared_1d_int::shmem_size(data.dimension_1());
|
||||
}
|
||||
};
|
||||
|
||||
int main(int narg, char* args[]) {
|
||||
Kokkos::initialize(narg,args);
|
||||
|
||||
// Produce some 3D random data (see Algorithms/01_random_numbers for more info)
|
||||
Kokkos::View<int***,Kokkos::LayoutRight> data("Data",512,512,32);
|
||||
Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857);
|
||||
Kokkos::fill_random(data,rand_pool64,100);
|
||||
|
||||
// A global value to put the result in
|
||||
Kokkos::View<int> gsum("Sum");
|
||||
|
||||
// Each team handles a slice of the data
|
||||
// Set up TeamPolicy with 512 teams with maximum number of threads per team and 16 vector lanes.
|
||||
// The team_size_max function will determine the maximum number of threads taking into account
|
||||
// shared memory requirements of the Functor.
|
||||
// The maximum vector length is hardware dependent but can always be smaller than the hardware allows.
|
||||
// The vector length must be a power of 2.
|
||||
const Kokkos::TeamPolicy<> policy( 512 , Kokkos::TeamPolicy<>::team_size_max(SomeCorrelation(data,gsum)) , 16);
|
||||
|
||||
Kokkos::parallel_for( policy , SomeCorrelation(data,gsum) );
|
||||
|
||||
Kokkos::fence();
|
||||
|
||||
// Copy result value back
|
||||
int sum = 0;
|
||||
Kokkos::deep_copy(sum,gsum);
|
||||
printf("Result %i\n",sum);
|
||||
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
#endif //KOKKOS_HAVE_CXX11
|
||||
@ -1,43 +0,0 @@
|
||||
# Build file for a single Kokkos tutorial example.
#
# Every *.cpp file in this directory is compiled and linked into one
# executable. Kokkos-specific flags, libraries and the kokkos-clean target are
# expected to come from $(KOKKOS_PATH)/Makefile.kokkos (included below).
#
# Backend selection: if KOKKOS_DEVICES (normally given on the command line)
# contains "Cuda", the example is built with nvcc_wrapper into a .cuda
# executable; otherwise it is built with g++ into a .host executable.

KOKKOS_PATH = ../../../..
SRC := $(wildcard *.cpp)

# These goals are commands, not files; keep them working even if a file with
# the same name ever appears in the directory.
.PHONY: default build clean

# First rule of the file, hence the default goal.
default: build
	echo "Start Build"

ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# Not referenced by any rule in this file; kept for Makefile.kokkos / callers.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step. The output is named with $@ so the recipe always writes exactly
# the target make asked for, even if $(EXE) expands to more than one word.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
@ -1,141 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_DualView.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
typedef Kokkos::DefaultExecutionSpace Device ;
|
||||
typedef Kokkos::HostSpace::execution_space Host ;
|
||||
|
||||
typedef Kokkos::TeamPolicy< Device > team_policy ;
|
||||
typedef team_policy::member_type team_member ;
|
||||
|
||||
static const int TEAM_SIZE = 16 ;
|
||||
|
||||
struct find_2_tuples {
|
||||
int chunk_size;
|
||||
Kokkos::View<const int*> data;
|
||||
Kokkos::View<int**> histogram;
|
||||
|
||||
find_2_tuples(int chunk_size_, Kokkos::DualView<int*> data_,
|
||||
Kokkos::DualView<int**> histogram_):chunk_size(chunk_size_),
|
||||
data(data_.d_view),histogram(histogram_.d_view) {
|
||||
data_.sync<Device>();
|
||||
histogram_.sync<Device>();
|
||||
histogram_.modify<Device>();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() ( const team_member & dev) const {
|
||||
Kokkos::View<int**,Kokkos::MemoryUnmanaged> l_histogram(dev.team_shmem(),TEAM_SIZE,TEAM_SIZE);
|
||||
Kokkos::View<int*,Kokkos::MemoryUnmanaged> l_data(dev.team_shmem(),chunk_size+1);
|
||||
|
||||
const int i = dev.league_rank() * chunk_size;
|
||||
for(int j = dev.team_rank(); j<chunk_size+1; j+=dev.team_size())
|
||||
l_data(j) = data(i+j);
|
||||
|
||||
for(int k = dev.team_rank(); k < TEAM_SIZE; k+=dev.team_size())
|
||||
for(int l = 0; l < TEAM_SIZE; l++)
|
||||
l_histogram(k,l) = 0;
|
||||
dev.team_barrier();
|
||||
|
||||
for(int j = 0; j<chunk_size; j++) {
|
||||
for(int k = dev.team_rank(); k < TEAM_SIZE; k+=dev.team_size())
|
||||
for(int l = 0; l < TEAM_SIZE; l++) {
|
||||
if((l_data(j) == k) && (l_data(j+1)==l))
|
||||
l_histogram(k,l)++;
|
||||
}
|
||||
}
|
||||
|
||||
for(int k = dev.team_rank(); k < TEAM_SIZE; k+=dev.team_size())
|
||||
for(int l = 0; l < TEAM_SIZE; l++) {
|
||||
Kokkos::atomic_fetch_add(&histogram(k,l),l_histogram(k,l));
|
||||
}
|
||||
dev.team_barrier();
|
||||
}
|
||||
size_t team_shmem_size( int team_size ) const { return sizeof(int)*(chunk_size+2 + team_size * team_size ); }
|
||||
};
|
||||
|
||||
int main(int narg, char* args[]) {
|
||||
Kokkos::initialize(narg,args);
|
||||
|
||||
int chunk_size = 1024;
|
||||
int nchunks = 100000; //1024*1024;
|
||||
Kokkos::DualView<int*> data("data",nchunks*chunk_size+1);
|
||||
|
||||
srand(1231093);
|
||||
|
||||
for(int i = 0; i < (int) data.dimension_0(); i++) {
|
||||
data.h_view(i) = rand()%TEAM_SIZE;
|
||||
}
|
||||
data.modify<Host>();
|
||||
data.sync<Device>();
|
||||
|
||||
Kokkos::DualView<int**> histogram("histogram",TEAM_SIZE,TEAM_SIZE);
|
||||
|
||||
|
||||
Kokkos::Impl::Timer timer;
|
||||
// threads/team is automatically limited to maximum supported by the device.
|
||||
Kokkos::parallel_for( team_policy( nchunks , TEAM_SIZE )
|
||||
, find_2_tuples(chunk_size,data,histogram) );
|
||||
Kokkos::fence();
|
||||
double time = timer.seconds();
|
||||
|
||||
histogram.sync<Host>();
|
||||
|
||||
printf("Time: %f \n\n",time);
|
||||
int sum = 0;
|
||||
for(int k=0; k<TEAM_SIZE; k++) {
|
||||
for(int l=0; l<TEAM_SIZE; l++) {
|
||||
printf("%i ",histogram.h_view(k,l));
|
||||
sum += histogram.h_view(k,l);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
printf("Result: %i %i\n",sum,chunk_size*nchunks);
|
||||
Kokkos::finalize();
|
||||
}
|
||||
|
||||
@ -1,72 +0,0 @@
|
||||
# Driver for the hierarchical-parallelism tutorial examples.
#
# Every goal below runs make in each example directory:
#   default   - build with each example's own default settings
#   openmp    - build with KOKKOS_DEVICES=OpenMP
#   pthreads  - build with KOKKOS_DEVICES=Pthreads
#   serial    - build with KOKKOS_DEVICES=Serial
#   cuda      - build with KOKKOS_DEVICES=Cuda,Serial
#   clean     - run "make clean" in each example directory

EXAMPLE_DIRS := 01_thread_teams \
                01_thread_teams_lambda \
                02_nested_parallel_for \
                03_vectorization \
                04_team_scan

.PHONY: default openmp pthreads serial cuda clean

# Arguments handed to each sub-make; specialised per goal further down.
SUBMAKE_ARGS = -j 4

# $(MAKE) -C is used instead of "cd dir; make" so that a missing directory is
# an error (rather than re-running make in this directory) and so that flags
# such as -n reach the sub-makes. The loop stops at the first failing example.
default openmp pthreads serial cuda clean:
	for dir in $(EXAMPLE_DIRS); do \
		$(MAKE) -C ./$$dir $(SUBMAKE_ARGS) || exit 1; \
	done

openmp:   SUBMAKE_ARGS += KOKKOS_DEVICES=OpenMP
pthreads: SUBMAKE_ARGS += KOKKOS_DEVICES=Pthreads
serial:   SUBMAKE_ARGS += KOKKOS_DEVICES=Serial
cuda:     SUBMAKE_ARGS += KOKKOS_DEVICES=Cuda,Serial
clean:    SUBMAKE_ARGS = clean
|
||||
|
||||
@ -1,144 +0,0 @@
|
||||
# Top-level driver for the Kokkos tutorial examples.
#
# Every goal below runs make in each example directory:
#   default   - build with each example's own default settings
#   openmp    - build with KOKKOS_DEVICES=OpenMP
#   pthreads  - build with KOKKOS_DEVICES=Pthreads
#   serial    - build with KOKKOS_DEVICES=Serial
#   cuda      - build with KOKKOS_DEVICES=Cuda,Serial
#   clean     - run "make clean" in each example directory

EXAMPLE_DIRS := 01_hello_world \
                01_hello_world_lambda \
                02_simple_reduce \
                02_simple_reduce_lambda \
                03_simple_view \
                03_simple_view_lambda \
                04_simple_memoryspaces \
                05_simple_atomics \
                Advanced_Views \
                Algorithms \
                Hierarchical_Parallelism

.PHONY: default openmp pthreads serial cuda clean

# Arguments handed to each sub-make; specialised per goal further down.
SUBMAKE_ARGS = -j 4

# $(MAKE) -C is used instead of "cd dir; make" so that a missing directory is
# an error (rather than re-running make in this directory) and so that flags
# such as -n reach the sub-makes. The loop stops at the first failing example.
default openmp pthreads serial cuda clean:
	for dir in $(EXAMPLE_DIRS); do \
		$(MAKE) -C ./$$dir $(SUBMAKE_ARGS) || exit 1; \
	done

openmp:   SUBMAKE_ARGS += KOKKOS_DEVICES=OpenMP
pthreads: SUBMAKE_ARGS += KOKKOS_DEVICES=Pthreads
serial:   SUBMAKE_ARGS += KOKKOS_DEVICES=Serial
cuda:     SUBMAKE_ARGS += KOKKOS_DEVICES=Cuda,Serial
clean:    SUBMAKE_ARGS = clean
|
||||
|
||||
@ -1,17 +0,0 @@
|
||||
Build the examples by typing in each directory:
|
||||
make -j 16
|
||||
|
||||
To specify a target device:
|
||||
make openmp -j 16
|
||||
make pthreads -j 16
|
||||
make serial -j 16
|
||||
make cuda -j 16
|
||||
|
||||
The lambda variants cannot be built with CUDA=yes at the moment, since
|
||||
CUDA does not support lambdas from the host.
|
||||
Some of the advanced topics try to highlight performance impacts by timing
|
||||
different variants of doing the same thing.
|
||||
Also some of the advanced topics (in particular hierarchical parallelism)
|
||||
require C++11 even without using host side lambdas. CUDA 6.5 can be used
|
||||
to compile those.
|
||||
|
||||
Reference in New Issue
Block a user