Updating Kokkos lib
This commit is contained in:
@@ -1,37 +1,42 @@
|
||||
KOKKOS_PATH = ../../..
|
||||
SRC = $(wildcard *.cpp)
|
||||
KOKKOS_SRC_PATH = ${KOKKOS_PATH}
|
||||
SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/02_simple_reduce_lambda/*.cpp)
|
||||
vpath %.cpp $(sort $(dir $(SRC)))
|
||||
|
||||
default: build
|
||||
echo "Start Build"
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = ../../../config/nvcc_wrapper
|
||||
CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
EXE = $(SRC:.cpp=.cuda)
|
||||
EXE = 02_simple_reduce_lambda.cuda
|
||||
KOKKOS_DEVICES = "Cuda,OpenMP"
|
||||
KOKKOS_ARCH = "SNB,Kepler35"
|
||||
KOKKOS_CUDA_OPTIONS = "enable_lambda"
|
||||
KOKKOS_CUDA_OPTIONS += "enable_lambda"
|
||||
else
|
||||
CXX = g++
|
||||
CXXFLAGS = -O3
|
||||
LINK = ${CXX}
|
||||
LINKFLAGS =
|
||||
EXE = $(SRC:.cpp=.host)
|
||||
EXE = 02_simple_reduce_lambda.host
|
||||
KOKKOS_DEVICES = "OpenMP"
|
||||
KOKKOS_ARCH = "SNB"
|
||||
endif
|
||||
|
||||
DEPFLAGS = -M
|
||||
|
||||
OBJ = $(SRC:.cpp=.o)
|
||||
OBJ = $(notdir $(SRC:.cpp=.o))
|
||||
LIB =
|
||||
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
|
||||
build: $(EXE)
|
||||
|
||||
test: $(EXE)
|
||||
./$(EXE)
|
||||
|
||||
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
|
||||
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
|
||||
|
||||
@@ -41,4 +46,4 @@ clean: kokkos-clean
|
||||
# Compilation rules
|
||||
|
||||
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@)
|
||||
|
||||
@@ -67,9 +67,13 @@ int main (int argc, char* argv[]) {
|
||||
int sum = 0;
|
||||
// The KOKKOS_LAMBDA macro replaces the capture-by-value clause [=].
|
||||
// It also handles any other syntax needed for CUDA.
|
||||
// We also need to protect the usage of a lambda against compiling
|
||||
// with a backend which doesn't support it (i.e. Cuda 6.5/7.0).
|
||||
#if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
|
||||
Kokkos::parallel_reduce (n, KOKKOS_LAMBDA (const int i, int& lsum) {
|
||||
lsum += i*i;
|
||||
}, sum);
|
||||
#endif
|
||||
printf ("Sum of squares of integers from 0 to %i, "
|
||||
"computed in parallel, is %i\n", n - 1, sum);
|
||||
|
||||
@@ -81,6 +85,10 @@ int main (int argc, char* argv[]) {
|
||||
printf ("Sum of squares of integers from 0 to %i, "
|
||||
"computed sequentially, is %i\n", n - 1, seqSum);
|
||||
Kokkos::finalize ();
|
||||
#if (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA)
|
||||
return (sum == seqSum) ? 0 : -1;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user