git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14371 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2015-12-15 22:29:37 +00:00
parent 06a217aa08
commit b5a1ba9bfa
411 changed files with 133413 additions and 0 deletions

View File

@ -0,0 +1,43 @@
# Build file for the example in this directory: every .cpp file here is
# compiled to an object and linked into an executable whose suffix (.cuda or
# .host) depends on the selected Kokkos devices.

# Root of the Kokkos tree, relative to this directory; Makefile.kokkos is
# included from there further down.
KOKKOS_PATH = ../../../..
# All C++ sources found in this directory.
SRC = $(wildcard *.cpp)
# First rule in this file. `build` is a prerequisite, so the echo below runs
# once `build` is up to date (i.e. the message is printed after the build).
default: build
echo "Start Build"
# Pick compiler, executable suffix and Kokkos settings depending on whether
# the incoming KOKKOS_DEVICES value contains the substring "Cuda".
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
# Cuda selected: compile and link with nvcc_wrapper, executables end in .cuda.
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
# No Cuda: compile and link with g++, executables end in .host.
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif
# DEPFLAGS is not referenced by any rule in this file; presumably it is
# consumed by Makefile.kokkos -- TODO confirm.
DEPFLAGS = -M
# One object file per source file.
OBJ = $(SRC:.cpp=.o)
# Extra libraries appended to the link line (none by default).
LIB =
# The KOKKOS_* variables and the kokkos-clean target used below are not
# defined in this file; presumably they come from this include -- TODO confirm.
include $(KOKKOS_PATH)/Makefile.kokkos
build: $(EXE)
# Link step. EXTRA_PATH is not set in this file, so it expands to nothing
# unless it is supplied from outside.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
# Remove objects and both executable flavours after running kokkos-clean.
clean: kokkos-clean
rm -f *.o *.cuda *.host
# Compilation rules: build each object from the .cpp file of the same name.
# EXTRA_INC is not set in this file, so it expands to nothing unless it is
# supplied from outside.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -0,0 +1,152 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <Kokkos_DualView.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
// Alias for the execution space that Kokkos associates with HostSpace.
// NOTE(review): not referenced anywhere in the visible part of this file.
typedef Kokkos::HostSpace::execution_space DefaultHostType;
// Kokkos provides two different random number generators with a 64 bit and a 1024 bit state.
// These generators are based on Vigna, Sebastiano (2014). "An experimental exploration of Marsaglia's xorshift generators, scrambled"
// See: http://arxiv.org/abs/1402.6246
// The generators can be used fully independently on each thread and have been tested to
// produce good statistics for both inter and intra thread numbers.
// Note that within a kernel NO random number operations are (team) collective operations.
// Everything can be called within branches. This is a difference to the curand library where
// certain operations are required to be called by all threads in a block.
//
// In Kokkos you are required to create a pool of generator states, so that threads can
// grab their own. On CPU architectures the pool size is equal to the thread number,
// on CUDA about 128k states are generated (enough to give every potentially simultaneously
// running thread its own state). Within a kernel a thread is required to acquire a state from the
// pool and later return it.
// On CPUs the Random number generator is deterministic if using the same number of threads.
// On GPUs (i.e. using the CUDA backend) it is not deterministic because threads acquire states via
// atomics.
// A Functor for generating uint64_t random numbers templated on the GeneratorPool type.
// Work item i fills the `samples` consecutive entries of `vals` that start at
// index i*samples, so `vals` must hold at least (number of work items)*samples entries.
template<class GeneratorPool>
struct generate_random {

  // The GeneratorPool
  GeneratorPool rand_pool;

  // Output View for the random numbers
  Kokkos::View<uint64_t*> vals;

  // Number of random values drawn by each work item
  int samples;

  // Initialize all members. The initializer list is written in declaration
  // order (rand_pool, vals, samples) because that is the order in which the
  // members are constructed regardless of how the list is spelled.
  generate_random(Kokkos::View<uint64_t*> vals_,
                  GeneratorPool rand_pool_,
                  int samples_):
    rand_pool(rand_pool_),vals(vals_),samples(samples_) {}

  KOKKOS_INLINE_FUNCTION
  void operator() (int i) const {
    // Get a random number state from the pool for the active thread
    typename GeneratorPool::generator_type rand_gen = rand_pool.get_state();

    // Compute the start of this work item's slice in size_t: the product
    // i*samples can exceed the range of int for large runs.
    const size_t offset = static_cast<size_t>(i) * static_cast<size_t>(samples);

    // Draw samples numbers from the pool as urand64 between 0 and rand_pool.MAX_URAND64
    // Note there are function calls to get other type of scalars, and also to specify
    // Ranges or get a normal distributed float.
    for(int k = 0;k<samples;k++)
      vals(offset+k) = rand_gen.urand64();

    // Give the state back, which will allow another thread to acquire it
    rand_pool.free_state(rand_gen);
  }
};
// Benchmark driver for the two Kokkos random number generator pools.
//
// Usage: <executable> <size> <samples>
//   size    - number of parallel work items
//   samples - random numbers drawn by each work item
//
// Prints the elapsed time of one timed run per generator together with the
// rate in units of 1e9 generated numbers per second.
//
// Returns 0 after a normal run and also after printing the usage message
// (exit status kept as it always was); returns EXIT_FAILURE when one of the
// two integers is negative.
int main(int argc, char* args[]) {
  if (argc != 3){
    printf("Please pass two integers on the command line\n");
    return 0;
  }

  const int size = atoi(args[1]);
  const int samples = atoi(args[2]);

  // A negative count would be converted into a huge unsigned allocation size.
  if (size < 0 || samples < 0){
    printf("Both integers on the command line must be non-negative\n");
    return EXIT_FAILURE;
  }

  // Total number of values, multiplied in size_t so that the product cannot
  // overflow int.
  const size_t total = static_cast<size_t>(size) * static_cast<size_t>(samples);

  // Initialize Kokkos
  Kokkos::initialize(argc,args);

  // Inner scope: the pools and the DualView have to be destroyed before
  // Kokkos::finalize() is called, so they must not outlive this block.
  {
    // Create two random number generator pools one for 64bit states and one for 1024 bit states
    // Both take an 64 bit unsigned integer seed to initialize a Random_XorShift64 generator which
    // is used to fill the generators of the pool.
    Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857);
    Kokkos::Random_XorShift1024_Pool<> rand_pool1024(5374857);
    Kokkos::DualView<uint64_t*> vals("Vals",total);

    // Run some performance comparisons. For each generator the first launch
    // is an untimed warm-up; only the second launch is measured.
    Kokkos::Impl::Timer timer;
    Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift64_Pool<> >(vals.d_view,rand_pool64,samples));
    Kokkos::fence();

    timer.reset();
    Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift64_Pool<> >(vals.d_view,rand_pool64,samples));
    Kokkos::fence();
    double time_64 = timer.seconds();

    Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift1024_Pool<> >(vals.d_view,rand_pool1024,samples));
    Kokkos::fence();

    timer.reset();
    Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift1024_Pool<> >(vals.d_view,rand_pool1024,samples));
    Kokkos::fence();
    double time_1024 = timer.seconds();

    printf("#Time XorShift64*: %lf %lf\n",time_64,1.0e-9*samples*size/time_64 );
    printf("#Time XorShift1024*: %lf %lf\n",time_1024,1.0e-9*samples*size/time_1024 );

    // Copy the generated numbers from the device view to the host view
    Kokkos::deep_copy(vals.h_view,vals.d_view);
  }

  Kokkos::finalize();
  return 0;
}

View File

@ -0,0 +1,24 @@
# Driver Makefile: every target delegates to the Makefile in
# ./01_random_numbers, selecting the Kokkos devices to build for.
#
# The sub-make is started with "$(MAKE) -C <dir>" rather than "cd <dir>; make"
# so that a missing directory is a hard error (instead of make being run again
# in this directory) and so that the same make program and its flags are used.

# None of these targets produce a file with their own name.
.PHONY: default openmp pthreads serial cuda clean

# Build with the device selection chosen by the sub-directory Makefile.
default:
	$(MAKE) -C ./01_random_numbers -j 4

openmp:
	$(MAKE) -C ./01_random_numbers -j 4 KOKKOS_DEVICES=OpenMP

pthreads:
	$(MAKE) -C ./01_random_numbers -j 4 KOKKOS_DEVICES=Pthreads

serial:
	$(MAKE) -C ./01_random_numbers -j 4 KOKKOS_DEVICES=Serial

cuda:
	$(MAKE) -C ./01_random_numbers -j 4 KOKKOS_DEVICES=Cuda,Serial

# Remove the build products of the sub-directory.
clean:
	$(MAKE) -C ./01_random_numbers clean