Update to Kokkos r2.04.04 and add workaround for performance regression
This commit is contained in:
@ -1265,6 +1265,243 @@ void Random_XorShift1024_Pool<Kokkos::Cuda>::free_state(const Random_XorShift102
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCM)
|
||||
|
||||
template<>
|
||||
class Random_XorShift1024<Kokkos::Experimental::ROCm> {
|
||||
private:
|
||||
int p_;
|
||||
const int state_idx_;
|
||||
uint64_t* state_;
|
||||
const int stride_;
|
||||
friend class Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>;
|
||||
public:
|
||||
|
||||
typedef Kokkos::Experimental::ROCm device_type;
|
||||
typedef Random_XorShift1024_Pool<device_type> pool_type;
|
||||
|
||||
enum {MAX_URAND = 0xffffffffU};
|
||||
enum {MAX_URAND64 = 0xffffffffffffffffULL-1};
|
||||
enum {MAX_RAND = static_cast<int>(0xffffffffU/2)};
|
||||
enum {MAX_RAND64 = static_cast<int64_t>(0xffffffffffffffffULL/2-1)};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0):
|
||||
p_(p),state_idx_(state_idx),state_(&state(state_idx,0)),stride_(state.stride_1()){
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
uint32_t urand() {
|
||||
uint64_t state_0 = state_[ p_ * stride_ ];
|
||||
uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ];
|
||||
state_1 ^= state_1 << 31;
|
||||
state_1 ^= state_1 >> 11;
|
||||
state_0 ^= state_0 >> 30;
|
||||
uint64_t tmp = ( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL;
|
||||
tmp = tmp>>16;
|
||||
return static_cast<uint32_t>(tmp&MAX_URAND);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
uint64_t urand64() {
|
||||
uint64_t state_0 = state_[ p_ * stride_ ];
|
||||
uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ];
|
||||
state_1 ^= state_1 << 31;
|
||||
state_1 ^= state_1 >> 11;
|
||||
state_0 ^= state_0 >> 30;
|
||||
return (( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
uint32_t urand(const uint32_t& range) {
|
||||
const uint32_t max_val = (MAX_URAND/range)*range;
|
||||
uint32_t tmp = urand();
|
||||
while(tmp>=max_val)
|
||||
urand();
|
||||
return tmp%range;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
uint32_t urand(const uint32_t& start, const uint32_t& end ) {
|
||||
return urand(end-start)+start;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
uint64_t urand64(const uint64_t& range) {
|
||||
const uint64_t max_val = (MAX_URAND64/range)*range;
|
||||
uint64_t tmp = urand64();
|
||||
while(tmp>=max_val)
|
||||
urand64();
|
||||
return tmp%range;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
uint64_t urand64(const uint64_t& start, const uint64_t& end ) {
|
||||
return urand64(end-start)+start;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int rand() {
|
||||
return static_cast<int>(urand()/2);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int rand(const int& range) {
|
||||
const int max_val = (MAX_RAND/range)*range;
|
||||
int tmp = rand();
|
||||
while(tmp>=max_val)
|
||||
rand();
|
||||
return tmp%range;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int rand(const int& start, const int& end ) {
|
||||
return rand(end-start)+start;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int64_t rand64() {
|
||||
return static_cast<int64_t>(urand64()/2);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int64_t rand64(const int64_t& range) {
|
||||
const int64_t max_val = (MAX_RAND64/range)*range;
|
||||
int64_t tmp = rand64();
|
||||
while(tmp>=max_val)
|
||||
rand64();
|
||||
return tmp%range;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int64_t rand64(const int64_t& start, const int64_t& end ) {
|
||||
return rand64(end-start)+start;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
float frand() {
|
||||
return 1.0f * urand64()/MAX_URAND64;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
float frand(const float& range) {
|
||||
return range * urand64()/MAX_URAND64;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
float frand(const float& start, const float& end ) {
|
||||
return frand(end-start)+start;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double drand() {
|
||||
return 1.0 * urand64()/MAX_URAND64;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double drand(const double& range) {
|
||||
return range * urand64()/MAX_URAND64;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double drand(const double& start, const double& end ) {
|
||||
return frand(end-start)+start;
|
||||
}
|
||||
|
||||
//Marsaglia polar method for drawing a standard normal distributed random number
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double normal() {
|
||||
double S = 2.0;
|
||||
double U;
|
||||
while(S>=1.0) {
|
||||
U = 2.0*drand() - 1.0;
|
||||
const double V = 2.0*drand() - 1.0;
|
||||
S = U*U+V*V;
|
||||
}
|
||||
return U*std::sqrt(-2.0*log(S)/S);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double normal(const double& mean, const double& std_dev=1.0) {
|
||||
return mean + normal()*std_dev;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
inline
|
||||
Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::Random_XorShift64_Pool(uint64_t seed) {
|
||||
num_states_ = 0;
|
||||
init(seed,4*32768);
|
||||
}
|
||||
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Random_XorShift64<Kokkos::Experimental::ROCm> Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::get_state() const {
|
||||
#ifdef __HCC_ACCELERATOR__
|
||||
const int i_offset = (threadIdx_x*blockDim_y + threadIdx_y)*blockDim_z+threadIdx_z;
|
||||
int i = (((blockIdx_x*gridDim_y+blockIdx_y)*gridDim_z + blockIdx_z) *
|
||||
blockDim_x*blockDim_y*blockDim_z + i_offset)%num_states_;
|
||||
while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) {
|
||||
i+=blockDim_x*blockDim_y*blockDim_z;
|
||||
if(i>=num_states_) {i = i_offset;}
|
||||
}
|
||||
|
||||
return Random_XorShift64<Kokkos::Experimental::ROCm>(state_(i),i);
|
||||
#else
|
||||
return Random_XorShift64<Kokkos::Experimental::ROCm>(state_(0),0);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void Random_XorShift64_Pool<Kokkos::Experimental::ROCm>::free_state(const Random_XorShift64<Kokkos::Experimental::ROCm> &state) const {
|
||||
#ifdef __HCC_ACCELERATOR__
|
||||
state_(state.state_idx_) = state.state_;
|
||||
locks_(state.state_idx_) = 0;
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
template<>
|
||||
inline
|
||||
Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::Random_XorShift1024_Pool(uint64_t seed) {
|
||||
num_states_ = 0;
|
||||
init(seed,4*32768);
|
||||
}
|
||||
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Random_XorShift1024<Kokkos::Experimental::ROCm> Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::get_state() const {
|
||||
#ifdef __HCC_ACCELERATOR__
|
||||
const int i_offset = (threadIdx_x*blockDim_y + threadIdx_y)*blockDim_z+threadIdx_z;
|
||||
int i = (((blockIdx_x*gridDim_y+blockIdx_y)*gridDim_z + blockIdx_z) *
|
||||
blockDim_x*blockDim_y*blockDim_z + i_offset)%num_states_;
|
||||
while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) {
|
||||
i+=blockDim_x*blockDim_y*blockDim_z;
|
||||
if(i>=num_states_) {i = i_offset;}
|
||||
}
|
||||
|
||||
return Random_XorShift1024<Kokkos::Experimental::ROCm>(state_, p_(i), i);
|
||||
#else
|
||||
return Random_XorShift1024<Kokkos::Experimental::ROCm>(state_, p_(0), 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void Random_XorShift1024_Pool<Kokkos::Experimental::ROCm>::free_state(const Random_XorShift1024<Kokkos::Experimental::ROCm> &state) const {
|
||||
#ifdef __HCC_ACCELERATOR__
|
||||
for(int i=0; i<16; i++)
|
||||
state_(state.state_idx_,i) = state.state_[i];
|
||||
locks_(state.state_idx_) = 0;
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
@ -30,6 +30,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
TEST_TARGETS += test-cuda
|
||||
endif
|
||||
|
||||
# When the ROCm backend is enabled, register its unit-test executable: the
# object list it is linked from, the build target and the matching run target.
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
|
||||
OBJ_ROCM = TestROCm.o UnitTestMain.o gtest-all.o
|
||||
TARGETS += KokkosAlgorithms_UnitTest_ROCm
|
||||
TEST_TARGETS += test-rocm
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o
|
||||
TARGETS += KokkosAlgorithms_UnitTest_Threads
|
||||
@ -51,6 +57,9 @@ endif
|
||||
# Link rules: each backend's unit-test executable is linked from that
# backend's object list and depends on $(KOKKOS_LINK_DEPENDS).
KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
|
||||
$(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Cuda
|
||||
|
||||
KokkosAlgorithms_UnitTest_ROCm: $(OBJ_ROCM) $(KOKKOS_LINK_DEPENDS)
|
||||
$(LINK) $(EXTRA_PATH) $(OBJ_ROCM) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_ROCm
|
||||
|
||||
KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
|
||||
$(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Threads
|
||||
|
||||
@ -63,6 +72,9 @@ KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
|
||||
# Run rules: each test-<backend> target depends on that backend's unit-test
# executable and runs it from the current directory.
test-cuda: KokkosAlgorithms_UnitTest_Cuda
|
||||
./KokkosAlgorithms_UnitTest_Cuda
|
||||
|
||||
test-rocm: KokkosAlgorithms_UnitTest_ROCm
|
||||
./KokkosAlgorithms_UnitTest_ROCm
|
||||
|
||||
test-threads: KokkosAlgorithms_UnitTest_Threads
|
||||
./KokkosAlgorithms_UnitTest_Threads
|
||||
|
||||
|
||||
112
lib/kokkos/algorithms/unit_tests/TestROCm.cpp
Normal file
112
lib/kokkos/algorithms/unit_tests/TestROCm.cpp
Normal file
@ -0,0 +1,112 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#ifdef KOKKOS_ENABLE_ROCM
|
||||
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <TestRandom.hpp>
|
||||
#include <TestSort.hpp>
|
||||
|
||||
namespace Test {
|
||||
|
||||
class rocm : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase()
|
||||
{
|
||||
std::cout << std::setprecision(5) << std::scientific;
|
||||
Kokkos::HostSpace::execution_space::initialize();
|
||||
Kokkos::Experimental::ROCm::initialize( Kokkos::Experimental::ROCm::SelectDevice(0) );
|
||||
}
|
||||
static void TearDownTestCase()
|
||||
{
|
||||
Kokkos::Experimental::ROCm::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
};
|
||||
|
||||
void rocm_test_random_xorshift64( int num_draws )
|
||||
{
|
||||
Impl::test_random<Kokkos::Random_XorShift64_Pool<Kokkos::Experimental::ROCm> >(num_draws);
|
||||
}
|
||||
|
||||
void rocm_test_random_xorshift1024( int num_draws )
|
||||
{
|
||||
Impl::test_random<Kokkos::Random_XorShift1024_Pool<Kokkos::Experimental::ROCm> >(num_draws);
|
||||
}
|
||||
|
||||
|
||||
#define ROCM_RANDOM_XORSHIFT64( num_draws ) \
|
||||
TEST_F( rocm, Random_XorShift64 ) { \
|
||||
rocm_test_random_xorshift64(num_draws); \
|
||||
}
|
||||
|
||||
#define ROCM_RANDOM_XORSHIFT1024( num_draws ) \
|
||||
TEST_F( rocm, Random_XorShift1024 ) { \
|
||||
rocm_test_random_xorshift1024(num_draws); \
|
||||
}
|
||||
|
||||
#define ROCM_SORT_UNSIGNED( size ) \
|
||||
TEST_F( rocm, SortUnsigned ) { \
|
||||
Impl::test_sort< Kokkos::Experimental::ROCm, unsigned >(size); \
|
||||
}
|
||||
|
||||
ROCM_RANDOM_XORSHIFT64( 132141141 )
|
||||
ROCM_RANDOM_XORSHIFT1024( 52428813 )
|
||||
ROCM_SORT_UNSIGNED(171)
|
||||
|
||||
#undef ROCM_RANDOM_XORSHIFT64
|
||||
#undef ROCM_RANDOM_XORSHIFT1024
|
||||
#undef ROCM_SORT_UNSIGNED
|
||||
}
|
||||
#else
|
||||
// Empty placeholder so this translation unit still defines a symbol when
// KOKKOS_ENABLE_ROCM is not set and all tests above are compiled out.
void KOKKOS_ALGORITHMS_UNITTESTS_TESTROCM_PREVENT_LINK_ERROR() {}
|
||||
#endif /* #ifdef KOKKOS_ENABLE_ROCM */
|
||||
|
||||
Reference in New Issue
Block a user