Update Kokkos library in LAMMPS to v3.5.0
This commit is contained in:
@ -447,6 +447,25 @@ struct rand<Generator, unsigned long long> {
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
|
||||
// Specialization of rand for Kokkos::Experimental::half_t.
// Compiled only when KOKKOS_HALF_T_IS_FLOAT is defined and evaluates to false
// (see the enclosing #if). Every draw forwards to one of the generator's
// frand() overloads and converts the result to half_t.
template <class Generator>
|
||||
struct rand<Generator, Kokkos::Experimental::half_t> {
|
||||
// Short alias used by the members below.
using half = Kokkos::Experimental::half_t;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
// Returns half(1.0).
static half max() { return half(1.0); }
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
// Returns gen.frand() converted to half.
static half draw(Generator& gen) { return half(gen.frand()); }
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
// Returns gen.frand(range) converted to half; `range` is widened to float
// before the call because frand() is invoked with a float argument.
// NOTE(review): presumably the result lies in [0, range) -- confirm against
// the generator's frand(float) contract, which is not visible here.
static half draw(Generator& gen, const half& range) {
|
||||
return half(gen.frand(float(range)));
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
// Returns gen.frand(start, end) converted to half; both bounds are widened
// to float before the call.
// NOTE(review): presumably the result lies in [start, end) -- confirm against
// the generator's frand(float, float) contract, which is not visible here.
static half draw(Generator& gen, const half& start, const half& end) {
|
||||
return half(gen.frand(float(start), float(end)));
|
||||
}
|
||||
};
|
||||
#endif // defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT
|
||||
|
||||
template <class Generator>
|
||||
struct rand<Generator, float> {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -600,7 +619,7 @@ struct Random_XorShift1024_UseCArrayState<Kokkos::Experimental::OpenMPTarget>
|
||||
|
||||
template <class ExecutionSpace>
|
||||
struct Random_UniqueIndex {
|
||||
using locks_view_type = View<int*, ExecutionSpace>;
|
||||
using locks_view_type = View<int**, ExecutionSpace>;
|
||||
KOKKOS_FUNCTION
|
||||
static int get_state_idx(const locks_view_type) {
|
||||
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
|
||||
@ -615,7 +634,7 @@ struct Random_UniqueIndex {
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
template <>
|
||||
struct Random_UniqueIndex<Kokkos::Cuda> {
|
||||
using locks_view_type = View<int*, Kokkos::Cuda>;
|
||||
using locks_view_type = View<int**, Kokkos::Cuda>;
|
||||
KOKKOS_FUNCTION
|
||||
static int get_state_idx(const locks_view_type& locks_) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
@ -625,7 +644,7 @@ struct Random_UniqueIndex<Kokkos::Cuda> {
|
||||
blockDim.x * blockDim.y * blockDim.z +
|
||||
i_offset) %
|
||||
locks_.extent(0);
|
||||
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
|
||||
while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
|
||||
i += blockDim.x * blockDim.y * blockDim.z;
|
||||
if (i >= static_cast<int>(locks_.extent(0))) {
|
||||
i = i_offset;
|
||||
@ -643,7 +662,7 @@ struct Random_UniqueIndex<Kokkos::Cuda> {
|
||||
#ifdef KOKKOS_ENABLE_HIP
|
||||
template <>
|
||||
struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
|
||||
using locks_view_type = View<int*, Kokkos::Experimental::HIP>;
|
||||
using locks_view_type = View<int**, Kokkos::Experimental::HIP>;
|
||||
KOKKOS_FUNCTION
|
||||
static int get_state_idx(const locks_view_type& locks_) {
|
||||
#ifdef __HIP_DEVICE_COMPILE__
|
||||
@ -653,7 +672,7 @@ struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
|
||||
blockDim.x * blockDim.y * blockDim.z +
|
||||
i_offset) %
|
||||
locks_.extent(0);
|
||||
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
|
||||
while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
|
||||
i += blockDim.x * blockDim.y * blockDim.z;
|
||||
if (i >= static_cast<int>(locks_.extent(0))) {
|
||||
i = i_offset;
|
||||
@ -671,15 +690,15 @@ struct Random_UniqueIndex<Kokkos::Experimental::HIP> {
|
||||
#ifdef KOKKOS_ENABLE_SYCL
|
||||
template <>
|
||||
struct Random_UniqueIndex<Kokkos::Experimental::SYCL> {
|
||||
using locks_view_type = View<int*, Kokkos::Experimental::SYCL>;
|
||||
using locks_view_type = View<int**, Kokkos::Experimental::SYCL>;
|
||||
KOKKOS_FUNCTION
|
||||
static int get_state_idx(const locks_view_type& locks_) {
|
||||
#ifdef KOKKOS_ARCH_INTEL_GEN
|
||||
#ifdef KOKKOS_ARCH_INTEL_GPU
|
||||
int i = Kokkos::Impl::clock_tic() % locks_.extent(0);
|
||||
#else
|
||||
int i = 0;
|
||||
#endif
|
||||
while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) {
|
||||
while (Kokkos::atomic_compare_exchange(&locks_(i, 0), 0, 1)) {
|
||||
i = (i + 1) % static_cast<int>(locks_.extent(0));
|
||||
}
|
||||
return i;
|
||||
@ -690,14 +709,14 @@ struct Random_UniqueIndex<Kokkos::Experimental::SYCL> {
|
||||
#ifdef KOKKOS_ENABLE_OPENMPTARGET
|
||||
template <>
|
||||
struct Random_UniqueIndex<Kokkos::Experimental::OpenMPTarget> {
|
||||
using locks_view_type = View<int*, Kokkos::Experimental::OpenMPTarget>;
|
||||
using locks_view_type = View<int**, Kokkos::Experimental::OpenMPTarget>;
|
||||
KOKKOS_FUNCTION
|
||||
static int get_state_idx(const locks_view_type& locks) {
|
||||
const int team_size = omp_get_num_threads();
|
||||
int i = omp_get_team_num() * team_size + omp_get_thread_num();
|
||||
const int lock_size = locks.extent_int(0);
|
||||
|
||||
while (Kokkos::atomic_compare_exchange(&locks(i), 0, 1)) {
|
||||
while (Kokkos::atomic_compare_exchange(&locks(i, 0), 0, 1)) {
|
||||
i = (i + 1) % lock_size;
|
||||
}
|
||||
return i;
|
||||
@ -856,18 +875,22 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
|
||||
class Random_XorShift64_Pool {
|
||||
private:
|
||||
using execution_space = typename DeviceType::execution_space;
|
||||
using locks_type = View<int*, execution_space>;
|
||||
using state_data_type = View<uint64_t*, DeviceType>;
|
||||
using locks_type = View<int**, execution_space>;
|
||||
using state_data_type = View<uint64_t**, DeviceType>;
|
||||
locks_type locks_;
|
||||
state_data_type state_;
|
||||
int num_states_;
|
||||
int padding_;
|
||||
|
||||
public:
|
||||
using generator_type = Random_XorShift64<DeviceType>;
|
||||
using device_type = DeviceType;
|
||||
|
||||
// Default constructor: zero-initializes the scalar bookkeeping members and
// leaves the views default-constructed.
// NOTE(review): this page is a diff rendered without +/- markers. The one-line
// constructor looks like the removed version and the multi-line constructor
// (which also zeroes padding_) like its replacement -- confirm against the
// upstream file.
KOKKOS_INLINE_FUNCTION
|
||||
Random_XorShift64_Pool() { num_states_ = 0; }
|
||||
Random_XorShift64_Pool() {
|
||||
num_states_ = 0;
|
||||
padding_ = 0;
|
||||
}
|
||||
Random_XorShift64_Pool(uint64_t seed) {
|
||||
num_states_ = 0;
|
||||
|
||||
@ -883,16 +906,22 @@ class Random_XorShift64_Pool {
|
||||
locks_ = src.locks_;
|
||||
state_ = src.state_;
|
||||
num_states_ = src.num_states_;
|
||||
padding_ = src.padding_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
void init(uint64_t seed, int num_states) {
|
||||
if (seed == 0) seed = uint64_t(1318319);
|
||||
|
||||
// I only want to pad on CPU like archs (less than 1000 threads). 64 is a
|
||||
// magic number, or random number I just wanted something not too large and
|
||||
// not too small. 64 sounded fine.
|
||||
padding_ = num_states < 1000 ? 64 : 1;
|
||||
num_states_ = num_states;
|
||||
|
||||
locks_ = locks_type("Kokkos::Random_XorShift64::locks", num_states_);
|
||||
state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_);
|
||||
locks_ =
|
||||
locks_type("Kokkos::Random_XorShift64::locks", num_states, padding_);
|
||||
state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_,
|
||||
padding_);
|
||||
|
||||
typename state_data_type::HostMirror h_state = create_mirror_view(state_);
|
||||
typename locks_type::HostMirror h_lock = create_mirror_view(locks_);
|
||||
@ -902,15 +931,15 @@ class Random_XorShift64_Pool {
|
||||
gen(seed, 0);
|
||||
for (int i = 0; i < 17; i++) gen.rand();
|
||||
for (int i = 0; i < num_states_; i++) {
|
||||
int n1 = gen.rand();
|
||||
int n2 = gen.rand();
|
||||
int n3 = gen.rand();
|
||||
int n4 = gen.rand();
|
||||
h_state(i) = (((static_cast<uint64_t>(n1)) & 0xffff) << 00) |
|
||||
(((static_cast<uint64_t>(n2)) & 0xffff) << 16) |
|
||||
(((static_cast<uint64_t>(n3)) & 0xffff) << 32) |
|
||||
(((static_cast<uint64_t>(n4)) & 0xffff) << 48);
|
||||
h_lock(i) = 0;
|
||||
int n1 = gen.rand();
|
||||
int n2 = gen.rand();
|
||||
int n3 = gen.rand();
|
||||
int n4 = gen.rand();
|
||||
h_state(i, 0) = (((static_cast<uint64_t>(n1)) & 0xffff) << 00) |
|
||||
(((static_cast<uint64_t>(n2)) & 0xffff) << 16) |
|
||||
(((static_cast<uint64_t>(n3)) & 0xffff) << 32) |
|
||||
(((static_cast<uint64_t>(n4)) & 0xffff) << 48);
|
||||
h_lock(i, 0) = 0;
|
||||
}
|
||||
deep_copy(state_, h_state);
|
||||
deep_copy(locks_, h_lock);
|
||||
@ -920,19 +949,19 @@ class Random_XorShift64_Pool {
|
||||
// Obtains a state index from Impl::Random_UniqueIndex<execution_space> using
// locks_, then returns a generator built from the stored state at that index
// together with the index itself.
// NOTE(review): this page is a diff rendered without +/- markers. The two
// return lines look like the removed (1-D indexing) and added (2-D indexing,
// column 0) versions of the same statement -- confirm against the upstream
// file.
Random_XorShift64<DeviceType> get_state() const {
|
||||
const int i =
|
||||
Impl::Random_UniqueIndex<execution_space>::get_state_idx(locks_);
|
||||
return Random_XorShift64<DeviceType>(state_(i), i);
|
||||
return Random_XorShift64<DeviceType>(state_(i, 0), i);
|
||||
}
|
||||
|
||||
// NOTE: state_idx MUST be unique and less than num_states
|
||||
// Returns a generator built from the stored state at the caller-supplied
// state_idx. Unlike the no-argument overload, this one does not touch locks_;
// the index is used as given.
// NOTE(review): this page is a diff rendered without +/- markers. The two
// return lines look like the removed (1-D indexing) and added (2-D indexing,
// column 0) versions of the same statement -- confirm against the upstream
// file.
KOKKOS_INLINE_FUNCTION
|
||||
Random_XorShift64<DeviceType> get_state(const int state_idx) const {
|
||||
return Random_XorShift64<DeviceType>(state_(state_idx), state_idx);
|
||||
return Random_XorShift64<DeviceType>(state_(state_idx, 0), state_idx);
|
||||
}
|
||||
|
||||
// Writes the generator's current state back into the pool at
// state.state_idx_, then stores 0 in the lock entry for that index. A lock
// value of 0 is what the compare-exchange (0 -> 1) loops in
// Random_UniqueIndex::get_state_idx wait for, so the slot becomes claimable
// again.
// NOTE(review): this page is a diff rendered without +/- markers. The
// single-index statements look like the removed versions and the (idx, 0)
// statements like their replacements -- confirm against the upstream file.
KOKKOS_INLINE_FUNCTION
|
||||
void free_state(const Random_XorShift64<DeviceType>& state) const {
|
||||
state_(state.state_idx_) = state.state_;
|
||||
locks_(state.state_idx_) = 0;
|
||||
state_(state.state_idx_, 0) = state.state_;
|
||||
locks_(state.state_idx_, 0) = 0;
|
||||
}
|
||||
};
|
||||
|
||||
@ -1092,14 +1121,15 @@ template <class DeviceType = Kokkos::DefaultExecutionSpace>
|
||||
class Random_XorShift1024_Pool {
|
||||
private:
|
||||
using execution_space = typename DeviceType::execution_space;
|
||||
using locks_type = View<int*, execution_space>;
|
||||
using int_view_type = View<int*, DeviceType>;
|
||||
using locks_type = View<int**, execution_space>;
|
||||
using int_view_type = View<int**, DeviceType>;
|
||||
using state_data_type = View<uint64_t * [16], DeviceType>;
|
||||
|
||||
locks_type locks_;
|
||||
state_data_type state_;
|
||||
int_view_type p_;
|
||||
int num_states_;
|
||||
int padding_;
|
||||
friend class Random_XorShift1024<DeviceType>;
|
||||
|
||||
public:
|
||||
@ -1129,15 +1159,21 @@ class Random_XorShift1024_Pool {
|
||||
state_ = src.state_;
|
||||
p_ = src.p_;
|
||||
num_states_ = src.num_states_;
|
||||
padding_ = src.padding_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline void init(uint64_t seed, int num_states) {
|
||||
if (seed == 0) seed = uint64_t(1318319);
|
||||
// I only want to pad on CPU like archs (less than 1000 threads). 64 is a
|
||||
// magic number, or random number I just wanted something not too large and
|
||||
// not too small. 64 sounded fine.
|
||||
padding_ = num_states < 1000 ? 64 : 1;
|
||||
num_states_ = num_states;
|
||||
locks_ = locks_type("Kokkos::Random_XorShift1024::locks", num_states_);
|
||||
locks_ =
|
||||
locks_type("Kokkos::Random_XorShift1024::locks", num_states_, padding_);
|
||||
state_ = state_data_type("Kokkos::Random_XorShift1024::state", num_states_);
|
||||
p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_);
|
||||
p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_, padding_);
|
||||
|
||||
typename state_data_type::HostMirror h_state = create_mirror_view(state_);
|
||||
typename locks_type::HostMirror h_lock = create_mirror_view(locks_);
|
||||
@ -1158,8 +1194,8 @@ class Random_XorShift1024_Pool {
|
||||
(((static_cast<uint64_t>(n3)) & 0xffff) << 32) |
|
||||
(((static_cast<uint64_t>(n4)) & 0xffff) << 48);
|
||||
}
|
||||
h_p(i) = 0;
|
||||
h_lock(i) = 0;
|
||||
h_p(i, 0) = 0;
|
||||
h_lock(i, 0) = 0;
|
||||
}
|
||||
deep_copy(state_, h_state);
|
||||
deep_copy(locks_, h_lock);
|
||||
@ -1169,20 +1205,20 @@ class Random_XorShift1024_Pool {
|
||||
// Obtains a state index from Impl::Random_UniqueIndex<execution_space> using
// locks_, then returns a generator constructed from the whole state_ view, the
// stored p_ value for that index, and the index itself.
// NOTE(review): this page is a diff rendered without +/- markers. The two
// return lines look like the removed (1-D p_ indexing) and added (2-D
// indexing, column 0) versions of the same statement -- confirm against the
// upstream file.
Random_XorShift1024<DeviceType> get_state() const {
|
||||
const int i =
|
||||
Impl::Random_UniqueIndex<execution_space>::get_state_idx(locks_);
|
||||
return Random_XorShift1024<DeviceType>(state_, p_(i), i);
|
||||
return Random_XorShift1024<DeviceType>(state_, p_(i, 0), i);
|
||||
};
|
||||
|
||||
// NOTE: state_idx MUST be unique and less than num_states
|
||||
// Returns a generator constructed from the whole state_ view, the stored p_
// value for the caller-supplied state_idx, and that index. This overload does
// not touch locks_; the index is used as given.
// NOTE(review): this page is a diff rendered without +/- markers. The two
// return lines look like the removed (1-D p_ indexing) and added (2-D
// indexing, column 0) versions of the same statement -- confirm against the
// upstream file.
KOKKOS_INLINE_FUNCTION
|
||||
Random_XorShift1024<DeviceType> get_state(const int state_idx) const {
|
||||
return Random_XorShift1024<DeviceType>(state_, p_(state_idx), state_idx);
|
||||
return Random_XorShift1024<DeviceType>(state_, p_(state_idx, 0), state_idx);
|
||||
}
|
||||
|
||||
// Writes the generator back into the pool at state.state_idx_: first the 16
// state words, then the p_ value, and finally a 0 into the lock entry for that
// index. A lock value of 0 is what the compare-exchange (0 -> 1) loops in
// Random_UniqueIndex::get_state_idx wait for, so the slot becomes claimable
// again.
// NOTE(review): this page is a diff rendered without +/- markers. The
// single-index p_/locks_ statements look like the removed versions and the
// (idx, 0) statements like their replacements -- confirm against the upstream
// file.
KOKKOS_INLINE_FUNCTION
|
||||
void free_state(const Random_XorShift1024<DeviceType>& state) const {
|
||||
// Copy all 16 words of the generator's state into row state_idx_.
for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i];
|
||||
p_(state.state_idx_) = state.p_;
|
||||
locks_(state.state_idx_) = 0;
|
||||
p_(state.state_idx_, 0) = state.p_;
|
||||
locks_(state.state_idx_, 0) = 0;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user