Merge pull request #3329 from stanmoore1/kk_update_3.6.1
Update Kokkos library in LAMMPS to v3.6.1
This commit is contained in:
@ -47,8 +47,8 @@ if(DOWNLOAD_KOKKOS)
|
|||||||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
|
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
|
||||||
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
|
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
|
||||||
include(ExternalProject)
|
include(ExternalProject)
|
||||||
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.6.00.tar.gz" CACHE STRING "URL for KOKKOS tarball")
|
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.6.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
|
||||||
set(KOKKOS_MD5 "b5c44ea961031795f434002cd7b31c20" CACHE STRING "MD5 checksum of KOKKOS tarball")
|
set(KOKKOS_MD5 "0ec97fc0c356dd65bd2487defe81a7bf" CACHE STRING "MD5 checksum of KOKKOS tarball")
|
||||||
mark_as_advanced(KOKKOS_URL)
|
mark_as_advanced(KOKKOS_URL)
|
||||||
mark_as_advanced(KOKKOS_MD5)
|
mark_as_advanced(KOKKOS_MD5)
|
||||||
ExternalProject_Add(kokkos_build
|
ExternalProject_Add(kokkos_build
|
||||||
@ -72,7 +72,7 @@ if(DOWNLOAD_KOKKOS)
|
|||||||
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
|
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
|
||||||
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
|
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
|
||||||
elseif(EXTERNAL_KOKKOS)
|
elseif(EXTERNAL_KOKKOS)
|
||||||
find_package(Kokkos 3.6.00 REQUIRED CONFIG)
|
find_package(Kokkos 3.6.01 REQUIRED CONFIG)
|
||||||
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
|
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
|
||||||
target_link_libraries(lmp PRIVATE Kokkos::kokkos)
|
target_link_libraries(lmp PRIVATE Kokkos::kokkos)
|
||||||
else()
|
else()
|
||||||
|
|||||||
@ -1,5 +1,21 @@
|
|||||||
# Change Log
|
# Change Log
|
||||||
|
|
||||||
|
## [3.6.01](https://github.com/kokkos/kokkos/tree/3.6.01) (2022-05-23)
|
||||||
|
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.6.00...3.6.01)
|
||||||
|
|
||||||
|
### Bug Fixes:
|
||||||
|
- Fix Threads: Fix serial resizing scratch space (3.6.01 cherry-pick) [\#5109](https://github.com/kokkos/kokkos/pull/5109)
|
||||||
|
- Fix ScatterMin/ScatterMax to use proper atomics (3.6.01 cherry-pick) [\#5046](https://github.com/kokkos/kokkos/pull/5046)
|
||||||
|
- Fix allocating large Views [\#4907](https://github.com/kokkos/kokkos/pull/4907)
|
||||||
|
- Fix bounds errors with Kokkos::sort [\#4980](https://github.com/kokkos/kokkos/pull/4980)
|
||||||
|
- Fix HIP version when printing the configuration [\#4872](https://github.com/kokkos/kokkos/pull/4872)
|
||||||
|
- Fixed `_CUDA_ARCH__` to `__CUDA_ARCH__` for CUDA LDG [\#4893](https://github.com/kokkos/kokkos/pull/4893)
|
||||||
|
- Fixed an incorrect struct initialization [\#5028](https://github.com/kokkos/kokkos/pull/5028)
|
||||||
|
- Fix racing condition in `HIPParallelLaunch` [\#5008](https://github.com/kokkos/kokkos/pull/5008)
|
||||||
|
- Avoid deprecation warnings with `OpenMPExec::validate_partition` [\#4982](https://github.com/kokkos/kokkos/pull/4982)
|
||||||
|
- Make View self-assignment not produce double-free [\#5024](https://github.com/kokkos/kokkos/pull/5024)
|
||||||
|
|
||||||
|
|
||||||
## [3.6.00](https://github.com/kokkos/kokkos/tree/3.6.00) (2022-02-18)
|
## [3.6.00](https://github.com/kokkos/kokkos/tree/3.6.00) (2022-02-18)
|
||||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.5.00...3.6.00)
|
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.5.00...3.6.00)
|
||||||
|
|
||||||
|
|||||||
@ -136,7 +136,7 @@ ENDIF()
|
|||||||
|
|
||||||
set(Kokkos_VERSION_MAJOR 3)
|
set(Kokkos_VERSION_MAJOR 3)
|
||||||
set(Kokkos_VERSION_MINOR 6)
|
set(Kokkos_VERSION_MINOR 6)
|
||||||
set(Kokkos_VERSION_PATCH 00)
|
set(Kokkos_VERSION_PATCH 01)
|
||||||
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
|
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
|
||||||
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
|
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
|
||||||
|
|
||||||
|
|||||||
@ -12,7 +12,7 @@ endif
|
|||||||
|
|
||||||
KOKKOS_VERSION_MAJOR = 3
|
KOKKOS_VERSION_MAJOR = 3
|
||||||
KOKKOS_VERSION_MINOR = 6
|
KOKKOS_VERSION_MINOR = 6
|
||||||
KOKKOS_VERSION_PATCH = 00
|
KOKKOS_VERSION_PATCH = 01
|
||||||
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
|
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
|
||||||
|
|
||||||
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
|
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
|
||||||
|
|||||||
@ -422,54 +422,34 @@ class BinSort {
|
|||||||
|
|
||||||
template <class KeyViewType>
|
template <class KeyViewType>
|
||||||
struct BinOp1D {
|
struct BinOp1D {
|
||||||
int max_bins_;
|
int max_bins_ = {};
|
||||||
double mul_;
|
double mul_ = {};
|
||||||
typename KeyViewType::const_value_type range_;
|
double min_ = {};
|
||||||
typename KeyViewType::const_value_type min_;
|
|
||||||
|
|
||||||
BinOp1D()
|
BinOp1D() = default;
|
||||||
: max_bins_(0),
|
|
||||||
mul_(0.0),
|
|
||||||
range_(typename KeyViewType::const_value_type()),
|
|
||||||
min_(typename KeyViewType::const_value_type()) {}
|
|
||||||
|
|
||||||
// Construct BinOp with number of bins, minimum value and maxuimum value
|
// Construct BinOp with number of bins, minimum value and maxuimum value
|
||||||
BinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
|
BinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
|
||||||
typename KeyViewType::const_value_type max)
|
typename KeyViewType::const_value_type max)
|
||||||
: max_bins_(max_bins__ + 1),
|
: max_bins_(max_bins__ + 1),
|
||||||
// Cast to int64_t to avoid possible overflow when using integer
|
// Cast to double to avoid possible overflow when using integer
|
||||||
mul_(std::is_integral<typename KeyViewType::const_value_type>::value
|
mul_(static_cast<double>(max_bins__) /
|
||||||
? 1.0 * max_bins__ / (int64_t(max) - int64_t(min))
|
(static_cast<double>(max) - static_cast<double>(min))),
|
||||||
: 1.0 * max_bins__ / (max - min)),
|
min_(static_cast<double>(min)) {
|
||||||
range_(max - min),
|
|
||||||
min_(min) {
|
|
||||||
// For integral types the number of bins may be larger than the range
|
// For integral types the number of bins may be larger than the range
|
||||||
// in which case we can exactly have one unique value per bin
|
// in which case we can exactly have one unique value per bin
|
||||||
// and then don't need to sort bins.
|
// and then don't need to sort bins.
|
||||||
if (std::is_integral<typename KeyViewType::const_value_type>::value &&
|
if (std::is_integral<typename KeyViewType::const_value_type>::value &&
|
||||||
static_cast<uint64_t>(range_) <= static_cast<uint64_t>(max_bins__)) {
|
(static_cast<double>(max) - static_cast<double>(min)) <=
|
||||||
|
static_cast<double>(max_bins__)) {
|
||||||
mul_ = 1.;
|
mul_ = 1.;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine bin index from key value
|
// Determine bin index from key value
|
||||||
template <
|
template <class ViewType>
|
||||||
class ViewType,
|
|
||||||
std::enable_if_t<!std::is_integral<typename ViewType::value_type>::value,
|
|
||||||
bool> = true>
|
|
||||||
KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const {
|
KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const {
|
||||||
return int(mul_ * (keys(i) - min_));
|
return static_cast<int>(mul_ * (static_cast<double>(keys(i)) - min_));
|
||||||
}
|
|
||||||
|
|
||||||
// Determine bin index from key value
|
|
||||||
template <
|
|
||||||
class ViewType,
|
|
||||||
std::enable_if_t<std::is_integral<typename ViewType::value_type>::value,
|
|
||||||
bool> = true>
|
|
||||||
KOKKOS_INLINE_FUNCTION int bin(ViewType& keys, const int& i) const {
|
|
||||||
// The cast to int64_t is necessary because otherwise HIP returns the wrong
|
|
||||||
// result.
|
|
||||||
return int(mul_ * (int64_t(keys(i)) - int64_t(min_)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return maximum bin index + 1
|
// Return maximum bin index + 1
|
||||||
@ -486,10 +466,9 @@ struct BinOp1D {
|
|||||||
|
|
||||||
template <class KeyViewType>
|
template <class KeyViewType>
|
||||||
struct BinOp3D {
|
struct BinOp3D {
|
||||||
int max_bins_[3];
|
int max_bins_[3] = {};
|
||||||
double mul_[3];
|
double mul_[3] = {};
|
||||||
typename KeyViewType::non_const_value_type range_[3];
|
double min_[3] = {};
|
||||||
typename KeyViewType::non_const_value_type min_[3];
|
|
||||||
|
|
||||||
BinOp3D() = default;
|
BinOp3D() = default;
|
||||||
|
|
||||||
@ -498,15 +477,15 @@ struct BinOp3D {
|
|||||||
max_bins_[0] = max_bins__[0];
|
max_bins_[0] = max_bins__[0];
|
||||||
max_bins_[1] = max_bins__[1];
|
max_bins_[1] = max_bins__[1];
|
||||||
max_bins_[2] = max_bins__[2];
|
max_bins_[2] = max_bins__[2];
|
||||||
mul_[0] = 1.0 * max_bins__[0] / (max[0] - min[0]);
|
mul_[0] = static_cast<double>(max_bins__[0]) /
|
||||||
mul_[1] = 1.0 * max_bins__[1] / (max[1] - min[1]);
|
(static_cast<double>(max[0]) - static_cast<double>(min[0]));
|
||||||
mul_[2] = 1.0 * max_bins__[2] / (max[2] - min[2]);
|
mul_[1] = static_cast<double>(max_bins__[1]) /
|
||||||
range_[0] = max[0] - min[0];
|
(static_cast<double>(max[1]) - static_cast<double>(min[1]));
|
||||||
range_[1] = max[1] - min[1];
|
mul_[2] = static_cast<double>(max_bins__[2]) /
|
||||||
range_[2] = max[2] - min[2];
|
(static_cast<double>(max[2]) - static_cast<double>(min[2]));
|
||||||
min_[0] = min[0];
|
min_[0] = static_cast<double>(min[0]);
|
||||||
min_[1] = min[1];
|
min_[1] = static_cast<double>(min[1]);
|
||||||
min_[2] = min[2];
|
min_[2] = static_cast<double>(min[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class ViewType>
|
template <class ViewType>
|
||||||
@ -596,9 +575,9 @@ std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
|
|||||||
// TODO: figure out better max_bins then this ...
|
// TODO: figure out better max_bins then this ...
|
||||||
int64_t max_bins = view.extent(0) / 2;
|
int64_t max_bins = view.extent(0) / 2;
|
||||||
if (std::is_integral<typename ViewType::non_const_value_type>::value) {
|
if (std::is_integral<typename ViewType::non_const_value_type>::value) {
|
||||||
// Cast to int64_t to avoid possible overflow when using integer
|
// Cast to double to avoid possible overflow when using integer
|
||||||
int64_t const max_val = result.max_val;
|
auto const max_val = static_cast<double>(result.max_val);
|
||||||
int64_t const min_val = result.min_val;
|
auto const min_val = static_cast<double>(result.min_val);
|
||||||
// using 10M as the cutoff for special behavior (roughly 40MB for the count
|
// using 10M as the cutoff for special behavior (roughly 40MB for the count
|
||||||
// array)
|
// array)
|
||||||
if ((max_val - min_val) < 10000000) {
|
if ((max_val - min_val) < 10000000) {
|
||||||
@ -606,6 +585,10 @@ std::enable_if_t<Kokkos::is_execution_space<ExecutionSpace>::value> sort(
|
|||||||
sort_in_bins = false;
|
sort_in_bins = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (std::is_floating_point<typename ViewType::non_const_value_type>::value) {
|
||||||
|
KOKKOS_ASSERT(std::isfinite(static_cast<double>(result.max_val) -
|
||||||
|
static_cast<double>(result.min_val)));
|
||||||
|
}
|
||||||
|
|
||||||
BinSort<ViewType, CompType> bin_sort(
|
BinSort<ViewType, CompType> bin_sort(
|
||||||
view, CompType(max_bins, result.min_val, result.max_val), sort_in_bins);
|
view, CompType(max_bins, result.min_val, result.max_val), sort_in_bins);
|
||||||
|
|||||||
@ -353,6 +353,55 @@ void test_issue_1160_impl() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class ExecutionSpace>
|
||||||
|
void test_issue_4978_impl() {
|
||||||
|
Kokkos::View<long long*, ExecutionSpace> element_("element", 9);
|
||||||
|
|
||||||
|
auto h_element = Kokkos::create_mirror_view(element_);
|
||||||
|
|
||||||
|
h_element(0) = LLONG_MIN;
|
||||||
|
h_element(1) = 0;
|
||||||
|
h_element(2) = 3;
|
||||||
|
h_element(3) = 2;
|
||||||
|
h_element(4) = 1;
|
||||||
|
h_element(5) = 3;
|
||||||
|
h_element(6) = 6;
|
||||||
|
h_element(7) = 4;
|
||||||
|
h_element(8) = 3;
|
||||||
|
|
||||||
|
ExecutionSpace exec;
|
||||||
|
Kokkos::deep_copy(exec, element_, h_element);
|
||||||
|
|
||||||
|
Kokkos::sort(exec, element_);
|
||||||
|
|
||||||
|
Kokkos::deep_copy(exec, h_element, element_);
|
||||||
|
exec.fence();
|
||||||
|
|
||||||
|
ASSERT_EQ(h_element(0), LLONG_MIN);
|
||||||
|
ASSERT_EQ(h_element(1), 0);
|
||||||
|
ASSERT_EQ(h_element(2), 1);
|
||||||
|
ASSERT_EQ(h_element(3), 2);
|
||||||
|
ASSERT_EQ(h_element(4), 3);
|
||||||
|
ASSERT_EQ(h_element(5), 3);
|
||||||
|
ASSERT_EQ(h_element(6), 3);
|
||||||
|
ASSERT_EQ(h_element(7), 4);
|
||||||
|
ASSERT_EQ(h_element(8), 6);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class ExecutionSpace, class T>
|
||||||
|
void test_sort_integer_overflow() {
|
||||||
|
// array with two extrema in reverse order to expose integer overflow bug in
|
||||||
|
// bin calculation
|
||||||
|
T a[2] = {Kokkos::Experimental::finite_max<T>::value,
|
||||||
|
Kokkos::Experimental::finite_min<T>::value};
|
||||||
|
auto vd = Kokkos::create_mirror_view_and_copy(
|
||||||
|
ExecutionSpace(), Kokkos::View<T[2], Kokkos::HostSpace>(a));
|
||||||
|
Kokkos::sort(vd, /*force using Kokkos bin sort*/ true);
|
||||||
|
auto vh = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), vd);
|
||||||
|
EXPECT_TRUE(std::is_sorted(vh.data(), vh.data() + 2))
|
||||||
|
<< "view (" << vh[0] << ", " << vh[1] << ") is not sorted";
|
||||||
|
}
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
template <class ExecutionSpace, typename KeyType>
|
template <class ExecutionSpace, typename KeyType>
|
||||||
@ -376,6 +425,11 @@ void test_issue_1160_sort() {
|
|||||||
test_issue_1160_impl<ExecutionSpace>();
|
test_issue_1160_impl<ExecutionSpace>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class ExecutionSpace>
|
||||||
|
void test_issue_4978_sort() {
|
||||||
|
test_issue_4978_impl<ExecutionSpace>();
|
||||||
|
}
|
||||||
|
|
||||||
template <class ExecutionSpace, typename KeyType>
|
template <class ExecutionSpace, typename KeyType>
|
||||||
void test_sort(unsigned int N) {
|
void test_sort(unsigned int N) {
|
||||||
test_1D_sort<ExecutionSpace, KeyType>(N);
|
test_1D_sort<ExecutionSpace, KeyType>(N);
|
||||||
@ -385,6 +439,10 @@ void test_sort(unsigned int N) {
|
|||||||
test_dynamic_view_sort<ExecutionSpace, KeyType>(N);
|
test_dynamic_view_sort<ExecutionSpace, KeyType>(N);
|
||||||
#endif
|
#endif
|
||||||
test_issue_1160_sort<ExecutionSpace>();
|
test_issue_1160_sort<ExecutionSpace>();
|
||||||
|
test_issue_4978_sort<ExecutionSpace>();
|
||||||
|
test_sort_integer_overflow<ExecutionSpace, long long>();
|
||||||
|
test_sort_integer_overflow<ExecutionSpace, unsigned long long>();
|
||||||
|
test_sort_integer_overflow<ExecutionSpace, int>();
|
||||||
}
|
}
|
||||||
} // namespace Impl
|
} // namespace Impl
|
||||||
} // namespace Test
|
} // namespace Test
|
||||||
|
|||||||
@ -369,18 +369,6 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, DeviceType,
|
|||||||
Kokkos::atomic_div(&value, rhs);
|
Kokkos::atomic_div(&value, rhs);
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
void atomic_prod(ValueType& dest, const ValueType& src) const {
|
|
||||||
bool success = false;
|
|
||||||
while (!success) {
|
|
||||||
ValueType dest_old = dest;
|
|
||||||
ValueType dest_new = dest_old * src;
|
|
||||||
dest_new =
|
|
||||||
Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
|
|
||||||
success = ((dest_new - dest_old) / dest_old <= 1e-15);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void join(ValueType& dest, const ValueType& src) const {
|
void join(ValueType& dest, const ValueType& src) const {
|
||||||
atomic_prod(&dest, src);
|
atomic_prod(&dest, src);
|
||||||
@ -440,21 +428,9 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, DeviceType,
|
|||||||
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
|
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
|
||||||
: value(other.value) {}
|
: value(other.value) {}
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
void atomic_min(ValueType& dest, const ValueType& src) const {
|
|
||||||
bool success = false;
|
|
||||||
while (!success) {
|
|
||||||
ValueType dest_old = dest;
|
|
||||||
ValueType dest_new = (dest_old > src) ? src : dest_old;
|
|
||||||
dest_new =
|
|
||||||
Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
|
|
||||||
success = ((dest_new - dest_old) / dest_old <= 1e-15);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void join(ValueType& dest, const ValueType& src) const {
|
void join(ValueType& dest, const ValueType& src) const {
|
||||||
atomic_min(dest, src);
|
atomic_min(&dest, src);
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
@ -511,21 +487,9 @@ struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, DeviceType,
|
|||||||
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
|
KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other)
|
||||||
: value(other.value) {}
|
: value(other.value) {}
|
||||||
|
|
||||||
KOKKOS_FORCEINLINE_FUNCTION
|
|
||||||
void atomic_max(ValueType& dest, const ValueType& src) const {
|
|
||||||
bool success = false;
|
|
||||||
while (!success) {
|
|
||||||
ValueType dest_old = dest;
|
|
||||||
ValueType dest_new = (dest_old < src) ? src : dest_old;
|
|
||||||
dest_new =
|
|
||||||
Kokkos::atomic_compare_exchange<ValueType>(&dest, dest_old, dest_new);
|
|
||||||
success = ((dest_new - dest_old) / dest_old <= 1e-15);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void join(ValueType& dest, const ValueType& src) const {
|
void join(ValueType& dest, const ValueType& src) const {
|
||||||
atomic_max(dest, src);
|
atomic_max(&dest, src);
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
|||||||
@ -162,7 +162,7 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
|
|||||||
}
|
}
|
||||||
DV::sync_host();
|
DV::sync_host();
|
||||||
DV::modify_host();
|
DV::modify_host();
|
||||||
if (it < begin() || it > end())
|
if (std::less<>()(it, begin()) || std::less<>()(end(), it))
|
||||||
Kokkos::abort("Kokkos::vector::insert : invalid insert iterator");
|
Kokkos::abort("Kokkos::vector::insert : invalid insert iterator");
|
||||||
if (count == 0) return it;
|
if (count == 0) return it;
|
||||||
ptrdiff_t start = std::distance(begin(), it);
|
ptrdiff_t start = std::distance(begin(), it);
|
||||||
@ -189,27 +189,21 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
|
|||||||
iterator>::type
|
iterator>::type
|
||||||
insert(iterator it, InputIterator b, InputIterator e) {
|
insert(iterator it, InputIterator b, InputIterator e) {
|
||||||
ptrdiff_t count = std::distance(b, e);
|
ptrdiff_t count = std::distance(b, e);
|
||||||
if (count == 0) return it;
|
|
||||||
|
|
||||||
DV::sync_host();
|
DV::sync_host();
|
||||||
DV::modify_host();
|
DV::modify_host();
|
||||||
if (it < begin() || it > end())
|
if (std::less<>()(it, begin()) || std::less<>()(end(), it))
|
||||||
Kokkos::abort("Kokkos::vector::insert : invalid insert iterator");
|
Kokkos::abort("Kokkos::vector::insert : invalid insert iterator");
|
||||||
|
|
||||||
bool resized = false;
|
|
||||||
if ((size() == 0) && (it == begin())) {
|
|
||||||
resize(count);
|
|
||||||
it = begin();
|
|
||||||
resized = true;
|
|
||||||
}
|
|
||||||
ptrdiff_t start = std::distance(begin(), it);
|
ptrdiff_t start = std::distance(begin(), it);
|
||||||
auto org_size = size();
|
auto org_size = size();
|
||||||
if (!resized) resize(size() + count);
|
|
||||||
it = begin() + start;
|
// Note: resize(...) invalidates it; use begin() + start instead
|
||||||
|
resize(size() + count);
|
||||||
|
|
||||||
std::copy_backward(begin() + start, begin() + org_size,
|
std::copy_backward(begin() + start, begin() + org_size,
|
||||||
begin() + org_size + count);
|
begin() + org_size + count);
|
||||||
std::copy(b, e, it);
|
std::copy(b, e, begin() + start);
|
||||||
|
|
||||||
return begin() + start;
|
return begin() + start;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -172,6 +172,23 @@ struct test_vector_insert {
|
|||||||
run_test(a);
|
run_test(a);
|
||||||
check_test(a, size);
|
check_test(a, size);
|
||||||
}
|
}
|
||||||
|
{ test_vector_insert_into_empty(size); }
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_vector_insert_into_empty(const size_t size) {
|
||||||
|
using Vector = Kokkos::vector<Scalar, Device>;
|
||||||
|
{
|
||||||
|
Vector a;
|
||||||
|
Vector b(size);
|
||||||
|
a.insert(a.begin(), b.begin(), b.end());
|
||||||
|
ASSERT_EQ(a.size(), size);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Vector c;
|
||||||
|
c.insert(c.begin(), size, Scalar{});
|
||||||
|
ASSERT_EQ(c.size(), size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -8,6 +8,7 @@ KOKKOS_INCLUDE_DIRECTORIES(
|
|||||||
INSTALL (DIRECTORY
|
INSTALL (DIRECTORY
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/"
|
"${CMAKE_CURRENT_SOURCE_DIR}/"
|
||||||
DESTINATION ${KOKKOS_HEADER_DIR}
|
DESTINATION ${KOKKOS_HEADER_DIR}
|
||||||
|
FILES_MATCHING
|
||||||
PATTERN desul/src EXCLUDE
|
PATTERN desul/src EXCLUDE
|
||||||
PATTERN "*.inc"
|
PATTERN "*.inc"
|
||||||
PATTERN "*.inc_*"
|
PATTERN "*.inc_*"
|
||||||
|
|||||||
@ -1007,6 +1007,15 @@ void CudaSpaceInitializer::print_configuration(std::ostream &msg,
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Impl
|
} // namespace Impl
|
||||||
|
|
||||||
|
#ifdef KOKKOS_ENABLE_CXX14
|
||||||
|
namespace Tools {
|
||||||
|
namespace Experimental {
|
||||||
|
constexpr DeviceType DeviceTypeTraits<Cuda>::id;
|
||||||
|
}
|
||||||
|
} // namespace Tools
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace Kokkos
|
} // namespace Kokkos
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|||||||
@ -139,7 +139,7 @@ struct CudaLDGFetch {
|
|||||||
|
|
||||||
template <typename iType>
|
template <typename iType>
|
||||||
KOKKOS_INLINE_FUNCTION ValueType operator[](const iType& i) const {
|
KOKKOS_INLINE_FUNCTION ValueType operator[](const iType& i) const {
|
||||||
#if defined(__CUDA_ARCH__) && (350 <= _CUDA_ARCH__)
|
#if defined(__CUDA_ARCH__) && (350 <= __CUDA_ARCH__)
|
||||||
AliasType v = __ldg(reinterpret_cast<const AliasType*>(&m_ptr[i]));
|
AliasType v = __ldg(reinterpret_cast<const AliasType*>(&m_ptr[i]));
|
||||||
return *(reinterpret_cast<ValueType*>(&v));
|
return *(reinterpret_cast<ValueType*>(&v));
|
||||||
#else
|
#else
|
||||||
|
|||||||
@ -132,7 +132,8 @@ void HIPInternal::print_configuration(std::ostream &s) const {
|
|||||||
s << "macro KOKKOS_ENABLE_HIP : defined" << '\n';
|
s << "macro KOKKOS_ENABLE_HIP : defined" << '\n';
|
||||||
#if defined(HIP_VERSION)
|
#if defined(HIP_VERSION)
|
||||||
s << "macro HIP_VERSION = " << HIP_VERSION << " = version "
|
s << "macro HIP_VERSION = " << HIP_VERSION << " = version "
|
||||||
<< HIP_VERSION / 100 << "." << HIP_VERSION % 100 << '\n';
|
<< HIP_VERSION_MAJOR << '.' << HIP_VERSION_MINOR << '.' << HIP_VERSION_PATCH
|
||||||
|
<< '\n';
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (int i = 0; i < dev_info.m_hipDevCount; ++i) {
|
for (int i = 0; i < dev_info.m_hipDevCount; ++i) {
|
||||||
@ -467,7 +468,6 @@ void HIPInternal::finalize() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
char *HIPInternal::get_next_driver(size_t driverTypeSize) const {
|
char *HIPInternal::get_next_driver(size_t driverTypeSize) const {
|
||||||
std::lock_guard<std::mutex> const lock(m_mutexWorkArray);
|
|
||||||
if (d_driverWorkArray == nullptr) {
|
if (d_driverWorkArray == nullptr) {
|
||||||
KOKKOS_IMPL_HIP_SAFE_CALL(
|
KOKKOS_IMPL_HIP_SAFE_CALL(
|
||||||
hipHostMalloc(&d_driverWorkArray,
|
hipHostMalloc(&d_driverWorkArray,
|
||||||
|
|||||||
@ -490,6 +490,8 @@ struct HIPParallelLaunch<
|
|||||||
|
|
||||||
KOKKOS_ENSURE_HIP_LOCK_ARRAYS_ON_DEVICE();
|
KOKKOS_ENSURE_HIP_LOCK_ARRAYS_ON_DEVICE();
|
||||||
|
|
||||||
|
std::lock_guard<std::mutex> const lock(hip_instance->m_mutexWorkArray);
|
||||||
|
|
||||||
// Invoke the driver function on the device
|
// Invoke the driver function on the device
|
||||||
DriverType *d_driver = reinterpret_cast<DriverType *>(
|
DriverType *d_driver = reinterpret_cast<DriverType *>(
|
||||||
hip_instance->get_next_driver(sizeof(DriverType)));
|
hip_instance->get_next_driver(sizeof(DriverType)));
|
||||||
|
|||||||
@ -56,8 +56,7 @@ namespace Kokkos {
|
|||||||
|
|
||||||
#ifdef KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
|
#ifdef KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
|
||||||
namespace Impl {
|
namespace Impl {
|
||||||
__device__ __constant__ HIPLockArrays g_device_hip_lock_arrays = {nullptr,
|
__device__ __constant__ HIPLockArrays g_device_hip_lock_arrays = {nullptr, 0};
|
||||||
nullptr, 0};
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@ -464,6 +464,15 @@ void HIPSpaceInitializer::print_configuration(std::ostream& msg,
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Impl
|
} // namespace Impl
|
||||||
|
|
||||||
|
#ifdef KOKKOS_ENABLE_CXX14
|
||||||
|
namespace Tools {
|
||||||
|
namespace Experimental {
|
||||||
|
constexpr DeviceType DeviceTypeTraits<Kokkos::Experimental::HIP>::id;
|
||||||
|
}
|
||||||
|
} // namespace Tools
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace Kokkos
|
} // namespace Kokkos
|
||||||
|
|
||||||
//==============================================================================
|
//==============================================================================
|
||||||
|
|||||||
@ -199,6 +199,15 @@ void HPXSpaceInitializer::print_configuration(std::ostream &msg,
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Impl
|
} // namespace Impl
|
||||||
|
|
||||||
|
#ifdef KOKKOS_ENABLE_CXX14
|
||||||
|
namespace Tools {
|
||||||
|
namespace Experimental {
|
||||||
|
constexpr DeviceType DeviceTypeTraits<Kokkos::Experimental::HPX>::id;
|
||||||
|
}
|
||||||
|
} // namespace Tools
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace Kokkos
|
} // namespace Kokkos
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|||||||
@ -260,6 +260,7 @@ template <>
|
|||||||
struct DeviceTypeTraits<Cuda> {
|
struct DeviceTypeTraits<Cuda> {
|
||||||
/// \brief An ID to differentiate (for example) Serial from OpenMP in Tooling
|
/// \brief An ID to differentiate (for example) Serial from OpenMP in Tooling
|
||||||
static constexpr DeviceType id = DeviceType::Cuda;
|
static constexpr DeviceType id = DeviceType::Cuda;
|
||||||
|
static int device_id(const Cuda& exec) { return exec.cuda_device(); }
|
||||||
};
|
};
|
||||||
} // namespace Experimental
|
} // namespace Experimental
|
||||||
} // namespace Tools
|
} // namespace Tools
|
||||||
|
|||||||
@ -571,6 +571,9 @@ namespace Experimental {
|
|||||||
template <>
|
template <>
|
||||||
struct DeviceTypeTraits<Kokkos::Experimental::HIP> {
|
struct DeviceTypeTraits<Kokkos::Experimental::HIP> {
|
||||||
static constexpr DeviceType id = DeviceType::HIP;
|
static constexpr DeviceType id = DeviceType::HIP;
|
||||||
|
static int device_id(const Kokkos::Experimental::HIP& exec) {
|
||||||
|
return exec.hip_device();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
} // namespace Experimental
|
} // namespace Experimental
|
||||||
} // namespace Tools
|
} // namespace Tools
|
||||||
|
|||||||
@ -500,6 +500,7 @@ namespace Experimental {
|
|||||||
template <>
|
template <>
|
||||||
struct DeviceTypeTraits<Kokkos::Experimental::HPX> {
|
struct DeviceTypeTraits<Kokkos::Experimental::HPX> {
|
||||||
static constexpr DeviceType id = DeviceType::HPX;
|
static constexpr DeviceType id = DeviceType::HPX;
|
||||||
|
static int device_id(const Kokkos::Experimental::HPX &) { return 0; }
|
||||||
};
|
};
|
||||||
} // namespace Experimental
|
} // namespace Experimental
|
||||||
} // namespace Tools
|
} // namespace Tools
|
||||||
|
|||||||
@ -179,6 +179,7 @@ namespace Experimental {
|
|||||||
template <>
|
template <>
|
||||||
struct DeviceTypeTraits<OpenMP> {
|
struct DeviceTypeTraits<OpenMP> {
|
||||||
static constexpr DeviceType id = DeviceType::OpenMP;
|
static constexpr DeviceType id = DeviceType::OpenMP;
|
||||||
|
static int device_id(const OpenMP&) { return 0; }
|
||||||
};
|
};
|
||||||
} // namespace Experimental
|
} // namespace Experimental
|
||||||
} // namespace Tools
|
} // namespace Tools
|
||||||
|
|||||||
@ -130,6 +130,9 @@ template <>
|
|||||||
struct DeviceTypeTraits<::Kokkos::Experimental::OpenMPTarget> {
|
struct DeviceTypeTraits<::Kokkos::Experimental::OpenMPTarget> {
|
||||||
static constexpr DeviceType id =
|
static constexpr DeviceType id =
|
||||||
::Kokkos::Profiling::Experimental::DeviceType::OpenMPTarget;
|
::Kokkos::Profiling::Experimental::DeviceType::OpenMPTarget;
|
||||||
|
static int device_id(const Kokkos::Experimental::OpenMPTarget&) {
|
||||||
|
return omp_get_default_device();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
} // namespace Experimental
|
} // namespace Experimental
|
||||||
} // namespace Tools
|
} // namespace Tools
|
||||||
|
|||||||
@ -182,6 +182,9 @@ template <>
|
|||||||
struct DeviceTypeTraits<Kokkos::Experimental::SYCL> {
|
struct DeviceTypeTraits<Kokkos::Experimental::SYCL> {
|
||||||
/// \brief An ID to differentiate (for example) Serial from OpenMP in Tooling
|
/// \brief An ID to differentiate (for example) Serial from OpenMP in Tooling
|
||||||
static constexpr DeviceType id = DeviceType::SYCL;
|
static constexpr DeviceType id = DeviceType::SYCL;
|
||||||
|
static int device_id(const Kokkos::Experimental::SYCL& exec) {
|
||||||
|
return exec.sycl_device();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
} // namespace Experimental
|
} // namespace Experimental
|
||||||
} // namespace Tools
|
} // namespace Tools
|
||||||
|
|||||||
@ -226,6 +226,7 @@ namespace Experimental {
|
|||||||
template <>
|
template <>
|
||||||
struct DeviceTypeTraits<Serial> {
|
struct DeviceTypeTraits<Serial> {
|
||||||
static constexpr DeviceType id = DeviceType::Serial;
|
static constexpr DeviceType id = DeviceType::Serial;
|
||||||
|
static int device_id(const Serial&) { return 0; }
|
||||||
};
|
};
|
||||||
} // namespace Experimental
|
} // namespace Experimental
|
||||||
} // namespace Tools
|
} // namespace Tools
|
||||||
|
|||||||
@ -175,6 +175,7 @@ namespace Experimental {
|
|||||||
template <>
|
template <>
|
||||||
struct DeviceTypeTraits<Threads> {
|
struct DeviceTypeTraits<Threads> {
|
||||||
static constexpr DeviceType id = DeviceType::Threads;
|
static constexpr DeviceType id = DeviceType::Threads;
|
||||||
|
static int device_id(const Threads&) { return 0; }
|
||||||
};
|
};
|
||||||
} // namespace Experimental
|
} // namespace Experimental
|
||||||
} // namespace Tools
|
} // namespace Tools
|
||||||
|
|||||||
@ -67,7 +67,8 @@ __thread int t_openmp_hardware_id = 0;
|
|||||||
__thread Impl::OpenMPExec *t_openmp_instance = nullptr;
|
__thread Impl::OpenMPExec *t_openmp_instance = nullptr;
|
||||||
|
|
||||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
|
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
|
||||||
void OpenMPExec::validate_partition(const int nthreads, int &num_partitions,
|
void OpenMPExec::validate_partition_impl(const int nthreads,
|
||||||
|
int &num_partitions,
|
||||||
int &partition_size) {
|
int &partition_size) {
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
num_partitions = 1;
|
num_partitions = 1;
|
||||||
@ -506,6 +507,15 @@ void OpenMPSpaceInitializer::print_configuration(std::ostream &msg,
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Impl
|
} // namespace Impl
|
||||||
|
|
||||||
|
#ifdef KOKKOS_ENABLE_CXX14
|
||||||
|
namespace Tools {
|
||||||
|
namespace Experimental {
|
||||||
|
constexpr DeviceType DeviceTypeTraits<OpenMP>::id;
|
||||||
|
}
|
||||||
|
} // namespace Tools
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace Kokkos
|
} // namespace Kokkos
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|||||||
@ -93,6 +93,10 @@ class OpenMPExec {
|
|||||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
|
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_3
|
||||||
KOKKOS_DEPRECATED static void validate_partition(const int nthreads,
|
KOKKOS_DEPRECATED static void validate_partition(const int nthreads,
|
||||||
int& num_partitions,
|
int& num_partitions,
|
||||||
|
int& partition_size) {
|
||||||
|
validate_partition_impl(nthreads, num_partitions, partition_size);
|
||||||
|
}
|
||||||
|
static void validate_partition_impl(const int nthreads, int& num_partitions,
|
||||||
int& partition_size);
|
int& partition_size);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -179,7 +183,7 @@ KOKKOS_DEPRECATED void OpenMP::partition_master(F const& f, int num_partitions,
|
|||||||
|
|
||||||
Exec* prev_instance = Impl::t_openmp_instance;
|
Exec* prev_instance = Impl::t_openmp_instance;
|
||||||
|
|
||||||
Exec::validate_partition(prev_instance->m_pool_size, num_partitions,
|
Exec::validate_partition_impl(prev_instance->m_pool_size, num_partitions,
|
||||||
partition_size);
|
partition_size);
|
||||||
|
|
||||||
OpenMP::memory_space space;
|
OpenMP::memory_space space;
|
||||||
|
|||||||
@ -72,7 +72,7 @@ class SYCLInternal {
|
|||||||
bool force_shrink = false);
|
bool force_shrink = false);
|
||||||
|
|
||||||
uint32_t impl_get_instance_id() const;
|
uint32_t impl_get_instance_id() const;
|
||||||
int m_syclDev = -1;
|
int m_syclDev = 0;
|
||||||
|
|
||||||
size_t m_maxWorkgroupSize = 0;
|
size_t m_maxWorkgroupSize = 0;
|
||||||
uint32_t m_maxConcurrency = 0;
|
uint32_t m_maxConcurrency = 0;
|
||||||
|
|||||||
@ -399,27 +399,68 @@ bool ThreadsExec::wake() {
|
|||||||
|
|
||||||
//----------------------------------------------------------------------------
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
void ThreadsExec::execute_resize_scratch_in_serial() {
|
||||||
|
const unsigned begin = s_threads_process.m_pool_base ? 1 : 0;
|
||||||
|
|
||||||
|
auto deallocate_scratch_memory = [](ThreadsExec &exec) {
|
||||||
|
if (exec.m_scratch) {
|
||||||
|
using Record =
|
||||||
|
Kokkos::Impl::SharedAllocationRecord<Kokkos::HostSpace, void>;
|
||||||
|
Record *const r = Record::get_record(exec.m_scratch);
|
||||||
|
exec.m_scratch = nullptr;
|
||||||
|
Record::decrement(r);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if (s_threads_process.m_pool_base) {
|
||||||
|
for (unsigned i = s_thread_pool_size[0]; begin < i;) {
|
||||||
|
deallocate_scratch_memory(*s_threads_exec[--i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
s_current_function = &first_touch_allocate_thread_private_scratch;
|
||||||
|
s_current_function_arg = &s_threads_process;
|
||||||
|
|
||||||
|
// Make sure function and arguments are written before activating threads.
|
||||||
|
memory_fence();
|
||||||
|
|
||||||
|
for (unsigned i = s_thread_pool_size[0]; begin < i;) {
|
||||||
|
ThreadsExec &th = *s_threads_exec[--i];
|
||||||
|
|
||||||
|
th.m_pool_state = ThreadsExec::Active;
|
||||||
|
|
||||||
|
wait_yield(th.m_pool_state, ThreadsExec::Active);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (s_threads_process.m_pool_base) {
|
||||||
|
deallocate_scratch_memory(s_threads_process);
|
||||||
|
s_threads_process.m_pool_state = ThreadsExec::Active;
|
||||||
|
first_touch_allocate_thread_private_scratch(s_threads_process, nullptr);
|
||||||
|
s_threads_process.m_pool_state = ThreadsExec::Inactive;
|
||||||
|
}
|
||||||
|
|
||||||
|
s_current_function_arg = nullptr;
|
||||||
|
s_current_function = nullptr;
|
||||||
|
|
||||||
|
// Make sure function and arguments are cleared before proceeding.
|
||||||
|
memory_fence();
|
||||||
|
}
|
||||||
|
|
||||||
|
//----------------------------------------------------------------------------
|
||||||
|
|
||||||
void *ThreadsExec::root_reduce_scratch() {
|
void *ThreadsExec::root_reduce_scratch() {
|
||||||
return s_threads_process.reduce_memory();
|
return s_threads_process.reduce_memory();
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadsExec::execute_resize_scratch(ThreadsExec &exec, const void *) {
|
void ThreadsExec::first_touch_allocate_thread_private_scratch(ThreadsExec &exec,
|
||||||
using Record = Kokkos::Impl::SharedAllocationRecord<Kokkos::HostSpace, void>;
|
const void *) {
|
||||||
|
|
||||||
if (exec.m_scratch) {
|
|
||||||
Record *const r = Record::get_record(exec.m_scratch);
|
|
||||||
|
|
||||||
exec.m_scratch = nullptr;
|
|
||||||
|
|
||||||
Record::decrement(r);
|
|
||||||
}
|
|
||||||
|
|
||||||
exec.m_scratch_reduce_end = s_threads_process.m_scratch_reduce_end;
|
exec.m_scratch_reduce_end = s_threads_process.m_scratch_reduce_end;
|
||||||
exec.m_scratch_thread_end = s_threads_process.m_scratch_thread_end;
|
exec.m_scratch_thread_end = s_threads_process.m_scratch_thread_end;
|
||||||
|
|
||||||
if (s_threads_process.m_scratch_thread_end) {
|
if (s_threads_process.m_scratch_thread_end) {
|
||||||
// Allocate tracked memory:
|
// Allocate tracked memory:
|
||||||
{
|
{
|
||||||
|
using Record =
|
||||||
|
Kokkos::Impl::SharedAllocationRecord<Kokkos::HostSpace, void>;
|
||||||
Record *const r =
|
Record *const r =
|
||||||
Record::allocate(Kokkos::HostSpace(), "Kokkos::thread_scratch",
|
Record::allocate(Kokkos::HostSpace(), "Kokkos::thread_scratch",
|
||||||
s_threads_process.m_scratch_thread_end);
|
s_threads_process.m_scratch_thread_end);
|
||||||
@ -461,7 +502,7 @@ void *ThreadsExec::resize_scratch(size_t reduce_size, size_t thread_size) {
|
|||||||
s_threads_process.m_scratch_reduce_end = reduce_size;
|
s_threads_process.m_scratch_reduce_end = reduce_size;
|
||||||
s_threads_process.m_scratch_thread_end = reduce_size + thread_size;
|
s_threads_process.m_scratch_thread_end = reduce_size + thread_size;
|
||||||
|
|
||||||
execute_resize_scratch(s_threads_process, nullptr);
|
execute_resize_scratch_in_serial();
|
||||||
|
|
||||||
s_threads_process.m_scratch = s_threads_exec[0]->m_scratch;
|
s_threads_process.m_scratch = s_threads_exec[0]->m_scratch;
|
||||||
}
|
}
|
||||||
@ -845,6 +886,15 @@ void ThreadsSpaceInitializer::print_configuration(std::ostream &msg,
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Impl
|
} // namespace Impl
|
||||||
|
|
||||||
|
#ifdef KOKKOS_ENABLE_CXX14
|
||||||
|
namespace Tools {
|
||||||
|
namespace Experimental {
|
||||||
|
constexpr DeviceType DeviceTypeTraits<Threads>::id;
|
||||||
|
}
|
||||||
|
} // namespace Tools
|
||||||
|
#endif
|
||||||
|
|
||||||
} /* namespace Kokkos */
|
} /* namespace Kokkos */
|
||||||
//----------------------------------------------------------------------------
|
//----------------------------------------------------------------------------
|
||||||
//----------------------------------------------------------------------------
|
//----------------------------------------------------------------------------
|
||||||
|
|||||||
@ -123,12 +123,15 @@ class ThreadsExec {
|
|||||||
static void global_unlock();
|
static void global_unlock();
|
||||||
static void spawn();
|
static void spawn();
|
||||||
|
|
||||||
static void execute_resize_scratch(ThreadsExec &, const void *);
|
static void first_touch_allocate_thread_private_scratch(ThreadsExec &,
|
||||||
|
const void *);
|
||||||
static void execute_sleep(ThreadsExec &, const void *);
|
static void execute_sleep(ThreadsExec &, const void *);
|
||||||
|
|
||||||
ThreadsExec(const ThreadsExec &);
|
ThreadsExec(const ThreadsExec &);
|
||||||
ThreadsExec &operator=(const ThreadsExec &);
|
ThreadsExec &operator=(const ThreadsExec &);
|
||||||
|
|
||||||
|
static void execute_resize_scratch_in_serial();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
KOKKOS_INLINE_FUNCTION int pool_size() const { return m_pool_size; }
|
KOKKOS_INLINE_FUNCTION int pool_size() const { return m_pool_size; }
|
||||||
KOKKOS_INLINE_FUNCTION int pool_rank() const { return m_pool_rank; }
|
KOKKOS_INLINE_FUNCTION int pool_rank() const { return m_pool_rank; }
|
||||||
|
|||||||
@ -118,11 +118,14 @@ template <typename ExecutionSpace>
|
|||||||
constexpr uint32_t device_id_root() {
|
constexpr uint32_t device_id_root() {
|
||||||
constexpr auto device_id =
|
constexpr auto device_id =
|
||||||
static_cast<uint32_t>(DeviceTypeTraits<ExecutionSpace>::id);
|
static_cast<uint32_t>(DeviceTypeTraits<ExecutionSpace>::id);
|
||||||
return (device_id << num_instance_bits);
|
return (device_id << (num_instance_bits + num_device_bits));
|
||||||
}
|
}
|
||||||
template <typename ExecutionSpace>
|
template <typename ExecutionSpace>
|
||||||
inline uint32_t device_id(ExecutionSpace const& space) noexcept {
|
inline uint32_t device_id(ExecutionSpace const& space) noexcept {
|
||||||
return device_id_root<ExecutionSpace>() + space.impl_instance_id();
|
return device_id_root<ExecutionSpace>() +
|
||||||
|
(DeviceTypeTraits<ExecutionSpace>::device_id(space)
|
||||||
|
<< num_instance_bits) +
|
||||||
|
space.impl_instance_id();
|
||||||
}
|
}
|
||||||
} // namespace Experimental
|
} // namespace Experimental
|
||||||
} // namespace Tools
|
} // namespace Tools
|
||||||
|
|||||||
@ -233,6 +233,15 @@ void SerialSpaceInitializer::print_configuration(std::ostream& msg,
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Impl
|
} // namespace Impl
|
||||||
|
|
||||||
|
#ifdef KOKKOS_ENABLE_CXX14
|
||||||
|
namespace Tools {
|
||||||
|
namespace Experimental {
|
||||||
|
constexpr DeviceType DeviceTypeTraits<Serial>::id;
|
||||||
|
}
|
||||||
|
} // namespace Tools
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace Kokkos
|
} // namespace Kokkos
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|||||||
@ -1005,15 +1005,15 @@ struct ViewOffset<
|
|||||||
/* Cardinality of the domain index space */
|
/* Cardinality of the domain index space */
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
constexpr size_type size() const {
|
constexpr size_type size() const {
|
||||||
return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 *
|
return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 *
|
||||||
m_dim.N6 * m_dim.N7;
|
m_dim.N5 * m_dim.N6 * m_dim.N7;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Span of the range space */
|
/* Span of the range space */
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
constexpr size_type span() const {
|
constexpr size_type span() const {
|
||||||
return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 *
|
return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 *
|
||||||
m_dim.N6 * m_dim.N7;
|
m_dim.N5 * m_dim.N6 * m_dim.N7;
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
|
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
|
||||||
@ -1026,23 +1026,24 @@ struct ViewOffset<
|
|||||||
return m_dim.N0;
|
return m_dim.N0;
|
||||||
}
|
}
|
||||||
KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const {
|
KOKKOS_INLINE_FUNCTION constexpr size_type stride_2() const {
|
||||||
return m_dim.N0 * m_dim.N1;
|
return size_type(m_dim.N0) * m_dim.N1;
|
||||||
}
|
}
|
||||||
KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const {
|
KOKKOS_INLINE_FUNCTION constexpr size_type stride_3() const {
|
||||||
return m_dim.N0 * m_dim.N1 * m_dim.N2;
|
return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2;
|
||||||
}
|
}
|
||||||
KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const {
|
KOKKOS_INLINE_FUNCTION constexpr size_type stride_4() const {
|
||||||
return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3;
|
return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3;
|
||||||
}
|
}
|
||||||
KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const {
|
KOKKOS_INLINE_FUNCTION constexpr size_type stride_5() const {
|
||||||
return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4;
|
return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4;
|
||||||
}
|
}
|
||||||
KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const {
|
KOKKOS_INLINE_FUNCTION constexpr size_type stride_6() const {
|
||||||
return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5;
|
return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 *
|
||||||
|
m_dim.N5;
|
||||||
}
|
}
|
||||||
KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const {
|
KOKKOS_INLINE_FUNCTION constexpr size_type stride_7() const {
|
||||||
return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 *
|
return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 *
|
||||||
m_dim.N6;
|
m_dim.N5 * m_dim.N6;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stride with [ rank ] value is the total length
|
// Stride with [ rank ] value is the total length
|
||||||
@ -1288,8 +1289,8 @@ struct ViewOffset<
|
|||||||
/* Cardinality of the domain index space */
|
/* Cardinality of the domain index space */
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
constexpr size_type size() const {
|
constexpr size_type size() const {
|
||||||
return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 *
|
return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 *
|
||||||
m_dim.N6 * m_dim.N7;
|
m_dim.N5 * m_dim.N6 * m_dim.N7;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Span of the range space */
|
/* Span of the range space */
|
||||||
@ -1633,15 +1634,15 @@ struct ViewOffset<
|
|||||||
/* Cardinality of the domain index space */
|
/* Cardinality of the domain index space */
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
constexpr size_type size() const {
|
constexpr size_type size() const {
|
||||||
return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 *
|
return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 *
|
||||||
m_dim.N6 * m_dim.N7;
|
m_dim.N5 * m_dim.N6 * m_dim.N7;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Span of the range space */
|
/* Span of the range space */
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
constexpr size_type span() const {
|
constexpr size_type span() const {
|
||||||
return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 *
|
return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 *
|
||||||
m_dim.N6 * m_dim.N7;
|
m_dim.N5 * m_dim.N6 * m_dim.N7;
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
|
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
|
||||||
@ -1916,14 +1917,14 @@ struct ViewOffset<
|
|||||||
/* Cardinality of the domain index space */
|
/* Cardinality of the domain index space */
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
constexpr size_type size() const {
|
constexpr size_type size() const {
|
||||||
return m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 *
|
return size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 *
|
||||||
m_dim.N6 * m_dim.N7;
|
m_dim.N5 * m_dim.N6 * m_dim.N7;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Span of the range space */
|
/* Span of the range space */
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
constexpr size_type span() const {
|
constexpr size_type span() const {
|
||||||
return size() > 0 ? m_dim.N0 * m_stride : 0;
|
return size() > 0 ? size_type(m_dim.N0) * m_stride : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
|
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
|
||||||
@ -2066,27 +2067,29 @@ struct ViewOffset<
|
|||||||
stride(/* 2 <= rank */
|
stride(/* 2 <= rank */
|
||||||
m_dim.N1 *
|
m_dim.N1 *
|
||||||
(dimension_type::rank == 2
|
(dimension_type::rank == 2
|
||||||
? 1
|
? size_t(1)
|
||||||
: m_dim.N2 *
|
: m_dim.N2 *
|
||||||
(dimension_type::rank == 3
|
(dimension_type::rank == 3
|
||||||
? 1
|
? size_t(1)
|
||||||
: m_dim.N3 *
|
: m_dim.N3 *
|
||||||
(dimension_type::rank == 4
|
(dimension_type::rank == 4
|
||||||
? 1
|
? size_t(1)
|
||||||
: m_dim.N4 *
|
: m_dim.N4 *
|
||||||
(dimension_type::rank ==
|
(dimension_type::rank ==
|
||||||
5
|
5
|
||||||
? 1
|
? size_t(1)
|
||||||
: m_dim.N5 *
|
: m_dim.N5 *
|
||||||
(dimension_type::
|
(dimension_type::
|
||||||
rank ==
|
rank ==
|
||||||
6
|
6
|
||||||
? 1
|
? size_t(
|
||||||
|
1)
|
||||||
: m_dim.N6 *
|
: m_dim.N6 *
|
||||||
(dimension_type::
|
(dimension_type::
|
||||||
rank ==
|
rank ==
|
||||||
7
|
7
|
||||||
? 1
|
? size_t(
|
||||||
|
1)
|
||||||
: m_dim
|
: m_dim
|
||||||
.N7)))))))) {
|
.N7)))))))) {
|
||||||
}
|
}
|
||||||
@ -2447,8 +2450,8 @@ struct ViewOffset<Dimension, Kokkos::LayoutStride, void> {
|
|||||||
constexpr size_type size() const {
|
constexpr size_type size() const {
|
||||||
return dimension_type::rank == 0
|
return dimension_type::rank == 0
|
||||||
? 1
|
? 1
|
||||||
: m_dim.N0 * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 *
|
: size_type(m_dim.N0) * m_dim.N1 * m_dim.N2 * m_dim.N3 *
|
||||||
m_dim.N5 * m_dim.N6 * m_dim.N7;
|
m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|||||||
@ -91,6 +91,7 @@ struct ViewTracker {
|
|||||||
|
|
||||||
template <class RT, class... RP>
|
template <class RT, class... RP>
|
||||||
KOKKOS_INLINE_FUNCTION void assign(const View<RT, RP...>& vt) noexcept {
|
KOKKOS_INLINE_FUNCTION void assign(const View<RT, RP...>& vt) noexcept {
|
||||||
|
if (this == reinterpret_cast<const ViewTracker*>(&vt.m_track)) return;
|
||||||
KOKKOS_IF_ON_HOST((
|
KOKKOS_IF_ON_HOST((
|
||||||
if (view_traits::is_managed && Kokkos::Impl::SharedAllocationRecord<
|
if (view_traits::is_managed && Kokkos::Impl::SharedAllocationRecord<
|
||||||
void, void>::tracking_enabled()) {
|
void, void>::tracking_enabled()) {
|
||||||
@ -102,6 +103,7 @@ struct ViewTracker {
|
|||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION ViewTracker& operator=(
|
KOKKOS_INLINE_FUNCTION ViewTracker& operator=(
|
||||||
const ViewTracker& rhs) noexcept {
|
const ViewTracker& rhs) noexcept {
|
||||||
|
if (this == &rhs) return *this;
|
||||||
KOKKOS_IF_ON_HOST((
|
KOKKOS_IF_ON_HOST((
|
||||||
if (view_traits::is_managed && Kokkos::Impl::SharedAllocationRecord<
|
if (view_traits::is_managed && Kokkos::Impl::SharedAllocationRecord<
|
||||||
void, void>::tracking_enabled()) {
|
void, void>::tracking_enabled()) {
|
||||||
|
|||||||
@ -1087,6 +1087,20 @@ class TestViewAPI {
|
|||||||
dView4_unmanaged unmanaged_dx = dx;
|
dView4_unmanaged unmanaged_dx = dx;
|
||||||
ASSERT_EQ(dx.use_count(), 1);
|
ASSERT_EQ(dx.use_count(), 1);
|
||||||
|
|
||||||
|
// Test self assignment
|
||||||
|
#if defined(__clang__)
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wself-assign-overloaded"
|
||||||
|
#endif
|
||||||
|
dx = dx; // copy-assignment operator
|
||||||
|
#if defined(__clang__)
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
#endif
|
||||||
|
ASSERT_EQ(dx.use_count(), 1);
|
||||||
|
dx = reinterpret_cast<typename dView4::uniform_type &>(
|
||||||
|
dx); // conversion assignment operator
|
||||||
|
ASSERT_EQ(dx.use_count(), 1);
|
||||||
|
|
||||||
dView4_unmanaged unmanaged_from_ptr_dx = dView4_unmanaged(
|
dView4_unmanaged unmanaged_from_ptr_dx = dView4_unmanaged(
|
||||||
dx.data(), dx.extent(0), dx.extent(1), dx.extent(2), dx.extent(3));
|
dx.data(), dx.extent(0), dx.extent(1), dx.extent(2), dx.extent(3));
|
||||||
|
|
||||||
|
|||||||
@ -240,6 +240,35 @@ struct TestViewOverloadResolution {
|
|||||||
TEST(TEST_CATEGORY, view_overload_resolution) {
|
TEST(TEST_CATEGORY, view_overload_resolution) {
|
||||||
TestViewOverloadResolution<TEST_EXECSPACE>::test_function_overload();
|
TestViewOverloadResolution<TEST_EXECSPACE>::test_function_overload();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename MemorySpace>
|
||||||
|
struct TestViewAllocationLargeRank {
|
||||||
|
using ViewType = Kokkos::View<char********, MemorySpace>;
|
||||||
|
|
||||||
|
KOKKOS_FUNCTION void operator()(int) const {
|
||||||
|
size_t idx = v.extent(0) - 1;
|
||||||
|
auto& lhs = v(idx, idx, idx, idx, idx, idx, idx, idx);
|
||||||
|
lhs = 42; // This is where it segfaulted
|
||||||
|
}
|
||||||
|
|
||||||
|
ViewType v;
|
||||||
|
};
|
||||||
|
|
||||||
|
TEST(TEST_CATEGORY, view_allocation_large_rank) {
|
||||||
|
using ExecutionSpace = typename TEST_EXECSPACE::execution_space;
|
||||||
|
using MemorySpace = typename TEST_EXECSPACE::memory_space;
|
||||||
|
constexpr int dim = 16;
|
||||||
|
using FunctorType = TestViewAllocationLargeRank<MemorySpace>;
|
||||||
|
typename FunctorType::ViewType v("v", dim, dim, dim, dim, dim, dim, dim, dim);
|
||||||
|
|
||||||
|
Kokkos::parallel_for(Kokkos::RangePolicy<ExecutionSpace>(0, 1),
|
||||||
|
FunctorType{v});
|
||||||
|
typename FunctorType::ViewType v_single(v.data() + v.size() - 1, 1, 1, 1, 1,
|
||||||
|
1, 1, 1, 1);
|
||||||
|
auto result =
|
||||||
|
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, v_single);
|
||||||
|
ASSERT_EQ(result(0, 0, 0, 0, 0, 0, 0, 0), 42);
|
||||||
|
}
|
||||||
} // namespace Test
|
} // namespace Test
|
||||||
|
|
||||||
#include <TestViewIsAssignable.hpp>
|
#include <TestViewIsAssignable.hpp>
|
||||||
|
|||||||
@ -240,11 +240,8 @@ TEST(kokkosp, test_id_gen) {
|
|||||||
Kokkos::DefaultExecutionSpace ex;
|
Kokkos::DefaultExecutionSpace ex;
|
||||||
auto id = device_id(ex);
|
auto id = device_id(ex);
|
||||||
auto id_ref = identifier_from_devid(id);
|
auto id_ref = identifier_from_devid(id);
|
||||||
auto success = (id_ref.instance_id == ex.impl_instance_id()) &&
|
ASSERT_EQ(DeviceTypeTraits<decltype(ex)>::id, id_ref.type);
|
||||||
(id_ref.device_id ==
|
ASSERT_EQ(id_ref.instance_id, ex.impl_instance_id());
|
||||||
static_cast<uint32_t>(
|
|
||||||
DeviceTypeTraits<Kokkos::DefaultExecutionSpace>::id));
|
|
||||||
ASSERT_TRUE(success);
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -253,6 +250,7 @@ TEST(kokkosp, test_id_gen) {
|
|||||||
*/
|
*/
|
||||||
TEST(kokkosp, test_kernel_sequence) {
|
TEST(kokkosp, test_kernel_sequence) {
|
||||||
test_wrapper([&]() {
|
test_wrapper([&]() {
|
||||||
|
Kokkos::DefaultExecutionSpace ex;
|
||||||
auto root = Kokkos::Tools::Experimental::device_id_root<
|
auto root = Kokkos::Tools::Experimental::device_id_root<
|
||||||
Kokkos::DefaultExecutionSpace>();
|
Kokkos::DefaultExecutionSpace>();
|
||||||
std::vector<FencePayload> expected{
|
std::vector<FencePayload> expected{
|
||||||
@ -260,11 +258,10 @@ TEST(kokkosp, test_kernel_sequence) {
|
|||||||
{"named_instance", FencePayload::distinguishable_devices::no,
|
{"named_instance", FencePayload::distinguishable_devices::no,
|
||||||
root + num_instances},
|
root + num_instances},
|
||||||
{"test_kernel", FencePayload::distinguishable_devices::no,
|
{"test_kernel", FencePayload::distinguishable_devices::no,
|
||||||
root + num_instances}
|
Kokkos::Tools::Experimental::device_id(ex)}
|
||||||
|
|
||||||
};
|
};
|
||||||
expect_fence_events(expected, [=]() {
|
expect_fence_events(expected, [=]() {
|
||||||
Kokkos::DefaultExecutionSpace ex;
|
|
||||||
TestFunctor tf;
|
TestFunctor tf;
|
||||||
ex.fence("named_instance");
|
ex.fence("named_instance");
|
||||||
Kokkos::parallel_for(
|
Kokkos::parallel_for(
|
||||||
|
|||||||
@ -27,3 +27,4 @@ tag: 3.4.00 date: 04:26:2021 master: 1fb0c284 release: 5d7738d6
|
|||||||
tag: 3.4.01 date: 05:20:2021 master: 4b97a22f release: 410b15c8
|
tag: 3.4.01 date: 05:20:2021 master: 4b97a22f release: 410b15c8
|
||||||
tag: 3.5.00 date: 11:19:2021 master: c28a8b03 release: 21b879e4
|
tag: 3.5.00 date: 11:19:2021 master: c28a8b03 release: 21b879e4
|
||||||
tag: 3.6.00 date: 04:14:2022 master: 2834f94a release: 6ea708ff
|
tag: 3.6.00 date: 04:14:2022 master: 2834f94a release: 6ea708ff
|
||||||
|
tag: 3.6.01 date: 06:16:2022 master: b52f8c83 release: afe9b404
|
||||||
|
|||||||
Reference in New Issue
Block a user