Update Kokkos library in LAMMPS to v4.4.1
This commit is contained in:
@ -1,5 +1,20 @@
|
||||
# CHANGELOG
|
||||
|
||||
## [4.4.01](https://github.com/kokkos/kokkos/tree/4.4.01)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.0.00...4.4.01)
|
||||
|
||||
### Features:
|
||||
* Introduce new SequentialHostInit view allocation property [\#7229](https://github.com/kokkos/kokkos/pull/7229)
|
||||
|
||||
### Backend and Architecture Enhancements:
|
||||
|
||||
#### CUDA:
|
||||
* Experimental support for unified memory mode (intended for Grace-Hopper etc.) [\#6823](https://github.com/kokkos/kokkos/pull/6823)
|
||||
|
||||
### Bug Fixes
|
||||
* OpenMP: Fix issue related to the visibility of an internal symbol with shared libraries that affected `ScatterView` in particular [\#7284](https://github.com/kokkos/kokkos/pull/7284)
|
||||
* Fix implicit copy assignment operators in few AVX2 masks being deleted [#7296](https://github.com/kokkos/kokkos/pull/7296)
|
||||
|
||||
## [4.4.00](https://github.com/kokkos/kokkos/tree/4.4.00)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/4.3.01...4.4.00)
|
||||
|
||||
|
||||
@ -151,7 +151,7 @@ ENDIF()
|
||||
|
||||
set(Kokkos_VERSION_MAJOR 4)
|
||||
set(Kokkos_VERSION_MINOR 4)
|
||||
set(Kokkos_VERSION_PATCH 0)
|
||||
set(Kokkos_VERSION_PATCH 1)
|
||||
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
|
||||
message(STATUS "Kokkos version: ${Kokkos_VERSION}")
|
||||
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")
|
||||
|
||||
@ -12,7 +12,7 @@ endif
|
||||
|
||||
KOKKOS_VERSION_MAJOR = 4
|
||||
KOKKOS_VERSION_MINOR = 4
|
||||
KOKKOS_VERSION_PATCH = 0
|
||||
KOKKOS_VERSION_PATCH = 1
|
||||
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
|
||||
|
||||
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial
|
||||
|
||||
@ -37,6 +37,7 @@
|
||||
#cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA // deprecated
|
||||
#cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR
|
||||
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC
|
||||
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
|
||||
#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
|
||||
#cmakedefine KOKKOS_ENABLE_HIP_MULTIPLE_KERNEL_INSTANTIATIONS
|
||||
#cmakedefine KOKKOS_ENABLE_IMPL_HIP_UNIFIED_MEMORY
|
||||
|
||||
@ -48,6 +48,8 @@ KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to allow lambda
|
||||
# resolved but we keep the option around a bit longer to be safe.
|
||||
KOKKOS_ENABLE_OPTION(IMPL_CUDA_MALLOC_ASYNC ON "Whether to enable CudaMallocAsync (requires CUDA Toolkit 11.2)")
|
||||
KOKKOS_ENABLE_OPTION(IMPL_NVHPC_AS_DEVICE_COMPILER OFF "Whether to allow nvc++ as Cuda device compiler")
|
||||
KOKKOS_ENABLE_OPTION(IMPL_CUDA_UNIFIED_MEMORY OFF "Whether to leverage unified memory architectures for CUDA")
|
||||
|
||||
KOKKOS_ENABLE_OPTION(DEPRECATED_CODE_4 ON "Whether code deprecated in major release 4 is available" )
|
||||
KOKKOS_ENABLE_OPTION(DEPRECATION_WARNINGS ON "Whether to emit deprecation warnings" )
|
||||
KOKKOS_ENABLE_OPTION(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP")
|
||||
@ -135,7 +137,7 @@ FUNCTION(check_device_specific_options)
|
||||
ENDIF()
|
||||
ENDFUNCTION()
|
||||
|
||||
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC)
|
||||
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC IMPL_CUDA_UNIFIED_MEMORY)
|
||||
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HIP OPTIONS HIP_RELOCATABLE_DEVICE_CODE)
|
||||
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HPX OPTIONS IMPL_HPX_ASYNC_DISPATCH)
|
||||
|
||||
|
||||
@ -37,6 +37,17 @@
|
||||
#endif
|
||||
///@}
|
||||
|
||||
/// Some tests are skipped for unified memory space
|
||||
#if defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
|
||||
#define GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE \
|
||||
if constexpr (std::is_same_v<typename TEST_EXECSPACE::memory_space, \
|
||||
Kokkos::CudaSpace>) \
|
||||
GTEST_SKIP() << "skipping since unified memory requires additional " \
|
||||
"fences";
|
||||
#else
|
||||
#define GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE
|
||||
#endif
|
||||
|
||||
TEST(TEST_CATEGORY, resize_realloc_no_init_dualview) {
|
||||
using namespace Kokkos::Test::Tools;
|
||||
listen_tool_events(Config::DisableAll(), Config::EnableKernels());
|
||||
@ -657,6 +668,7 @@ TEST(TEST_CATEGORY, create_mirror_no_init_dynamicview) {
|
||||
|
||||
TEST(TEST_CATEGORY, create_mirror_view_and_copy_dynamicview) {
|
||||
GTEST_SKIP_IF_CUDAUVM_MEMORY_SPACE
|
||||
GTEST_SKIP_IF_UNIFIED_MEMORY_SPACE
|
||||
|
||||
using namespace Kokkos::Test::Tools;
|
||||
listen_tool_events(Config::DisableAll(), Config::EnableKernels(),
|
||||
|
||||
@ -31,7 +31,6 @@
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
|
||||
//#include <Cuda/Kokkos_Cuda_BlockSize_Deduction.hpp>
|
||||
#include <impl/Kokkos_Error.hpp>
|
||||
|
||||
#include <impl/Kokkos_Tools.hpp>
|
||||
@ -178,6 +177,29 @@ void *impl_allocate_common(const int device_id,
|
||||
cudaError_t error_code = cudaSuccess;
|
||||
#ifndef CUDART_VERSION
|
||||
#error CUDART_VERSION undefined!
|
||||
#elif defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
|
||||
// This is intended for Grace-Hopper (and future unified memory architectures)
|
||||
// The idea is to use host allocator and then advise to keep it in HBM on the
|
||||
// device, but that requires CUDA 12.2
|
||||
static_assert(CUDART_VERSION >= 12020,
|
||||
"CUDA runtime version >=12.2 required when "
|
||||
"Kokkos_ENABLE_IMPL_CUDA_UNIFIED_MEMORY is set. "
|
||||
"Please update your CUDA runtime version or "
|
||||
"reconfigure with "
|
||||
"-D Kokkos_ENABLE_IMPL_CUDA_UNIFIED_MEMORY=OFF");
|
||||
if (arg_alloc_size) { // cudaMemAdvise_v2 does not work with nullptr
|
||||
error_code = cudaMallocManaged(&ptr, arg_alloc_size, cudaMemAttachGlobal);
|
||||
if (error_code == cudaSuccess) {
|
||||
// One would think cudaMemLocation{device_id,
|
||||
// cudaMemLocationTypeDevice} would work but it doesn't. I.e. the order of
|
||||
// members doesn't seem to be defined.
|
||||
cudaMemLocation loc;
|
||||
loc.id = device_id;
|
||||
loc.type = cudaMemLocationTypeDevice;
|
||||
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemAdvise_v2(
|
||||
ptr, arg_alloc_size, cudaMemAdviseSetPreferredLocation, loc));
|
||||
}
|
||||
}
|
||||
#elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020)
|
||||
if (arg_alloc_size >= memory_threshold_g) {
|
||||
error_code = cudaMallocAsync(&ptr, arg_alloc_size, stream);
|
||||
@ -190,9 +212,13 @@ void *impl_allocate_common(const int device_id,
|
||||
"Kokkos::Cuda: backend fence after async malloc");
|
||||
}
|
||||
}
|
||||
} else
|
||||
} else {
|
||||
error_code = cudaMalloc(&ptr, arg_alloc_size);
|
||||
}
|
||||
#else
|
||||
error_code = cudaMalloc(&ptr, arg_alloc_size);
|
||||
#endif
|
||||
{ error_code = cudaMalloc(&ptr, arg_alloc_size); }
|
||||
|
||||
if (error_code != cudaSuccess) { // TODO tag as unlikely branch
|
||||
// This is the only way to clear the last error, which
|
||||
// we should do here since we're turning it into an
|
||||
@ -326,6 +352,9 @@ void CudaSpace::impl_deallocate(
|
||||
}
|
||||
#ifndef CUDART_VERSION
|
||||
#error CUDART_VERSION undefined!
|
||||
#elif defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
|
||||
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(m_device));
|
||||
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(arg_alloc_ptr));
|
||||
#elif (defined(KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC) && CUDART_VERSION >= 11020)
|
||||
if (arg_alloc_size >= memory_threshold_g) {
|
||||
Impl::cuda_device_synchronize(
|
||||
@ -436,8 +465,12 @@ void cuda_prefetch_pointer(const Cuda &space, const void *ptr, size_t bytes,
|
||||
|
||||
#include <impl/Kokkos_SharedAlloc_timpl.hpp>
|
||||
|
||||
#if !defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
|
||||
KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(
|
||||
Kokkos::CudaSpace);
|
||||
#else
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(Kokkos::CudaSpace);
|
||||
#endif
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(
|
||||
Kokkos::CudaUVMSpace);
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(
|
||||
|
||||
@ -88,6 +88,19 @@ class CudaSpace {
|
||||
void* allocate(const char* arg_label, const size_t arg_alloc_size,
|
||||
const size_t arg_logical_size = 0) const;
|
||||
|
||||
#if defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
|
||||
template <typename ExecutionSpace>
|
||||
void* allocate(const ExecutionSpace&, const size_t arg_alloc_size) const {
|
||||
return allocate(arg_alloc_size);
|
||||
}
|
||||
template <typename ExecutionSpace>
|
||||
void* allocate(const ExecutionSpace&, const char* arg_label,
|
||||
const size_t arg_alloc_size,
|
||||
const size_t arg_logical_size = 0) const {
|
||||
return allocate(arg_label, arg_alloc_size, arg_logical_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**\brief Deallocate untracked memory in the cuda space */
|
||||
void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
|
||||
void deallocate(const char* arg_label, void* const arg_alloc_ptr,
|
||||
@ -337,7 +350,11 @@ static_assert(
|
||||
template <>
|
||||
struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaSpace> {
|
||||
enum : bool { assignable = false };
|
||||
enum : bool { accessible = false };
|
||||
#if !defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
|
||||
enum : bool{accessible = false};
|
||||
#else
|
||||
enum : bool { accessible = true };
|
||||
#endif
|
||||
enum : bool { deepcopy = true };
|
||||
};
|
||||
|
||||
@ -558,8 +575,12 @@ struct DeepCopy<HostSpace, MemSpace, ExecutionSpace,
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if !defined(KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY)
|
||||
KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_SPECIALIZATION(
|
||||
Kokkos::CudaSpace);
|
||||
#else
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::CudaSpace);
|
||||
#endif
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::CudaUVMSpace);
|
||||
KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(Kokkos::CudaHostPinnedSpace);
|
||||
|
||||
|
||||
@ -607,6 +607,22 @@ Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default
|
||||
|
||||
//----------------------------------
|
||||
|
||||
#ifdef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
|
||||
// Check if unified memory is available
|
||||
int cuda_result;
|
||||
cudaDeviceGetAttribute(&cuda_result, cudaDevAttrConcurrentManagedAccess,
|
||||
cuda_device_id);
|
||||
if (cuda_result == 0) {
|
||||
Kokkos::abort(
|
||||
"Kokkos::Cuda::initialize ERROR: Unified memory is not available on "
|
||||
"this device\n"
|
||||
"Please recompile Kokkos with "
|
||||
"-DKokkos_ENABLE_IMPL_CUDA_UNIFIED_MEMORY=OFF\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
//----------------------------------
|
||||
|
||||
cudaStream_t singleton_stream;
|
||||
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaSetDevice(cuda_device_id));
|
||||
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaStreamCreate(&singleton_stream));
|
||||
@ -705,6 +721,10 @@ void Cuda::print_configuration(std::ostream &os, bool /*verbose*/) const {
|
||||
#else
|
||||
os << "no\n";
|
||||
#endif
|
||||
#ifdef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
|
||||
os << " KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY: ";
|
||||
os << "yes\n";
|
||||
#endif
|
||||
|
||||
os << "\nCuda Runtime Configuration:\n";
|
||||
|
||||
|
||||
@ -571,6 +571,8 @@ inline constexpr Kokkos::ALL_t ALL{};
|
||||
#pragma omp end declare target
|
||||
#endif
|
||||
|
||||
inline constexpr Kokkos::Impl::SequentialHostInit_t SequentialHostInit{};
|
||||
|
||||
inline constexpr Kokkos::Impl::WithoutInitializing_t WithoutInitializing{};
|
||||
|
||||
inline constexpr Kokkos::Impl::AllowPadding_t AllowPadding{};
|
||||
|
||||
@ -113,7 +113,7 @@ int OpenMP::impl_thread_pool_size() const noexcept {
|
||||
}
|
||||
|
||||
int OpenMP::impl_max_hardware_threads() noexcept {
|
||||
return Impl::g_openmp_hardware_max_threads;
|
||||
return Impl::OpenMPInternal::max_hardware_threads();
|
||||
}
|
||||
|
||||
namespace Impl {
|
||||
|
||||
@ -31,12 +31,20 @@
|
||||
#include <sstream>
|
||||
#include <thread>
|
||||
|
||||
namespace {
|
||||
int g_openmp_hardware_max_threads = 1;
|
||||
}
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
std::vector<OpenMPInternal *> OpenMPInternal::all_instances;
|
||||
std::mutex OpenMPInternal::all_instances_mutex;
|
||||
|
||||
int OpenMPInternal::max_hardware_threads() noexcept {
|
||||
return g_openmp_hardware_max_threads;
|
||||
}
|
||||
|
||||
void OpenMPInternal::clear_thread_data() {
|
||||
const size_t member_bytes =
|
||||
sizeof(int64_t) *
|
||||
@ -188,9 +196,9 @@ void OpenMPInternal::initialize(int thread_count) {
|
||||
// Before any other call to OMP query the maximum number of threads
|
||||
// and save the value for re-initialization unit testing.
|
||||
|
||||
Impl::g_openmp_hardware_max_threads = get_current_max_threads();
|
||||
g_openmp_hardware_max_threads = get_current_max_threads();
|
||||
|
||||
int process_num_threads = Impl::g_openmp_hardware_max_threads;
|
||||
int process_num_threads = g_openmp_hardware_max_threads;
|
||||
|
||||
if (Kokkos::hwloc::available()) {
|
||||
process_num_threads = Kokkos::hwloc::get_available_numa_count() *
|
||||
@ -203,11 +211,11 @@ void OpenMPInternal::initialize(int thread_count) {
|
||||
// process_num_threads if thread_count > 0, set
|
||||
// g_openmp_hardware_max_threads to thread_count
|
||||
if (thread_count < 0) {
|
||||
thread_count = Impl::g_openmp_hardware_max_threads;
|
||||
thread_count = g_openmp_hardware_max_threads;
|
||||
} else if (thread_count == 0) {
|
||||
if (Impl::g_openmp_hardware_max_threads != process_num_threads) {
|
||||
Impl::g_openmp_hardware_max_threads = process_num_threads;
|
||||
omp_set_num_threads(Impl::g_openmp_hardware_max_threads);
|
||||
if (g_openmp_hardware_max_threads != process_num_threads) {
|
||||
g_openmp_hardware_max_threads = process_num_threads;
|
||||
omp_set_num_threads(g_openmp_hardware_max_threads);
|
||||
}
|
||||
} else {
|
||||
if (Kokkos::show_warnings() && thread_count > process_num_threads) {
|
||||
@ -218,16 +226,16 @@ void OpenMPInternal::initialize(int thread_count) {
|
||||
<< ", requested thread : " << std::setw(3) << thread_count
|
||||
<< std::endl;
|
||||
}
|
||||
Impl::g_openmp_hardware_max_threads = thread_count;
|
||||
omp_set_num_threads(Impl::g_openmp_hardware_max_threads);
|
||||
g_openmp_hardware_max_threads = thread_count;
|
||||
omp_set_num_threads(g_openmp_hardware_max_threads);
|
||||
}
|
||||
|
||||
// setup thread local
|
||||
#pragma omp parallel num_threads(Impl::g_openmp_hardware_max_threads)
|
||||
#pragma omp parallel num_threads(g_openmp_hardware_max_threads)
|
||||
{ Impl::SharedAllocationRecord<void, void>::tracking_enable(); }
|
||||
|
||||
auto &instance = OpenMPInternal::singleton();
|
||||
instance.m_pool_size = Impl::g_openmp_hardware_max_threads;
|
||||
instance.m_pool_size = g_openmp_hardware_max_threads;
|
||||
|
||||
// New, unified host thread team data:
|
||||
{
|
||||
@ -272,10 +280,9 @@ void OpenMPInternal::finalize() {
|
||||
if (this == &singleton()) {
|
||||
auto const &instance = singleton();
|
||||
// Silence Cuda Warning
|
||||
const int nthreads =
|
||||
instance.m_pool_size <= Impl::g_openmp_hardware_max_threads
|
||||
? Impl::g_openmp_hardware_max_threads
|
||||
: instance.m_pool_size;
|
||||
const int nthreads = instance.m_pool_size <= g_openmp_hardware_max_threads
|
||||
? g_openmp_hardware_max_threads
|
||||
: instance.m_pool_size;
|
||||
(void)nthreads;
|
||||
|
||||
#pragma omp parallel num_threads(nthreads)
|
||||
@ -284,7 +291,7 @@ void OpenMPInternal::finalize() {
|
||||
// allow main thread to track
|
||||
Impl::SharedAllocationRecord<void, void>::tracking_enable();
|
||||
|
||||
Impl::g_openmp_hardware_max_threads = 1;
|
||||
g_openmp_hardware_max_threads = 1;
|
||||
}
|
||||
|
||||
m_initialized = false;
|
||||
@ -307,7 +314,7 @@ void OpenMPInternal::print_configuration(std::ostream &s) const {
|
||||
|
||||
if (m_initialized) {
|
||||
const int numa_count = 1;
|
||||
const int core_per_numa = Impl::g_openmp_hardware_max_threads;
|
||||
const int core_per_numa = g_openmp_hardware_max_threads;
|
||||
const int thread_per_core = 1;
|
||||
|
||||
s << " thread_pool_topology[ " << numa_count << " x " << core_per_numa
|
||||
|
||||
@ -47,8 +47,6 @@ namespace Impl {
|
||||
|
||||
class OpenMPInternal;
|
||||
|
||||
inline int g_openmp_hardware_max_threads = 1;
|
||||
|
||||
struct OpenMPTraits {
|
||||
static constexpr int MAX_THREAD_COUNT = 512;
|
||||
};
|
||||
@ -86,6 +84,8 @@ class OpenMPInternal {
|
||||
|
||||
void clear_thread_data();
|
||||
|
||||
static int max_hardware_threads() noexcept;
|
||||
|
||||
int thread_pool_size() const { return m_pool_size; }
|
||||
|
||||
void resize_thread_data(size_t pool_reduce_bytes, size_t team_reduce_bytes,
|
||||
|
||||
@ -105,7 +105,8 @@ class UniqueToken<OpenMP, UniqueTokenScope::Global> {
|
||||
/// \brief upper bound for acquired values, i.e. 0 <= value < size()
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int size() const noexcept {
|
||||
KOKKOS_IF_ON_HOST((return Kokkos::Impl::g_openmp_hardware_max_threads;))
|
||||
KOKKOS_IF_ON_HOST(
|
||||
(return Kokkos::Impl::OpenMPInternal::max_hardware_threads();))
|
||||
|
||||
KOKKOS_IF_ON_DEVICE((return 0;))
|
||||
}
|
||||
|
||||
@ -313,6 +313,51 @@ struct ViewValueFunctor<DeviceType, ValueType, true /* is_scalar */> {
|
||||
|
||||
void destroy_shared_allocation() {}
|
||||
};
|
||||
|
||||
template <class DeviceType, class ValueType>
|
||||
struct ViewValueFunctorSequentialHostInit {
|
||||
using ExecSpace = typename DeviceType::execution_space;
|
||||
using MemSpace = typename DeviceType::memory_space;
|
||||
static_assert(SpaceAccessibility<HostSpace, MemSpace>::accessible);
|
||||
|
||||
ValueType* ptr;
|
||||
size_t n;
|
||||
|
||||
ViewValueFunctorSequentialHostInit() = default;
|
||||
|
||||
ViewValueFunctorSequentialHostInit(ExecSpace const& /*arg_space*/,
|
||||
ValueType* const arg_ptr,
|
||||
size_t const arg_n,
|
||||
std::string /*arg_name*/)
|
||||
: ptr(arg_ptr), n(arg_n) {}
|
||||
|
||||
ViewValueFunctorSequentialHostInit(ValueType* const arg_ptr,
|
||||
size_t const arg_n,
|
||||
std::string /*arg_name*/)
|
||||
: ptr(arg_ptr), n(arg_n) {}
|
||||
|
||||
void construct_shared_allocation() {
|
||||
if constexpr (std::is_trivial_v<ValueType>) {
|
||||
// value-initialization is equivalent to filling with zeros
|
||||
std::memset(static_cast<void*>(ptr), 0, n * sizeof(ValueType));
|
||||
} else {
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
new (ptr + i) ValueType();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void destroy_shared_allocation() {
|
||||
if constexpr (std::is_trivially_destructible_v<ValueType>) {
|
||||
// do nothing, don't bother calling the destructor
|
||||
} else {
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
(ptr + i)->~ValueType();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Kokkos::Impl
|
||||
|
||||
#endif // KOKKOS_VIEW_ALLOC_HPP
|
||||
|
||||
@ -23,12 +23,16 @@
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
struct SequentialHostInit_t {};
|
||||
struct WithoutInitializing_t {};
|
||||
struct AllowPadding_t {};
|
||||
|
||||
template <typename>
|
||||
struct is_view_ctor_property : public std::false_type {};
|
||||
|
||||
template <>
|
||||
struct is_view_ctor_property<SequentialHostInit_t> : public std::true_type {};
|
||||
|
||||
template <>
|
||||
struct is_view_ctor_property<WithoutInitializing_t> : public std::true_type {};
|
||||
|
||||
@ -84,10 +88,10 @@ struct ViewCtorProp<void, CommonViewAllocProp<Specialize, T>> {
|
||||
|
||||
/* Property flags have constexpr value */
|
||||
template <typename P>
|
||||
struct ViewCtorProp<
|
||||
std::enable_if_t<std::is_same<P, AllowPadding_t>::value ||
|
||||
std::is_same<P, WithoutInitializing_t>::value>,
|
||||
P> {
|
||||
struct ViewCtorProp<std::enable_if_t<std::is_same_v<P, AllowPadding_t> ||
|
||||
std::is_same_v<P, WithoutInitializing_t> ||
|
||||
std::is_same_v<P, SequentialHostInit_t>>,
|
||||
P> {
|
||||
ViewCtorProp() = default;
|
||||
ViewCtorProp(const ViewCtorProp &) = default;
|
||||
ViewCtorProp &operator=(const ViewCtorProp &) = default;
|
||||
@ -199,6 +203,11 @@ struct ViewCtorProp : public ViewCtorProp<void, P>... {
|
||||
Kokkos::Impl::has_type<AllowPadding_t, P...>::value;
|
||||
static constexpr bool initialize =
|
||||
!Kokkos::Impl::has_type<WithoutInitializing_t, P...>::value;
|
||||
static constexpr bool sequential_host_init =
|
||||
Kokkos::Impl::has_type<SequentialHostInit_t, P...>::value;
|
||||
static_assert(initialize || !sequential_host_init,
|
||||
"Incompatible WithoutInitializing and SequentialHostInit view "
|
||||
"alloc properties");
|
||||
|
||||
using memory_space = typename var_memory_space::type;
|
||||
using execution_space = typename var_execution_space::type;
|
||||
@ -251,7 +260,9 @@ auto with_properties_if_unset(const ViewCtorProp<P...> &view_ctor_prop,
|
||||
(is_view_label<Property>::value &&
|
||||
!ViewCtorProp<P...>::has_label) ||
|
||||
(std::is_same_v<Property, WithoutInitializing_t> &&
|
||||
ViewCtorProp<P...>::initialize)) {
|
||||
ViewCtorProp<P...>::initialize) ||
|
||||
(std::is_same_v<Property, SequentialHostInit_t> &&
|
||||
!ViewCtorProp<P...>::sequential_host_init)) {
|
||||
using NewViewCtorProp = ViewCtorProp<P..., Property>;
|
||||
NewViewCtorProp new_view_ctor_prop(view_ctor_prop);
|
||||
static_cast<ViewCtorProp<void, Property> &>(new_view_ctor_prop).value =
|
||||
@ -299,7 +310,9 @@ struct WithPropertiesIfUnset<ViewCtorProp<P...>, Property, Properties...> {
|
||||
(is_view_label<Property>::value &&
|
||||
!ViewCtorProp<P...>::has_label) ||
|
||||
(std::is_same_v<Property, WithoutInitializing_t> &&
|
||||
ViewCtorProp<P...>::initialize)) {
|
||||
ViewCtorProp<P...>::initialize) ||
|
||||
(std::is_same_v<Property, SequentialHostInit_t> &&
|
||||
!ViewCtorProp<P...>::sequential_host_init)) {
|
||||
using NewViewCtorProp = ViewCtorProp<P..., Property>;
|
||||
NewViewCtorProp new_view_ctor_prop(view_ctor_prop);
|
||||
static_cast<ViewCtorProp<void, Property> &>(new_view_ctor_prop).value =
|
||||
|
||||
@ -2825,10 +2825,12 @@ class ViewMapping<
|
||||
using memory_space = typename Traits::memory_space;
|
||||
static_assert(
|
||||
SpaceAccessibility<execution_space, memory_space>::accessible);
|
||||
using value_type = typename Traits::value_type;
|
||||
using functor_type =
|
||||
ViewValueFunctor<Kokkos::Device<execution_space, memory_space>,
|
||||
value_type>;
|
||||
using device_type = Kokkos::Device<execution_space, memory_space>;
|
||||
using value_type = typename Traits::value_type;
|
||||
using functor_type = std::conditional_t<
|
||||
alloc_prop::sequential_host_init,
|
||||
ViewValueFunctorSequentialHostInit<device_type, value_type>,
|
||||
ViewValueFunctor<device_type, value_type>>;
|
||||
using record_type =
|
||||
Kokkos::Impl::SharedAllocationRecord<memory_space, functor_type>;
|
||||
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
|
||||
namespace {
|
||||
|
||||
// User-defined type with a View data member
|
||||
// User-defined types with a View data member
|
||||
template <class V>
|
||||
class S {
|
||||
V v_;
|
||||
@ -28,48 +28,102 @@ class S {
|
||||
public:
|
||||
template <class... Extents>
|
||||
S(std::string label, Extents... extents) : v_(std::move(label), extents...) {}
|
||||
S() = default;
|
||||
KOKKOS_DEFAULTED_FUNCTION S() = default;
|
||||
};
|
||||
|
||||
template <class V>
|
||||
void test_view_of_views() {
|
||||
class N { // not default constructible
|
||||
V v_;
|
||||
|
||||
public:
|
||||
template <class... Extents>
|
||||
N(std::string label, Extents... extents) : v_(std::move(label), extents...) {}
|
||||
};
|
||||
|
||||
template <class V>
|
||||
class H { // constructible and destructible only from on the host side
|
||||
V v_;
|
||||
|
||||
public:
|
||||
template <class... Extents>
|
||||
H(std::string label, Extents... extents) : v_(std::move(label), extents...) {}
|
||||
H() {}
|
||||
~H() {}
|
||||
};
|
||||
|
||||
template <class V>
|
||||
void test_view_of_views_default() {
|
||||
// assigning a default-constructed view to destruct the inner objects
|
||||
using VoV = Kokkos::View<V**, Kokkos::HostSpace>;
|
||||
{ // assigning a default-constructed view to destruct the inner objects
|
||||
VoV vov("vov", 2, 3);
|
||||
V a("a");
|
||||
V b("b");
|
||||
vov(0, 0) = a;
|
||||
vov(1, 0) = a;
|
||||
vov(0, 1) = b;
|
||||
VoV vov("vov", 2, 3);
|
||||
V a("a");
|
||||
V b("b");
|
||||
vov(0, 0) = a;
|
||||
vov(1, 0) = a;
|
||||
vov(0, 1) = b;
|
||||
#ifndef KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND
|
||||
vov(0, 0) = V();
|
||||
vov(1, 0) = V();
|
||||
vov(0, 1) = V();
|
||||
vov(0, 0) = V();
|
||||
vov(1, 0) = V();
|
||||
vov(0, 1) = V();
|
||||
#endif
|
||||
}
|
||||
{ // using placement new to construct the inner objects and explicitly
|
||||
// calling the destructor
|
||||
VoV vov(Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3);
|
||||
V a("a");
|
||||
V b("b");
|
||||
new (&vov(0, 0)) V(a);
|
||||
new (&vov(1, 0)) V(a);
|
||||
new (&vov(0, 1)) V(b);
|
||||
#ifndef KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND
|
||||
vov(0, 0).~V();
|
||||
vov(1, 0).~V();
|
||||
vov(0, 1).~V();
|
||||
#else
|
||||
// leaks memory
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
TEST(TEST_CATEGORY, view_of_views) {
|
||||
test_view_of_views<Kokkos::View<int, TEST_EXECSPACE>>();
|
||||
test_view_of_views<Kokkos::View<int[4], TEST_EXECSPACE>>();
|
||||
template <class V>
|
||||
void test_view_of_views_without_initializing() {
|
||||
// using placement new to construct the inner objects and explicitly
|
||||
// calling the destructor
|
||||
using VoV = Kokkos::View<V**, Kokkos::HostSpace>;
|
||||
VoV vov(Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3);
|
||||
V a("a");
|
||||
V b("b");
|
||||
new (&vov(0, 0)) V(a);
|
||||
new (&vov(1, 0)) V(a);
|
||||
new (&vov(0, 1)) V(b);
|
||||
#ifndef KOKKOS_ENABLE_IMPL_VIEW_OF_VIEWS_DESTRUCTOR_PRECONDITION_VIOLATION_WORKAROUND
|
||||
vov(0, 0).~V();
|
||||
vov(1, 0).~V();
|
||||
vov(0, 1).~V();
|
||||
#else
|
||||
// leaks memory
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class V>
|
||||
void test_view_of_views_sequential_host_init() {
|
||||
// inner views value-initialized sequentially on the host, and also
|
||||
// sequentially destructed on the host, without the need to cleanup
|
||||
using VoV = Kokkos::View<V**, Kokkos::HostSpace>;
|
||||
VoV vov(Kokkos::view_alloc("vov", Kokkos::SequentialHostInit), 2, 3);
|
||||
V a("a");
|
||||
V b("b");
|
||||
vov(0, 0) = a;
|
||||
vov(1, 0) = a;
|
||||
vov(0, 1) = b;
|
||||
}
|
||||
|
||||
TEST(TEST_CATEGORY, view_of_views_default) {
|
||||
test_view_of_views_default<Kokkos::View<int, TEST_EXECSPACE>>();
|
||||
test_view_of_views_default<Kokkos::View<int[4], TEST_EXECSPACE>>();
|
||||
// User-defined type with View data member
|
||||
test_view_of_views<S<Kokkos::View<float, TEST_EXECSPACE>>>();
|
||||
test_view_of_views_default<S<Kokkos::View<float, TEST_EXECSPACE>>>();
|
||||
}
|
||||
|
||||
TEST(TEST_CATEGORY, view_of_views_without_initializing) {
|
||||
test_view_of_views_without_initializing<Kokkos::View<int, TEST_EXECSPACE>>();
|
||||
test_view_of_views_without_initializing<
|
||||
S<Kokkos::View<float, TEST_EXECSPACE>>>();
|
||||
test_view_of_views_without_initializing<
|
||||
N<Kokkos::View<double, TEST_EXECSPACE>>>();
|
||||
test_view_of_views_without_initializing<
|
||||
H<Kokkos::View<int, TEST_EXECSPACE>>>();
|
||||
}
|
||||
|
||||
TEST(TEST_CATEGORY, test_view_of_views_sequential_host_init) {
|
||||
test_view_of_views_sequential_host_init<Kokkos::View<int, TEST_EXECSPACE>>();
|
||||
test_view_of_views_sequential_host_init<
|
||||
S<Kokkos::View<float, TEST_EXECSPACE>>>();
|
||||
test_view_of_views_sequential_host_init<
|
||||
H<Kokkos::View<int, TEST_EXECSPACE>>>();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
@ -39,9 +39,14 @@ TEST(cuda, space_access) {
|
||||
!Kokkos::Impl::MemorySpaceAccess<Kokkos::HostSpace,
|
||||
Kokkos::CudaSpace>::assignable);
|
||||
|
||||
#ifndef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
|
||||
static_assert(
|
||||
!Kokkos::Impl::MemorySpaceAccess<Kokkos::HostSpace,
|
||||
Kokkos::CudaSpace>::accessible);
|
||||
#else
|
||||
static_assert(Kokkos::Impl::MemorySpaceAccess<Kokkos::HostSpace,
|
||||
Kokkos::CudaSpace>::accessible);
|
||||
#endif
|
||||
|
||||
static_assert(
|
||||
!Kokkos::Impl::MemorySpaceAccess<Kokkos::HostSpace,
|
||||
@ -147,8 +152,13 @@ TEST(cuda, space_access) {
|
||||
Kokkos::SpaceAccessibility<Kokkos::Cuda,
|
||||
Kokkos::CudaHostPinnedSpace>::accessible);
|
||||
|
||||
#ifndef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
|
||||
static_assert(!Kokkos::SpaceAccessibility<Kokkos::HostSpace,
|
||||
Kokkos::CudaSpace>::accessible);
|
||||
#else
|
||||
static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace,
|
||||
Kokkos::CudaSpace>::accessible);
|
||||
#endif
|
||||
|
||||
static_assert(Kokkos::SpaceAccessibility<Kokkos::HostSpace,
|
||||
Kokkos::CudaUVMSpace>::accessible);
|
||||
@ -157,8 +167,14 @@ TEST(cuda, space_access) {
|
||||
Kokkos::SpaceAccessibility<Kokkos::HostSpace,
|
||||
Kokkos::CudaHostPinnedSpace>::accessible);
|
||||
|
||||
#ifndef KOKKOS_ENABLE_IMPL_CUDA_UNIFIED_MEMORY
|
||||
static_assert(std::is_same<Kokkos::Impl::HostMirror<Kokkos::CudaSpace>::Space,
|
||||
Kokkos::HostSpace>::value);
|
||||
#else
|
||||
static_assert(std::is_same<Kokkos::Impl::HostMirror<Kokkos::CudaSpace>::Space,
|
||||
Kokkos::Device<Kokkos::HostSpace::execution_space,
|
||||
Kokkos::CudaSpace>>::value);
|
||||
#endif
|
||||
|
||||
static_assert(
|
||||
std::is_same<Kokkos::Impl::HostMirror<Kokkos::CudaUVMSpace>::Space,
|
||||
|
||||
@ -38,3 +38,4 @@ tag: 4.2.01 date: 01:30:2024 master: 71a9bcae release: 221e5f7a
|
||||
tag: 4.3.00 date: 04:03:2024 master: e0dc0128 release: f08217a4
|
||||
tag: 4.3.01 date: 05:07:2024 master: 486cc745 release: 262d2d6e
|
||||
tag: 4.4.00 date: 08:08:2024 master: 6ecdf605 release: 6068673c
|
||||
tag: 4.4.01 date: 09:12:2024 master: 08ceff92 release: 2d60c039
|
||||
|
||||
@ -361,9 +361,7 @@ class simd_mask<std::int32_t, simd_abi::avx2_fixed_size<4>> {
|
||||
};
|
||||
using value_type = bool;
|
||||
using abi_type = simd_abi::avx2_fixed_size<4>;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask const&) = default;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask&&) = default;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value)
|
||||
: m_value(_mm_set1_epi32(-std::int32_t(value))) {}
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() {
|
||||
@ -460,9 +458,7 @@ class simd_mask<std::int32_t, simd_abi::avx2_fixed_size<8>> {
|
||||
};
|
||||
using value_type = bool;
|
||||
using abi_type = simd_abi::avx2_fixed_size<8>;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask const&) = default;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask&&) = default;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value)
|
||||
: m_value(_mm256_set1_epi32(-std::int32_t(value))) {}
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() {
|
||||
@ -561,9 +557,7 @@ class simd_mask<std::int64_t, simd_abi::avx2_fixed_size<4>> {
|
||||
};
|
||||
using value_type = bool;
|
||||
using abi_type = simd_abi::avx2_fixed_size<4>;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask const&) = default;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask(simd_mask&&) = default;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION simd_mask() = default;
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION explicit simd_mask(value_type value)
|
||||
: m_value(_mm256_set1_epi64x(-std::int64_t(value))) {}
|
||||
KOKKOS_IMPL_HOST_FORCEINLINE_FUNCTION static constexpr std::size_t size() {
|
||||
|
||||
@ -22,3 +22,4 @@
|
||||
#include <TestSIMD_GeneratorCtors.hpp>
|
||||
#include <TestSIMD_WhereExpressions.hpp>
|
||||
#include <TestSIMD_Reductions.hpp>
|
||||
#include <TestSIMD_Construction.hpp>
|
||||
|
||||
150
lib/kokkos/simd/unit_tests/include/TestSIMD_Construction.hpp
Normal file
150
lib/kokkos/simd/unit_tests/include/TestSIMD_Construction.hpp
Normal file
@ -0,0 +1,150 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 4.0
|
||||
// Copyright (2022) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://kokkos.org/LICENSE for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//@HEADER
|
||||
|
||||
#ifndef KOKKOS_TEST_SIMD_CONSTRUCTION_HPP
|
||||
#define KOKKOS_TEST_SIMD_CONSTRUCTION_HPP
|
||||
|
||||
#include <Kokkos_SIMD.hpp>
|
||||
#include <SIMDTesting_Utilities.hpp>
|
||||
|
||||
template <typename Abi, typename DataType>
|
||||
inline void host_test_simd_traits() {
|
||||
using simd_type = Kokkos::Experimental::simd<DataType, Abi>;
|
||||
|
||||
static_assert(std::is_nothrow_default_constructible_v<simd_type>);
|
||||
static_assert(std::is_nothrow_copy_assignable_v<simd_type>);
|
||||
static_assert(std::is_nothrow_copy_constructible_v<simd_type>);
|
||||
static_assert(std::is_nothrow_move_assignable_v<simd_type>);
|
||||
static_assert(std::is_nothrow_move_constructible_v<simd_type>);
|
||||
|
||||
simd_type default_simd, result;
|
||||
simd_type test_simd(KOKKOS_LAMBDA(std::size_t i) { return (i % 2 == 0); });
|
||||
simd_type copy_simd(test_simd);
|
||||
simd_type move_simd(std::move(copy_simd));
|
||||
default_simd = std::move(move_simd);
|
||||
result = default_simd;
|
||||
EXPECT_TRUE(all_of(test_simd == result));
|
||||
}
|
||||
|
||||
template <typename Abi, typename DataType>
|
||||
inline void host_test_mask_traits() {
|
||||
using mask_type = Kokkos::Experimental::simd_mask<DataType, Abi>;
|
||||
|
||||
static_assert(std::is_nothrow_default_constructible_v<mask_type>);
|
||||
static_assert(std::is_nothrow_copy_assignable_v<mask_type>);
|
||||
static_assert(std::is_nothrow_copy_constructible_v<mask_type>);
|
||||
static_assert(std::is_nothrow_move_assignable_v<mask_type>);
|
||||
static_assert(std::is_nothrow_move_constructible_v<mask_type>);
|
||||
|
||||
mask_type default_mask, result;
|
||||
mask_type test_mask(KOKKOS_LAMBDA(std::size_t i) { return (i % 2 == 0); });
|
||||
mask_type copy_mask(test_mask);
|
||||
mask_type move_mask(std::move(copy_mask));
|
||||
default_mask = std::move(move_mask);
|
||||
result = default_mask;
|
||||
EXPECT_EQ(test_mask, result);
|
||||
}
|
||||
|
||||
template <typename Abi, typename DataType>
|
||||
inline void host_check_construction() {
|
||||
if constexpr (is_type_v<Kokkos::Experimental::simd<DataType, Abi>>) {
|
||||
host_test_simd_traits<Abi, DataType>();
|
||||
host_test_mask_traits<Abi, DataType>();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Abi, typename... DataTypes>
|
||||
inline void host_check_construction_all_types(
|
||||
Kokkos::Experimental::Impl::data_types<DataTypes...>) {
|
||||
(host_check_construction<Abi, DataTypes>(), ...);
|
||||
}
|
||||
|
||||
template <typename... Abis>
|
||||
inline void host_check_construction_all_abis(
|
||||
Kokkos::Experimental::Impl::abi_set<Abis...>) {
|
||||
using DataTypes = Kokkos::Experimental::Impl::data_type_set;
|
||||
(host_check_construction_all_types<Abis>(DataTypes()), ...);
|
||||
}
|
||||
|
||||
template <typename Abi, typename DataType>
|
||||
KOKKOS_INLINE_FUNCTION void device_test_simd_traits() {
|
||||
using simd_type = Kokkos::Experimental::simd<DataType, Abi>;
|
||||
|
||||
simd_type default_simd, result;
|
||||
simd_type test_simd(KOKKOS_LAMBDA(std::size_t i) { return (i % 2 == 0); });
|
||||
simd_type copy_simd(test_simd);
|
||||
simd_type move_simd(std::move(copy_simd));
|
||||
default_simd = std::move(move_simd);
|
||||
result = default_simd;
|
||||
|
||||
kokkos_checker checker;
|
||||
checker.truth(all_of(test_simd == result));
|
||||
}
|
||||
|
||||
template <typename Abi, typename DataType>
|
||||
KOKKOS_INLINE_FUNCTION void device_test_mask_traits() {
|
||||
using mask_type = Kokkos::Experimental::simd_mask<DataType, Abi>;
|
||||
|
||||
mask_type default_mask, result;
|
||||
mask_type test_mask(KOKKOS_LAMBDA(std::size_t i) { return (i % 2 == 0); });
|
||||
mask_type copy_mask(test_mask);
|
||||
mask_type move_mask(std::move(copy_mask));
|
||||
default_mask = std::move(move_mask);
|
||||
result = default_mask;
|
||||
|
||||
kokkos_checker checker;
|
||||
checker.truth(test_mask == result);
|
||||
}
|
||||
|
||||
template <typename Abi, typename DataType>
|
||||
KOKKOS_INLINE_FUNCTION void device_check_construction() {
|
||||
if constexpr (is_type_v<Kokkos::Experimental::simd<DataType, Abi>>) {
|
||||
device_test_simd_traits<Abi, DataType>();
|
||||
device_test_mask_traits<Abi, DataType>();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Abi, typename... DataTypes>
|
||||
KOKKOS_INLINE_FUNCTION void device_check_construction_all_types(
|
||||
Kokkos::Experimental::Impl::data_types<DataTypes...>) {
|
||||
(device_check_construction<Abi, DataTypes>(), ...);
|
||||
}
|
||||
|
||||
template <typename... Abis>
|
||||
KOKKOS_INLINE_FUNCTION void device_check_construction_all_abis(
|
||||
Kokkos::Experimental::Impl::abi_set<Abis...>) {
|
||||
using DataTypes = Kokkos::Experimental::Impl::data_type_set;
|
||||
(device_check_construction_all_types<Abis>(DataTypes()), ...);
|
||||
}
|
||||
|
||||
class simd_device_construction_functor {
|
||||
public:
|
||||
KOKKOS_INLINE_FUNCTION void operator()(int) const {
|
||||
device_check_construction_all_abis(
|
||||
Kokkos::Experimental::Impl::device_abi_set());
|
||||
}
|
||||
};
|
||||
|
||||
TEST(simd, host_construction) {
|
||||
host_check_construction_all_abis(Kokkos::Experimental::Impl::host_abi_set());
|
||||
}
|
||||
|
||||
TEST(simd, device_construction) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::IndexType<int>>(0, 1),
|
||||
simd_device_construction_functor());
|
||||
}
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user