pull in kokkos-3.1.01
This commit is contained in:
@ -97,7 +97,9 @@ __device__ inline
|
||||
// Depending on the ValueType, __shared__ memory must be aligned up to 8-byte
|
||||
// boundaries. The reason not to use ValueType directly is that for types with
|
||||
// constructors it could lead to race conditions
|
||||
__shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH];
|
||||
alignas(alignof(ValueType) > alignof(double) ? alignof(ValueType)
|
||||
: alignof(double))
|
||||
__shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH];
|
||||
ValueType* result = (ValueType*)&sh_result;
|
||||
const int step = 32 / blockDim.x;
|
||||
int shift = STEP_WIDTH;
|
||||
@ -282,7 +284,9 @@ __device__ inline
|
||||
// Depending on the ValueType, __shared__ memory must be aligned up to 8-byte
|
||||
// boundaries. The reason not to use ValueType directly is that for types with
|
||||
// constructors it could lead to race conditions
|
||||
__shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH];
|
||||
alignas(alignof(ValueType) > alignof(double) ? alignof(ValueType)
|
||||
: alignof(double))
|
||||
__shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH];
|
||||
ValueType* result = (ValueType*)&sh_result;
|
||||
const int step = 32 / blockDim.x;
|
||||
int shift = STEP_WIDTH;
|
||||
|
||||
@ -81,18 +81,19 @@ struct in_place_shfl_op {
|
||||
union conv_type {
|
||||
Scalar orig;
|
||||
shfl_type conv;
|
||||
// This should be fine, members get explicitly reset, which changes the
|
||||
// active member
|
||||
KOKKOS_FUNCTION conv_type() { conv = 0; }
|
||||
};
|
||||
conv_type tmp_in;
|
||||
tmp_in.orig = in;
|
||||
conv_type tmp_out;
|
||||
tmp_out.conv = tmp_in.conv;
|
||||
shfl_type tmp_out;
|
||||
tmp_out = reinterpret_cast<shfl_type&>(tmp_in.orig);
|
||||
conv_type res;
|
||||
//------------------------------------------------
|
||||
res.conv = self().do_shfl_op(
|
||||
mask, reinterpret_cast<shfl_type const&>(tmp_out.conv), lane_or_delta,
|
||||
width);
|
||||
res.conv = self().do_shfl_op(mask, tmp_out, lane_or_delta, width);
|
||||
//------------------------------------------------
|
||||
out = res.orig;
|
||||
out = reinterpret_cast<Scalar&>(res.conv);
|
||||
}
|
||||
|
||||
// TODO: figure out why 64-bit shfl fails in Clang
|
||||
|
||||
@ -56,6 +56,8 @@
|
||||
|
||||
#include <Kokkos_HostSpace.hpp>
|
||||
|
||||
#include <impl/Kokkos_Profiling_Interface.hpp>
|
||||
|
||||
#include <Cuda/Kokkos_Cuda_abort.hpp>
|
||||
|
||||
#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
|
||||
|
||||
@ -59,7 +59,7 @@
|
||||
#include <Kokkos_TaskPolicy.hpp>
|
||||
#include <Kokkos_Layout.hpp>
|
||||
#include <impl/Kokkos_Tags.hpp>
|
||||
|
||||
#include <impl/Kokkos_Profiling_Interface.hpp>
|
||||
#include <KokkosExp_MDRangePolicy.hpp>
|
||||
/*--------------------------------------------------------------------------*/
|
||||
|
||||
@ -124,8 +124,9 @@ class OpenMPTarget {
|
||||
namespace Profiling {
|
||||
namespace Experimental {
|
||||
template <>
|
||||
struct DeviceTypeTraits<Experimental::OpenMPTarget> {
|
||||
static constexpr DeviceType id = DeviceType::OpenMPTarget;
|
||||
struct DeviceTypeTraits<::Kokkos::Experimental::OpenMPTarget> {
|
||||
static constexpr DeviceType id =
|
||||
::Kokkos::Profiling::Experimental::DeviceType::OpenMPTarget;
|
||||
};
|
||||
} // namespace Experimental
|
||||
} // namespace Profiling
|
||||
|
||||
@ -1286,8 +1286,8 @@ struct ViewOffset<
|
||||
/* Span of the range space */
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr size_type span() const {
|
||||
return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 *
|
||||
m_dim.N6 * m_dim.N7;
|
||||
return (m_dim.N0 > size_type(0) ? m_stride : size_type(0)) * m_dim.N1 *
|
||||
m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
|
||||
@ -1882,7 +1882,9 @@ struct ViewOffset<
|
||||
|
||||
/* Span of the range space */
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr size_type span() const { return m_dim.N0 * m_stride; }
|
||||
constexpr size_type span() const {
|
||||
return size() > 0 ? m_dim.N0 * m_stride : 0;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
|
||||
return m_stride == m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 *
|
||||
@ -2398,14 +2400,16 @@ struct ViewOffset<Dimension, Kokkos::LayoutStride, void> {
|
||||
/* Span of the range space, largest stride * dimension */
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
constexpr size_type span() const {
|
||||
return Max(m_dim.N0 * m_stride.S0,
|
||||
Max(m_dim.N1 * m_stride.S1,
|
||||
Max(m_dim.N2 * m_stride.S2,
|
||||
Max(m_dim.N3 * m_stride.S3,
|
||||
Max(m_dim.N4 * m_stride.S4,
|
||||
Max(m_dim.N5 * m_stride.S5,
|
||||
Max(m_dim.N6 * m_stride.S6,
|
||||
m_dim.N7 * m_stride.S7)))))));
|
||||
return size() == size_type(0)
|
||||
? size_type(0)
|
||||
: Max(m_dim.N0 * m_stride.S0,
|
||||
Max(m_dim.N1 * m_stride.S1,
|
||||
Max(m_dim.N2 * m_stride.S2,
|
||||
Max(m_dim.N3 * m_stride.S3,
|
||||
Max(m_dim.N4 * m_stride.S4,
|
||||
Max(m_dim.N5 * m_stride.S5,
|
||||
Max(m_dim.N6 * m_stride.S6,
|
||||
m_dim.N7 * m_stride.S7)))))));
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
|
||||
|
||||
Reference in New Issue
Block a user