pull in kokkos-3.1.01

This commit is contained in:
Christoph Junghans
2020-05-07 10:22:42 -06:00
parent 66994562e6
commit 69a6a8e064
11 changed files with 59 additions and 26 deletions

View File

@ -97,7 +97,9 @@ __device__ inline
// Depending on the ValueType, __shared__ memory must be aligned up to 8-byte
// boundaries. The reason not to use ValueType directly is that for types with
// constructors it could lead to race conditions.
__shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH];
alignas(alignof(ValueType) > alignof(double) ? alignof(ValueType)
: alignof(double))
__shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH];
ValueType* result = (ValueType*)&sh_result;
const int step = 32 / blockDim.x;
int shift = STEP_WIDTH;
@ -282,7 +284,9 @@ __device__ inline
// Depending on the ValueType, __shared__ memory must be aligned up to 8-byte
// boundaries. The reason not to use ValueType directly is that for types with
// constructors it could lead to race conditions.
__shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH];
alignas(alignof(ValueType) > alignof(double) ? alignof(ValueType)
: alignof(double))
__shared__ double sh_result[(sizeof(ValueType) + 7) / 8 * STEP_WIDTH];
ValueType* result = (ValueType*)&sh_result;
const int step = 32 / blockDim.x;
int shift = STEP_WIDTH;

View File

@ -81,18 +81,19 @@ struct in_place_shfl_op {
union conv_type {
Scalar orig;
shfl_type conv;
// This should be fine, members get explicitly reset, which changes the
// active member
KOKKOS_FUNCTION conv_type() { conv = 0; }
};
conv_type tmp_in;
tmp_in.orig = in;
conv_type tmp_out;
tmp_out.conv = tmp_in.conv;
shfl_type tmp_out;
tmp_out = reinterpret_cast<shfl_type&>(tmp_in.orig);
conv_type res;
//------------------------------------------------
res.conv = self().do_shfl_op(
mask, reinterpret_cast<shfl_type const&>(tmp_out.conv), lane_or_delta,
width);
res.conv = self().do_shfl_op(mask, tmp_out, lane_or_delta, width);
//------------------------------------------------
out = res.orig;
out = reinterpret_cast<Scalar&>(res.conv);
}
// TODO: figure out why 64-bit shfl fails in Clang

View File

@ -56,6 +56,8 @@
#include <Kokkos_HostSpace.hpp>
#include <impl/Kokkos_Profiling_Interface.hpp>
#include <Cuda/Kokkos_Cuda_abort.hpp>
#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST

View File

@ -59,7 +59,7 @@
#include <Kokkos_TaskPolicy.hpp>
#include <Kokkos_Layout.hpp>
#include <impl/Kokkos_Tags.hpp>
#include <impl/Kokkos_Profiling_Interface.hpp>
#include <KokkosExp_MDRangePolicy.hpp>
/*--------------------------------------------------------------------------*/
@ -124,8 +124,9 @@ class OpenMPTarget {
namespace Profiling {
namespace Experimental {
template <>
struct DeviceTypeTraits<Experimental::OpenMPTarget> {
static constexpr DeviceType id = DeviceType::OpenMPTarget;
struct DeviceTypeTraits<::Kokkos::Experimental::OpenMPTarget> {
static constexpr DeviceType id =
::Kokkos::Profiling::Experimental::DeviceType::OpenMPTarget;
};
} // namespace Experimental
} // namespace Profiling

View File

@ -1286,8 +1286,8 @@ struct ViewOffset<
/* Span of the range space */
KOKKOS_INLINE_FUNCTION
constexpr size_type span() const {
return m_stride * m_dim.N1 * m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 *
m_dim.N6 * m_dim.N7;
return (m_dim.N0 > size_type(0) ? m_stride : size_type(0)) * m_dim.N1 *
m_dim.N2 * m_dim.N3 * m_dim.N4 * m_dim.N5 * m_dim.N6 * m_dim.N7;
}
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
@ -1882,7 +1882,9 @@ struct ViewOffset<
/* Span of the range space */
KOKKOS_INLINE_FUNCTION
constexpr size_type span() const { return m_dim.N0 * m_stride; }
constexpr size_type span() const {
return size() > 0 ? m_dim.N0 * m_stride : 0;
}
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {
return m_stride == m_dim.N7 * m_dim.N6 * m_dim.N5 * m_dim.N4 * m_dim.N3 *
@ -2398,14 +2400,16 @@ struct ViewOffset<Dimension, Kokkos::LayoutStride, void> {
/* Span of the range space, largest stride * dimension */
KOKKOS_INLINE_FUNCTION
constexpr size_type span() const {
return Max(m_dim.N0 * m_stride.S0,
Max(m_dim.N1 * m_stride.S1,
Max(m_dim.N2 * m_stride.S2,
Max(m_dim.N3 * m_stride.S3,
Max(m_dim.N4 * m_stride.S4,
Max(m_dim.N5 * m_stride.S5,
Max(m_dim.N6 * m_stride.S6,
m_dim.N7 * m_stride.S7)))))));
return size() == size_type(0)
? size_type(0)
: Max(m_dim.N0 * m_stride.S0,
Max(m_dim.N1 * m_stride.S1,
Max(m_dim.N2 * m_stride.S2,
Max(m_dim.N3 * m_stride.S3,
Max(m_dim.N4 * m_stride.S4,
Max(m_dim.N5 * m_stride.S5,
Max(m_dim.N6 * m_stride.S6,
m_dim.N7 * m_stride.S7)))))));
}
KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const {