Files
lammps/lib/kokkos/containers/src/Kokkos_ScatterView.hpp
2019-06-28 11:23:24 -06:00

1319 lines
46 KiB
C++

/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_ScatterView.hpp
/// \brief Declaration and definition of Kokkos::ScatterView.
///
/// This header file declares and defines Kokkos::ScatterView and its
/// related nonmember functions.
#ifndef KOKKOS_SCATTER_VIEW_HPP
#define KOKKOS_SCATTER_VIEW_HPP
#include <Kokkos_Core.hpp>
#include <utility>
namespace Kokkos {
namespace Experimental {
/*
* Reduction Type list
* - These correspond to a subset of the reducers in parallel_reduce
* - See the implementations of ScatterValue for details.
*/
// Selectors for the reduction operation (the `Op` template argument).
enum : int {
  ScatterSum  = 0,
  ScatterProd = 1,
  ScatterMax  = 2,
  ScatterMin  = 3
};
// Selectors for the duplication strategy (the `duplication` template argument).
enum : int {
  ScatterNonDuplicated,  // == 0
  ScatterDuplicated      // == 1
};
// Selectors for the contribution mode (the `contribution` template argument).
enum : int {
  ScatterNonAtomic,  // == 0
  ScatterAtomic      // == 1
};
}} // Kokkos::Experimental
namespace Kokkos {
namespace Impl {
namespace Experimental {
// Trait naming the default duplication strategy for an execution space.
// Only declared here; each specialization supplies an integer `value`.
template <typename ExecSpace>
struct DefaultDuplication;
// Trait naming the default contribution mode for a given execution space and
// duplication strategy. Only declared here; each specialization supplies an
// integer `value`.
template <typename ExecSpace, int duplication>
struct DefaultContribution;
#ifdef KOKKOS_ENABLE_SERIAL
// Kokkos::Serial: data is not duplicated by default.
template <>
struct DefaultDuplication<Kokkos::Serial> {
enum : int { value = Kokkos::Experimental::ScatterNonDuplicated };
};
// Kokkos::Serial, non-duplicated data: contributions default to non-atomic.
template <>
struct DefaultContribution<Kokkos::Serial, Kokkos::Experimental::ScatterNonDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
};
// Kokkos::Serial, duplicated data: contributions default to non-atomic.
template <>
struct DefaultContribution<Kokkos::Serial, Kokkos::Experimental::ScatterDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
};
#endif
#ifdef KOKKOS_ENABLE_OPENMP
// Kokkos::OpenMP: data is duplicated by default.
template <>
struct DefaultDuplication<Kokkos::OpenMP> {
enum : int { value = Kokkos::Experimental::ScatterDuplicated };
};
// Kokkos::OpenMP, non-duplicated data: contributions default to atomic.
template <>
struct DefaultContribution<Kokkos::OpenMP, Kokkos::Experimental::ScatterNonDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterAtomic };
};
// Kokkos::OpenMP, duplicated data: contributions default to non-atomic.
template <>
struct DefaultContribution<Kokkos::OpenMP, Kokkos::Experimental::ScatterDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
};
#endif
#ifdef KOKKOS_ENABLE_HPX
// Kokkos::Experimental::HPX: data is duplicated by default.
template <>
struct DefaultDuplication<Kokkos::Experimental::HPX> {
enum : int { value = Kokkos::Experimental::ScatterDuplicated };
};
// HPX, non-duplicated data: contributions default to atomic.
template <>
struct DefaultContribution<Kokkos::Experimental::HPX, Kokkos::Experimental::ScatterNonDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterAtomic };
};
// HPX, duplicated data: contributions default to non-atomic.
template <>
struct DefaultContribution<Kokkos::Experimental::HPX, Kokkos::Experimental::ScatterDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
};
#endif
#ifdef KOKKOS_ENABLE_THREADS
// Kokkos::Threads: data is duplicated by default.
template <>
struct DefaultDuplication<Kokkos::Threads> {
enum : int { value = Kokkos::Experimental::ScatterDuplicated };
};
// Kokkos::Threads, non-duplicated data: contributions default to atomic.
template <>
struct DefaultContribution<Kokkos::Threads, Kokkos::Experimental::ScatterNonDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterAtomic };
};
// Kokkos::Threads, duplicated data: contributions default to non-atomic.
template <>
struct DefaultContribution<Kokkos::Threads, Kokkos::Experimental::ScatterDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterNonAtomic };
};
#endif
#ifdef KOKKOS_ENABLE_CUDA
// Kokkos::Cuda: data is not duplicated by default.
template <>
struct DefaultDuplication<Kokkos::Cuda> {
enum : int { value = Kokkos::Experimental::ScatterNonDuplicated };
};
// Kokkos::Cuda, non-duplicated data: contributions default to atomic.
template <>
struct DefaultContribution<Kokkos::Cuda, Kokkos::Experimental::ScatterNonDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterAtomic };
};
// Kokkos::Cuda, duplicated data: contributions still default to atomic.
template <>
struct DefaultContribution<Kokkos::Cuda, Kokkos::Experimental::ScatterDuplicated> {
enum : int { value = Kokkos::Experimental::ScatterAtomic };
};
#endif
/* ScatterValue <Op=ScatterSum, contribution=ScatterNonAtomic> is the object returned by the access operator() of ScatterAccess.
This class inherits from the Sum<> reducer and wraps join(dest, src) with convenient operator+=, etc.
Note the addition of update(ValueType const& rhs) and reset() so that all reducers have common functions
(see ReduceDuplicates and ResetDuplicates). */
// Primary template; only the (Op, contribution) specializations are defined.
template <typename ValueType, int Op, int contribution>
struct ScatterValue;

// Non-atomic sum: forwards every contribution to the join() inherited from
// the Sum<> reducer, applied to the element this object wraps.
template <typename ValueType>
struct ScatterValue<ValueType,
                    Kokkos::Experimental::ScatterSum,
                    Kokkos::Experimental::ScatterNonAtomic>
  : Sum<ValueType, Kokkos::DefaultExecutionSpace>
{
private:
  using sum_reducer = Sum<ValueType, Kokkos::DefaultExecutionSpace>;

public:
  // Wrap the element that contributions are accumulated into.
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterValue(ValueType& value_in) : sum_reducer(value_in) {}

  // A moved-to object wraps the same element as `other`.
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterValue(ScatterValue&& other) : sum_reducer(other.reference()) {}

  // Common entry point shared by all ScatterValue specializations.
  KOKKOS_FORCEINLINE_FUNCTION
  void update(ValueType const& rhs) {
    this->join(this->reference(), rhs);
  }

  KOKKOS_FORCEINLINE_FUNCTION
  void operator+=(ValueType const& rhs) { update(rhs); }

  // Subtraction is expressed as contributing the negated value.
  KOKKOS_FORCEINLINE_FUNCTION
  void operator-=(ValueType const& rhs) { update(-rhs); }

  // Re-initialize the wrapped element through the reducer's init().
  KOKKOS_FORCEINLINE_FUNCTION
  void reset() {
    this->init(this->reference());
  }
};
/* ScatterValue <Op=ScatterSum, contribution=ScatterAtomic> is the object returned by the access operator()
* of ScatterAccess, similar to that returned by an Atomic View, it wraps Kokkos::atomic_add with convenient
operator+=, etc. This version also has the update(rhs) and reset() functions. */
// Atomic sum: join() is replaced by Kokkos::atomic_add so that concurrent
// contributions to the same element are combined atomically.
template <typename ValueType>
struct ScatterValue<ValueType,
                    Kokkos::Experimental::ScatterSum,
                    Kokkos::Experimental::ScatterAtomic>
  : Sum<ValueType, Kokkos::DefaultExecutionSpace>
{
private:
  using sum_reducer = Sum<ValueType, Kokkos::DefaultExecutionSpace>;

public:
  // Wrap the element that contributions are accumulated into.
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterValue(ValueType& value_in) : sum_reducer(value_in) {}

  // Atomic replacements for the reducer's join(); these hide the base versions.
  KOKKOS_INLINE_FUNCTION
  void join(ValueType& dest, const ValueType& src) const {
    Kokkos::atomic_add(&dest, src);
  }

  KOKKOS_INLINE_FUNCTION
  void join(volatile ValueType& dest, const volatile ValueType& src) const {
    Kokkos::atomic_add(&dest, src);
  }

  // Common entry point shared by all ScatterValue specializations.
  KOKKOS_FORCEINLINE_FUNCTION
  void update(ValueType const& rhs) {
    this->join(this->reference(), rhs);
  }

  KOKKOS_FORCEINLINE_FUNCTION
  void operator+=(ValueType const& rhs) { update(rhs); }

  // Subtraction is expressed as atomically adding the negated value.
  KOKKOS_FORCEINLINE_FUNCTION
  void operator-=(ValueType const& rhs) { update(-rhs); }

  // Re-initialize the wrapped element through the reducer's init()
  // (this path does not go through an atomic operation).
  KOKKOS_FORCEINLINE_FUNCTION
  void reset() {
    this->init(this->reference());
  }
};
/* ScatterValue <Op=ScatterProd, contribution=ScatterNonAtomic> is the object returned by the access operator() of ScatterAccess,
This class inherits from the Prod<> reducer and it wraps join(dest, src) with convenient operator*=, etc.
Note the addition of update(ValueType const& rhs) and reset() so that all reducers can have common functions
See ReduceDuplicates and ResetDuplicates ) */
// Non-atomic product: forwards every contribution to the join() inherited
// from the Prod<> reducer, applied to the element this object wraps.
template <typename ValueType>
struct ScatterValue<ValueType,
                    Kokkos::Experimental::ScatterProd,
                    Kokkos::Experimental::ScatterNonAtomic>
  : Prod<ValueType, Kokkos::DefaultExecutionSpace>
{
private:
  using prod_reducer = Prod<ValueType, Kokkos::DefaultExecutionSpace>;

public:
  // Wrap the element that contributions are accumulated into.
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterValue(ValueType& value_in) : prod_reducer(value_in) {}

  // A moved-to object wraps the same element as `other`.
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterValue(ScatterValue&& other) : prod_reducer(other.reference()) {}

  // Common entry point shared by all ScatterValue specializations.
  KOKKOS_FORCEINLINE_FUNCTION
  void update(ValueType const& rhs) {
    this->join(this->reference(), rhs);
  }

  KOKKOS_FORCEINLINE_FUNCTION
  void operator*=(ValueType const& rhs) { update(rhs); }

  // Division is expressed as contributing the reciprocal.
  // NOTE(review): for integral ValueType, 1/rhs truncates (to 0 when
  // |rhs| > 1), so this operator is only meaningful for floating-point-like
  // types -- confirm intended usage.
  KOKKOS_FORCEINLINE_FUNCTION
  void operator/=(ValueType const& rhs) {
    update(static_cast<ValueType>(1)/rhs);
  }

  // Re-initialize the wrapped element through the reducer's init().
  KOKKOS_FORCEINLINE_FUNCTION
  void reset() {
    this->init(this->reference());
  }
};
/* ScatterValue <Op=ScatterProd, contribution=ScatterAtomic> is the object returned by the access operator()
* of ScatterAccess. Similar to the value returned by an Atomic View, it wraps an atomic_prod with convenient
operator*=, etc. atomic_prod is built on atomic_compare_exchange. This version also has the update(rhs) and reset() functions. */
// Atomic product: join() is replaced by a compare-and-swap loop so that
// concurrent contributions to the same element are combined atomically.
template <typename ValueType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterProd, Kokkos::Experimental::ScatterAtomic> :
  Prod<ValueType,Kokkos::DefaultExecutionSpace> {
public:
  // Wrap the element that contributions are accumulated into.
  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) :
    Prod<ValueType,Kokkos::DefaultExecutionSpace>(value_in)
  {}
  KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) {
    this->join(this->reference(), rhs);
  }
  // Division is expressed as multiplying by the reciprocal.
  KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) {
    this->join(this->reference(), static_cast<ValueType>(1)/rhs);
  }
  // Atomically replace `dest` with `dest * src`.
  //
  // Each iteration snapshots `dest`, computes the product and tries to
  // install it with atomic_compare_exchange, which returns the value it
  // found at `dest`. The exchange took effect exactly when that value
  // equals the snapshot; otherwise another contribution intervened and the
  // loop retries with a fresh snapshot.
  //
  // The previous relative-difference test divided by the snapshot (which
  // breaks when the element is 0) and also accepted any observed value
  // smaller than the snapshot, silently dropping the update.
  // NOTE(review): like the old test, a NaN element never compares equal and
  // would keep the loop spinning.
  KOKKOS_FORCEINLINE_FUNCTION
  void atomic_prod(ValueType & dest, const ValueType& src) const {
    bool success = false;
    while(!success) {
      const ValueType dest_old = dest;
      const ValueType dest_new = dest_old * src;
      const ValueType observed =
        Kokkos::atomic_compare_exchange<ValueType>(&dest,dest_old,dest_new);
      success = ( observed == dest_old );
    }
  }
  // Atomic replacements for the reducer's join(); these hide the base versions.
  KOKKOS_INLINE_FUNCTION
  void join(ValueType& dest, const ValueType& src) const {
    atomic_prod(dest, src);
  }
  KOKKOS_INLINE_FUNCTION
  void join(volatile ValueType& dest, const volatile ValueType& src) const {
    // atomic_prod takes non-volatile references, so forwarding the volatile
    // arguments unchanged cannot compile; strip the qualifier explicitly.
    atomic_prod(const_cast<ValueType&>(dest), const_cast<const ValueType&>(src));
  }
  // Common entry point shared by all ScatterValue specializations.
  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
    this->join( this->reference(), rhs );
  }
  // Re-initialize the wrapped element through the reducer's init().
  KOKKOS_FORCEINLINE_FUNCTION void reset() {
    this->init( this->reference() );
  }
};
/* ScatterValue <Op=ScatterMin, contribution=ScatterNonAtomic> is the object returned by the access operator() of ScatterAccess,
This class inherits from the Min<> reducer and it wraps join(dest, src) with convenient update(rhs).
Note the addition of update(ValueType const& rhs) and reset() are so that all reducers can have a common update function
See ReduceDuplicates and ResetDuplicates ) */
// Non-atomic minimum: forwards every contribution to the join() inherited
// from the Min<> reducer, applied to the element this object wraps.
template <typename ValueType>
struct ScatterValue<ValueType,
                    Kokkos::Experimental::ScatterMin,
                    Kokkos::Experimental::ScatterNonAtomic>
  : Min<ValueType, Kokkos::DefaultExecutionSpace>
{
private:
  using min_reducer = Min<ValueType, Kokkos::DefaultExecutionSpace>;

public:
  // Wrap the element that contributions are accumulated into.
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterValue(ValueType& value_in) : min_reducer(value_in) {}

  // A moved-to object wraps the same element as `other`.
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterValue(ScatterValue&& other) : min_reducer(other.reference()) {}

  // Common entry point shared by all ScatterValue specializations.
  KOKKOS_FORCEINLINE_FUNCTION
  void update(ValueType const& rhs) {
    this->join(this->reference(), rhs);
  }

  // Re-initialize the wrapped element through the reducer's init().
  KOKKOS_FORCEINLINE_FUNCTION
  void reset() {
    this->init(this->reference());
  }
};
/* ScatterValue <Op=ScatterMin, contribution=ScatterAtomic> is the object returned by the access operator()
* of ScatterAccess. Similar to the value returned by an Atomic View, it wraps an atomic_min with the update(rhs)
function. atomic_min is built on atomic_compare_exchange. This version also has the reset() function. */
// Atomic minimum: join() is replaced by a compare-and-swap loop so that
// concurrent contributions to the same element are combined atomically.
template <typename ValueType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMin, Kokkos::Experimental::ScatterAtomic> :
  Min<ValueType,Kokkos::DefaultExecutionSpace> {
public:
  // Wrap the element that contributions are accumulated into.
  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) :
    Min<ValueType,Kokkos::DefaultExecutionSpace>(value_in)
  {}
  // Atomically replace `dest` with the smaller of `dest` and `src`.
  //
  // Each iteration snapshots `dest`, picks the smaller value and tries to
  // install it with atomic_compare_exchange, which returns the value it
  // found at `dest`. The exchange took effect exactly when that value
  // equals the snapshot; otherwise another contribution intervened and the
  // loop retries with a fresh snapshot.
  //
  // The previous relative-difference test divided by the snapshot (which
  // breaks when the element is 0) and also accepted any observed value
  // smaller than the snapshot, so a concurrent decrease could make this
  // contribution be dropped even when `src` was smaller still.
  // NOTE(review): like the old test, a NaN element never compares equal and
  // would keep the loop spinning.
  KOKKOS_FORCEINLINE_FUNCTION
  void atomic_min(ValueType & dest, const ValueType& src) const {
    bool success = false;
    while(!success) {
      const ValueType dest_old = dest;
      const ValueType dest_new = ( dest_old > src ) ? src : dest_old;
      const ValueType observed =
        Kokkos::atomic_compare_exchange<ValueType>(&dest,dest_old,dest_new);
      success = ( observed == dest_old );
    }
  }
  // Atomic replacements for the reducer's join(); these hide the base versions.
  KOKKOS_INLINE_FUNCTION
  void join(ValueType& dest, const ValueType& src) const {
    atomic_min(dest, src);
  }
  KOKKOS_INLINE_FUNCTION
  void join(volatile ValueType& dest, const volatile ValueType& src) const {
    // atomic_min takes non-volatile references, so forwarding the volatile
    // arguments unchanged cannot compile; strip the qualifier explicitly.
    atomic_min(const_cast<ValueType&>(dest), const_cast<const ValueType&>(src));
  }
  // Common entry point shared by all ScatterValue specializations.
  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
    this->join( this->reference(), rhs );
  }
  // Re-initialize the wrapped element through the reducer's init().
  KOKKOS_FORCEINLINE_FUNCTION void reset() {
    this->init( this->reference() );
  }
};
/* ScatterValue <Op=ScatterMax, contribution=ScatterNonAtomic> is the object returned by the access operator() of ScatterAccess.
This class inherits from the Max<> reducer and wraps join(dest, src) with a convenient update(rhs).
Note that update(ValueType const& rhs) and reset() are added so that all reducers have a common update function
(see ReduceDuplicates and ResetDuplicates). */
// Non-atomic maximum: forwards every contribution to the join() inherited
// from the Max<> reducer, applied to the element this object wraps.
template <typename ValueType>
struct ScatterValue<ValueType,
                    Kokkos::Experimental::ScatterMax,
                    Kokkos::Experimental::ScatterNonAtomic>
  : Max<ValueType, Kokkos::DefaultExecutionSpace>
{
private:
  using max_reducer = Max<ValueType, Kokkos::DefaultExecutionSpace>;

public:
  // Wrap the element that contributions are accumulated into.
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterValue(ValueType& value_in) : max_reducer(value_in) {}

  // A moved-to object wraps the same element as `other`.
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterValue(ScatterValue&& other) : max_reducer(other.reference()) {}

  // Common entry point shared by all ScatterValue specializations.
  KOKKOS_FORCEINLINE_FUNCTION
  void update(ValueType const& rhs) {
    this->join(this->reference(), rhs);
  }

  // Re-initialize the wrapped element through the reducer's init().
  KOKKOS_FORCEINLINE_FUNCTION
  void reset() {
    this->init(this->reference());
  }
};
/* ScatterValue <Op=ScatterMax, contribution=ScatterAtomic> is the object returned by the access operator()
* of ScatterAccess. Similar to the value returned by an Atomic View, it wraps an atomic_max with the update(rhs)
function. atomic_max is built on atomic_compare_exchange. This version also has the reset() function. */
// Atomic maximum: join() is replaced by a compare-and-swap loop so that
// concurrent contributions to the same element are combined atomically.
template <typename ValueType>
struct ScatterValue<ValueType, Kokkos::Experimental::ScatterMax, Kokkos::Experimental::ScatterAtomic> :
  Max<ValueType,Kokkos::DefaultExecutionSpace> {
public:
  // Wrap the element that contributions are accumulated into.
  KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) :
    Max<ValueType,Kokkos::DefaultExecutionSpace>(value_in)
  {}
  // Atomically replace `dest` with the larger of `dest` and `src`.
  //
  // Each iteration snapshots `dest`, picks the larger value and tries to
  // install it with atomic_compare_exchange, which returns the value it
  // found at `dest`. The exchange took effect exactly when that value
  // equals the snapshot; otherwise another contribution intervened and the
  // loop retries with a fresh snapshot.
  //
  // The previous relative-difference test divided by the snapshot (which
  // breaks when the element is 0) and also accepted any observed value
  // smaller than the snapshot as success even though the exchange failed.
  // NOTE(review): like the old test, a NaN element never compares equal and
  // would keep the loop spinning.
  KOKKOS_FORCEINLINE_FUNCTION
  void atomic_max(ValueType & dest, const ValueType& src) const {
    bool success = false;
    while(!success) {
      const ValueType dest_old = dest;
      const ValueType dest_new = ( dest_old < src ) ? src : dest_old;
      const ValueType observed =
        Kokkos::atomic_compare_exchange<ValueType>(&dest,dest_old,dest_new);
      success = ( observed == dest_old );
    }
  }
  // Atomic replacements for the reducer's join(); these hide the base versions.
  KOKKOS_INLINE_FUNCTION
  void join(ValueType& dest, const ValueType& src) const {
    atomic_max(dest, src);
  }
  KOKKOS_INLINE_FUNCTION
  void join(volatile ValueType& dest, const volatile ValueType& src) const {
    // atomic_max takes non-volatile references, so forwarding the volatile
    // arguments unchanged cannot compile; strip the qualifier explicitly.
    atomic_max(const_cast<ValueType&>(dest), const_cast<const ValueType&>(src));
  }
  // Common entry point shared by all ScatterValue specializations.
  KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) {
    this->join( this->reference(), rhs );
  }
  // Re-initialize the wrapped element through the reducer's init().
  KOKKOS_FORCEINLINE_FUNCTION void reset() {
    this->init( this->reference() );
  }
};
/* DuplicatedDataType, given a View DataType, will create a new DataType
that has a new runtime dimension which becomes the largest-stride dimension.
In the case of LayoutLeft, due to the limitation induced by the design of DataType
itself, it must convert any existing compile-time dimensions into runtime dimensions. */
// Primary template; only the per-layout specializations are defined.
template <typename T, typename Layout>
struct DuplicatedDataType;

// LayoutRight, scalar type reached: add a star all the way on the left.
template <typename T>
struct DuplicatedDataType<T, Kokkos::LayoutRight> {
  using value_type = T*;
};

// LayoutRight, compile-time extent: keep [N] and recurse on the element type.
template <typename T, size_t N>
struct DuplicatedDataType<T[N], Kokkos::LayoutRight> {
  using value_type =
    typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type[N];
};

// LayoutRight, unbounded array: keep [] and recurse on the element type.
template <typename T>
struct DuplicatedDataType<T[], Kokkos::LayoutRight> {
  using value_type =
    typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type[];
};

// LayoutRight, runtime extent: keep the star and recurse on the pointee type.
template <typename T>
struct DuplicatedDataType<T*, Kokkos::LayoutRight> {
  using value_type =
    typename DuplicatedDataType<T, Kokkos::LayoutRight>::value_type*;
};
// LayoutLeft, scalar type reached: contributes the extra runtime dimension.
template <typename T>
struct DuplicatedDataType<T, Kokkos::LayoutLeft> {
  using value_type = T*;
};

// LayoutLeft, compile-time extent: replaced by a runtime dimension (a star).
template <typename T, size_t N>
struct DuplicatedDataType<T[N], Kokkos::LayoutLeft> {
  using value_type =
    typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*;
};

// LayoutLeft, unbounded array: replaced by a runtime dimension (a star).
template <typename T>
struct DuplicatedDataType<T[], Kokkos::LayoutLeft> {
  using value_type =
    typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*;
};

// LayoutLeft, runtime extent: keep the star and recurse on the pointee type.
template <typename T>
struct DuplicatedDataType<T*, Kokkos::LayoutLeft> {
  using value_type =
    typename DuplicatedDataType<T, Kokkos::LayoutLeft>::value_type*;
};
/* Insert integer argument pack into array */
// Terminal case: store the only remaining argument at array[pos].
template<class T>
void args_to_array(size_t* array, int pos, T dim0) {
  array[pos] = dim0;
}

// Recursive case: store the leading argument at array[pos], then place the
// rest of the pack into the following slots.
template<class T, class ... Dims>
void args_to_array(size_t* array, int pos, T dim0, Dims ... dims) {
  size_t* const slot = array + pos;
  *slot = dim0;
  const int next_pos = pos + 1;
  args_to_array(array, next_pos, dims...);
}
/* Slice is just responsible for stuffing the correct number of Kokkos::ALL
arguments on the correct side of the index in a call to subview() to get a
subview where the index specified is the largest-stride one. */
template <typename Layout, int rank, typename V, typename ... Args>
struct Slice {
typedef Slice<Layout, rank - 1, V, Kokkos::Impl::ALL_t, Args...> next;
typedef typename next::value_type value_type;
static
value_type get(V const& src, const size_t i, Args ... args) {
return next::get(src, i, Kokkos::ALL, args...);
}
};
template <typename V, typename ... Args>
struct Slice<Kokkos::LayoutRight, 1, V, Args...> {
typedef typename Kokkos::Impl::ViewMapping
< void
, V
, const size_t
, Args ...
>::type value_type;
static
value_type get(V const& src, const size_t i, Args ... args) {
return Kokkos::subview(src, i, args...);
}
};
template <typename V, typename ... Args>
struct Slice<Kokkos::LayoutLeft, 1, V, Args...> {
typedef typename Kokkos::Impl::ViewMapping
< void
, V
, Args ...
, const size_t
>::type value_type;
static
value_type get(V const& src, const size_t i, Args ... args) {
return Kokkos::subview(src, args..., i);
}
};
template <typename ExecSpace, typename ValueType, int Op>
struct ReduceDuplicates;
template <typename ExecSpace, typename ValueType, int Op>
struct ReduceDuplicatesBase {
typedef ReduceDuplicates<ExecSpace, ValueType, Op> Derived;
ValueType const* src;
ValueType* dst;
size_t stride;
size_t start;
size_t n;
ReduceDuplicatesBase(ValueType const* src_in, ValueType* dest_in, size_t stride_in, size_t start_in, size_t n_in, std::string const& name)
: src(src_in)
, dst(dest_in)
, stride(stride_in)
, start(start_in)
, n(n_in)
{
#if defined(KOKKOS_ENABLE_PROFILING)
uint64_t kpID = 0;
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0, &kpID);
}
#endif
typedef RangePolicy<ExecSpace, size_t> policy_type;
typedef Kokkos::Impl::ParallelFor<Derived, policy_type> closure_type;
const closure_type closure(*(static_cast<Derived*>(this)), policy_type(0, stride));
closure.execute();
#if defined(KOKKOS_ENABLE_PROFILING)
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::endParallelFor(kpID);
}
#endif
}
};
/* ReduceDuplicates -- Perform reduction on destination array using strided source
* Use ScatterValue<> specific to operation to wrap destination array so that
* the reduction operation can be accessed via the update(rhs) function */
template <typename ExecSpace, typename ValueType, int Op>
struct ReduceDuplicates :
  public ReduceDuplicatesBase<ExecSpace, ValueType, Op>
{
  using Base = ReduceDuplicatesBase<ExecSpace, ValueType, Op>;

  // Forwards everything to the base, whose constructor runs the kernel.
  ReduceDuplicates(ValueType const* src_in, ValueType* dst_in, size_t stride_in, size_t start_in, size_t n_in, std::string const& name)
    : Base(src_in, dst_in, stride_in, start_in, n_in, name)
  {}

  // Kernel body for element i: fold src[i + stride * j], for j in
  // [start, n), into dst[i] using the non-atomic ScatterValue for Op.
  KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
    using accumulator_type =
      ScatterValue<ValueType, Op, Kokkos::Experimental::ScatterNonAtomic>;
    accumulator_type accumulator(this->dst[i]);
    for (size_t dup = this->start; dup < this->n; ++dup) {
      accumulator.update(this->src[i + this->stride * dup]);
    }
  }
};
template <typename ExecSpace, typename ValueType, int Op>
struct ResetDuplicates;

// Holds the argument of a reset kernel and launches it from the constructor:
// the derived functor is executed as a ParallelFor over [0, size_in).
template <typename ExecSpace, typename ValueType, int Op>
struct ResetDuplicatesBase {
  typedef ResetDuplicates<ExecSpace, ValueType, Op> Derived;
  ValueType* data;  // array whose elements are reset
  // data_in: first element to reset; size_in: number of elements to reset;
  // name: label used to name the profiling region.
  ResetDuplicatesBase(ValueType* data_in, size_t size_in, std::string const& name)
    : data(data_in)
  {
#if defined(KOKKOS_ENABLE_PROFILING)
    uint64_t kpID = 0;
    if(Kokkos::Profiling::profileLibraryLoaded()) {
      // Label the region "reset_<name>" so profiling tools can tell this
      // kernel apart from the "reduce_<name>" kernel of ReduceDuplicatesBase
      // (it previously reused the "reduce_" prefix).
      Kokkos::Profiling::beginParallelFor(std::string("reset_") + name, 0, &kpID);
    }
#endif
    typedef RangePolicy<ExecSpace, size_t> policy_type;
    typedef Kokkos::Impl::ParallelFor<Derived, policy_type> closure_type;
    // The functor handed to the closure is this object viewed as Derived.
    const closure_type closure(*(static_cast<Derived*>(this)), policy_type(0, size_in));
    closure.execute();
#if defined(KOKKOS_ENABLE_PROFILING)
    if(Kokkos::Profiling::profileLibraryLoaded()) {
      Kokkos::Profiling::endParallelFor(kpID);
    }
#endif
  }
};
/* ResetDuplicates -- Perform reset on destination array
* Use ScatterValue<> specific to operation to wrap destination array so that
* the reset operation can be accessed via the reset() function */
template <typename ExecSpace, typename ValueType, int Op>
struct ResetDuplicates :
  public ResetDuplicatesBase<ExecSpace, ValueType, Op>
{
  using Base = ResetDuplicatesBase<ExecSpace, ValueType, Op>;

  // Forwards everything to the base, whose constructor runs the kernel.
  ResetDuplicates(ValueType* data_in, size_t size_in, std::string const& name)
    : Base(data_in, size_in, name)
  {}

  // Kernel body for element i: wrap data[i] in the non-atomic ScatterValue
  // for Op and call its reset().
  KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const {
    using wrapper_type =
      ScatterValue<ValueType, Op, Kokkos::Experimental::ScatterNonAtomic>;
    wrapper_type element(this->data[i]);
    element.reset();
  }
};
}}} // Kokkos::Impl::Experimental
namespace Kokkos {
namespace Experimental {
// Forward declaration of ScatterView together with its default template
// arguments: the layout and execution space come from
// Kokkos::DefaultExecutionSpace, the operation defaults to ScatterSum, and
// the duplication / contribution settings default to the values of the
// DefaultDuplication / DefaultContribution traits for ExecSpace.
template <typename DataType
,typename Layout = Kokkos::DefaultExecutionSpace::array_layout
,typename ExecSpace = Kokkos::DefaultExecutionSpace
,int Op = ScatterSum
,int duplication = Kokkos::Impl::Experimental::DefaultDuplication<ExecSpace>::value
,int contribution = Kokkos::Impl::Experimental::DefaultContribution<ExecSpace, duplication>::value
>
class ScatterView;
// Forward declaration of ScatterAccess. `contribution` is the ScatterView's
// own setting; `override_contribution` is the mode actually used for the
// values this accessor returns. Note that the parameter order (Op, ExecSpace,
// Layout) differs from ScatterView's (Layout, ExecSpace, Op).
template <typename DataType
,int Op
,typename ExecSpace
,typename Layout
,int duplication
,int contribution
,int override_contribution
>
class ScatterAccess;
// non-duplicated implementation
/* ScatterView specialization for ScatterNonDuplicated data: contributions go
   directly into a single internal View that has the same type as the
   original View. */
template <typename DataType
         ,int Op
         ,typename ExecSpace
         ,typename Layout
         ,int contribution
         >
class ScatterView<DataType
                 ,Layout
                 ,ExecSpace
                 ,Op
                 ,ScatterNonDuplicated
                 ,contribution>
{
public:
  typedef Kokkos::View<DataType, Layout, ExecSpace> original_view_type;
  typedef typename original_view_type::value_type original_value_type;
  typedef typename original_view_type::reference_type original_reference_type;
  // The accessors reach the protected at() below.
  friend class ScatterAccess<DataType, Op, ExecSpace, Layout, ScatterNonDuplicated, contribution, ScatterNonAtomic>;
  friend class ScatterAccess<DataType, Op, ExecSpace, Layout, ScatterNonDuplicated, contribution, ScatterAtomic>;

  ScatterView()
  {
  }

  // Wrap an existing View: the internal view is initialized from it.
  template <typename RT, typename ... RP>
  ScatterView(View<RT, RP...> const& original_view)
  : internal_view(original_view)
  {
  }

  // Create the internal view from a label and extents.
  template <typename ... Dims>
  ScatterView(std::string const& name, Dims ... dims)
  : internal_view(name, dims ...)
  {
  }

  // Create an accessor; `override_contrib` may select a contribution mode
  // different from this ScatterView's own `contribution`.
  template <int override_contrib = contribution>
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterAccess<DataType, Op, ExecSpace, Layout, ScatterNonDuplicated, contribution, override_contrib>
  access() const {
    return ScatterAccess<DataType, Op, ExecSpace, Layout, ScatterNonDuplicated, contribution, override_contrib>{*this};
  }

  // Without duplication the "subview" is the internal view itself.
  original_view_type subview() const {
    return internal_view;
  }

  // Combine the internal view's values into `dest` using Op. Nothing is done
  // when `dest` is the internal view's own allocation.
  template <typename DT, typename ... RP>
  void contribute_into(View<DT, RP...> const& dest) const
  {
    typedef View<DT, RP...> dest_type;
    static_assert(std::is_same<
        typename dest_type::array_layout,
        Layout>::value,
        "ScatterView contribute destination has different layout");
    static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
        typename ExecSpace::memory_space,
        typename dest_type::memory_space>::value,
        "ScatterView contribute destination memory space not accessible");
    if (dest.data() == internal_view.data()) return;
    // ReduceDuplicates launches its kernel over [0, stride) and combines
    // src[i + stride * j] for j in [start, n) into dst[i]. The internal view
    // is treated as a single copy spanning all of its elements, so the
    // stride is its size. (A stride of 0, as passed previously, produced an
    // empty kernel range and nothing was ever contributed.)
    // NOTE(review): assumes `dest` holds at least internal_view.size()
    // elements laid out like the internal view -- confirm with callers.
    Kokkos::Impl::Experimental::ReduceDuplicates<ExecSpace, original_value_type, Op>(
        internal_view.data(),
        dest.data(),
        internal_view.size(),
        0,
        1,
        internal_view.label());
  }

  // Reset every element of the internal view via ResetDuplicates.
  void reset() {
    Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
        internal_view.data(),
        internal_view.size(),
        internal_view.label());
  }

  // Reset unless `view` is the internal view's own allocation.
  template <typename DT, typename ... RP>
  void reset_except(View<DT, RP...> const& view) {
    if (view.data() != internal_view.data()) reset();
  }

  // Forward to Kokkos::resize on the internal view.
  void resize(const size_t n0 = 0,
           const size_t n1 = 0,
           const size_t n2 = 0,
           const size_t n3 = 0,
           const size_t n4 = 0,
           const size_t n5 = 0,
           const size_t n6 = 0,
           const size_t n7 = 0) {
    ::Kokkos::resize(internal_view,n0,n1,n2,n3,n4,n5,n6,n7);
  }

  // Forward to Kokkos::realloc on the internal view.
  void realloc(const size_t n0 = 0,
           const size_t n1 = 0,
           const size_t n2 = 0,
           const size_t n3 = 0,
           const size_t n4 = 0,
           const size_t n5 = 0,
           const size_t n6 = 0,
           const size_t n7 = 0) {
    ::Kokkos::realloc(internal_view,n0,n1,n2,n3,n4,n5,n6,n7);
  }

protected:
  // Element access used by ScatterAccess.
  template <typename ... Args>
  KOKKOS_FORCEINLINE_FUNCTION
  original_reference_type at(Args ... args) const {
    return internal_view(args...);
  }

private:
  typedef original_view_type internal_view_type;
  internal_view_type internal_view;
};
/* ScatterAccess for non-duplicated ScatterViews: refers to its ScatterView
   and hands out ScatterValue wrappers around the view's elements. */
template <typename DataType
         ,int Op
         ,typename ExecSpace
         ,typename Layout
         ,int contribution
         ,int override_contribution
         >
class ScatterAccess<DataType
                   ,Op
                   ,ExecSpace
                   ,Layout
                   ,ScatterNonDuplicated
                   ,contribution
                   ,override_contribution>
{
public:
  using view_type =
    ScatterView<DataType, Layout, ExecSpace, Op, ScatterNonDuplicated, contribution>;
  using original_value_type = typename view_type::original_value_type;
  // The wrapper type follows override_contribution, not `contribution`.
  using value_type =
    Kokkos::Impl::Experimental::ScatterValue<
      original_value_type, Op, override_contribution>;

  // NOTE(review): `view` is a reference member and is initialized here from
  // a temporary view_type, which does not outlive the constructor -- confirm
  // this constructor is never relied upon.
  KOKKOS_INLINE_FUNCTION
  ScatterAccess() : view(view_type()) {}

  // Refer to `view_in`; it is held by reference, not copied.
  KOKKOS_INLINE_FUNCTION
  ScatterAccess(view_type const& view_in) : view(view_in) {}

  KOKKOS_INLINE_FUNCTION
  ~ScatterAccess() {}

  // Wrap the element at the given indices in a ScatterValue.
  template <typename ... Args>
  KOKKOS_FORCEINLINE_FUNCTION
  value_type operator()(Args ... args) const {
    return view.at(args...);
  }

  // Single-index form, enabled only for rank-1 views and integral indices.
  template <typename Arg>
  KOKKOS_FORCEINLINE_FUNCTION
  typename std::enable_if<view_type::original_view_type::rank == 1 &&
    std::is_integral<Arg>::value, value_type>::type
  operator[](Arg arg) const {
    return view.at(arg);
  }

private:
  view_type const& view;
};
// duplicated implementation
// LayoutLeft and LayoutRight are different enough that we'll just specialize each
/* ScatterView specialization for ScatterDuplicated data with LayoutRight.
   The internal view has one extra leading (largest-stride) dimension whose
   extent is unique_token.size(); each index along it selects one copy of
   the original data. */
template <typename DataType
         ,int Op
         ,typename ExecSpace
         ,int contribution
         >
class ScatterView<DataType
                 ,Kokkos::LayoutRight
                 ,ExecSpace
                 ,Op
                 ,ScatterDuplicated
                 ,contribution>
{
public:
  typedef Kokkos::View<DataType, Kokkos::LayoutRight, ExecSpace> original_view_type;
  typedef typename original_view_type::value_type original_value_type;
  typedef typename original_view_type::reference_type original_reference_type;
  // The accessors reach the protected members below.
  friend class ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutRight, ScatterDuplicated, contribution, ScatterNonAtomic>;
  friend class ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutRight, ScatterDuplicated, contribution, ScatterAtomic>;
  typedef typename Kokkos::Impl::Experimental::DuplicatedDataType<DataType, Kokkos::LayoutRight> data_type_info;
  typedef typename data_type_info::value_type internal_data_type;
  typedef Kokkos::View<internal_data_type, Kokkos::LayoutRight, ExecSpace> internal_view_type;

  ScatterView()
  {
  }

  // Allocate (without initializing) a duplicated view sized after
  // `original_view`, labelled "duplicated_<label>", then reset it.
  template <typename RT, typename ... RP >
  ScatterView(View<RT, RP...> const& original_view)
  : unique_token()
  , internal_view(Kokkos::ViewAllocateWithoutInitializing(
                    std::string("duplicated_") + original_view.label()),
                  unique_token.size(),
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
                  original_view.extent(0),
                  original_view.extent(1),
                  original_view.extent(2),
                  original_view.extent(3),
                  original_view.extent(4),
                  original_view.extent(5),
                  original_view.extent(6) )
#else
                  // Only the runtime extents are passed; the remaining
                  // slots receive the constructor's default argument.
                  original_view.rank_dynamic > 0 ? original_view.extent(0): KOKKOS_IMPL_CTOR_DEFAULT_ARG,
                  original_view.rank_dynamic > 1 ? original_view.extent(1): KOKKOS_IMPL_CTOR_DEFAULT_ARG,
                  original_view.rank_dynamic > 2 ? original_view.extent(2): KOKKOS_IMPL_CTOR_DEFAULT_ARG,
                  original_view.rank_dynamic > 3 ? original_view.extent(3): KOKKOS_IMPL_CTOR_DEFAULT_ARG,
                  original_view.rank_dynamic > 4 ? original_view.extent(4): KOKKOS_IMPL_CTOR_DEFAULT_ARG,
                  original_view.rank_dynamic > 5 ? original_view.extent(5): KOKKOS_IMPL_CTOR_DEFAULT_ARG,
                  original_view.rank_dynamic > 6 ? original_view.extent(6): KOKKOS_IMPL_CTOR_DEFAULT_ARG)
#endif
  {
    reset();
  }

  // Allocate (without initializing) a duplicated view from a label and the
  // original extents, then reset it. unique_token is declared before
  // internal_view, so its size() is available here.
  template <typename ... Dims>
  ScatterView(std::string const& name, Dims ... dims)
  : internal_view(Kokkos::ViewAllocateWithoutInitializing(name), unique_token.size(), dims ...)
  {
    reset();
  }

  // Create an accessor; `override_contribution` may select a contribution
  // mode different from this ScatterView's own `contribution`.
  template <int override_contribution = contribution>
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutRight, ScatterDuplicated, contribution, override_contribution>
  access() const {
    return ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutRight, ScatterDuplicated, contribution, override_contribution>{*this};
  }

  // Subview selecting index 0 of the duplication dimension.
  typename Kokkos::Impl::Experimental::Slice<
    Kokkos::LayoutRight, internal_view_type::rank, internal_view_type>::value_type
  subview() const
  {
    // Spelled `rank` to match the return type above (was `Rank`).
    return Kokkos::Impl::Experimental::Slice<
      Kokkos::LayoutRight, internal_view_type::rank, internal_view_type>::get(internal_view, 0);
  }

  // Combine the duplicated copies into `dest` using Op. When `dest` is the
  // internal allocation itself (i.e. copy 0), copy 0 is skipped.
  template <typename DT, typename ... RP>
  void contribute_into(View<DT, RP...> const& dest) const
  {
    typedef View<DT, RP...> dest_type;
    // Messages say "contribute", matching the non-duplicated ScatterView
    // (they previously referred to deep_copy).
    static_assert(std::is_same<
        typename dest_type::array_layout,
        Kokkos::LayoutRight>::value,
        "ScatterView contribute destination has different layout");
    static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
        typename ExecSpace::memory_space,
        typename dest_type::memory_space>::value,
        "ScatterView contribute destination memory space not accessible");
    bool is_equal = (dest.data() == internal_view.data());
    size_t start = is_equal ? 1 : 0;
    Kokkos::Impl::Experimental::ReduceDuplicates<ExecSpace, original_value_type, Op>(
        internal_view.data(),
        dest.data(),
        internal_view.stride(0),
        start,
        internal_view.extent(0),
        internal_view.label());
  }

  // Reset every element of every copy via ResetDuplicates.
  void reset() {
    Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
        internal_view.data(),
        internal_view.size(),
        internal_view.label());
  }

  // Reset everything, except that when `view` is the internal allocation
  // its first view.size() elements are left untouched.
  template <typename DT, typename ... RP>
  void reset_except(View<DT, RP...> const& view) {
    if (view.data() != internal_view.data()) {
      reset();
      return;
    }
    Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
        internal_view.data() + view.size(),
        internal_view.size() - view.size(),
        internal_view.label());
  }

  // Forward to Kokkos::resize, keeping unique_token.size() as the leading
  // (duplication) extent.
  void resize(const size_t n0 = 0,
           const size_t n1 = 0,
           const size_t n2 = 0,
           const size_t n3 = 0,
           const size_t n4 = 0,
           const size_t n5 = 0,
           const size_t n6 = 0) {
    ::Kokkos::resize(internal_view,unique_token.size(),n0,n1,n2,n3,n4,n5,n6);
  }

  // Forward to Kokkos::realloc, keeping unique_token.size() as the leading
  // (duplication) extent.
  void realloc(const size_t n0 = 0,
           const size_t n1 = 0,
           const size_t n2 = 0,
           const size_t n3 = 0,
           const size_t n4 = 0,
           const size_t n5 = 0,
           const size_t n6 = 0) {
    ::Kokkos::realloc(internal_view,unique_token.size(),n0,n1,n2,n3,n4,n5,n6);
  }

protected:
  // Element access used by ScatterAccess: `rank` is the index along the
  // leading duplication dimension, `args` the original indices.
  template <typename ... Args>
  KOKKOS_FORCEINLINE_FUNCTION
  original_reference_type at(int rank, Args ... args) const {
    return internal_view(rank, args...);
  }

  typedef Kokkos::Experimental::UniqueToken<
    ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type;
  // Declaration order matters: unique_token must be constructed before
  // internal_view, whose initializers call unique_token.size().
  unique_token_type unique_token;
  internal_view_type internal_view;
};
/* Duplicated ScatterView specialization for Kokkos::LayoutLeft.

   The internal view has one more dimension than the original view; its LAST
   extent is unique_token.size(), and at() addresses a duplicate by appending
   the token id as the last index. */
template <typename DataType
         ,int Op
         ,typename ExecSpace
         ,int contribution
         >
class ScatterView<DataType
                 ,Kokkos::LayoutLeft
                 ,ExecSpace
                 ,Op
                 ,ScatterDuplicated
                 ,contribution>
{
public:
  typedef Kokkos::View<DataType, Kokkos::LayoutLeft, ExecSpace> original_view_type;
  typedef typename original_view_type::value_type original_value_type;
  typedef typename original_view_type::reference_type original_reference_type;
  // ScatterAccess needs the protected at() and unique_token members.
  friend class ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutLeft, ScatterDuplicated, contribution, ScatterNonAtomic>;
  friend class ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutLeft, ScatterDuplicated, contribution, ScatterAtomic>;
  typedef typename Kokkos::Impl::Experimental::DuplicatedDataType<DataType, Kokkos::LayoutLeft> data_type_info;
  typedef typename data_type_info::value_type internal_data_type;
  typedef Kokkos::View<internal_data_type, Kokkos::LayoutLeft, ExecSpace> internal_view_type;

  // Default construction: members are default-constructed, nothing is allocated here.
  ScatterView()
  {
  }

  // Builds the duplicated storage from an existing view: copies its extents,
  // overwrites the last internal extent with unique_token.size(), allocates
  // without initializing, then calls reset().
  template <typename RT, typename ... RP >
  ScatterView(View<RT, RP...> const& original_view)
  : unique_token()
  {
    size_t arg_N[8] = {
      original_view.rank>0?original_view.extent(0):KOKKOS_IMPL_CTOR_DEFAULT_ARG,
      original_view.rank>1?original_view.extent(1):KOKKOS_IMPL_CTOR_DEFAULT_ARG,
      original_view.rank>2?original_view.extent(2):KOKKOS_IMPL_CTOR_DEFAULT_ARG,
      original_view.rank>3?original_view.extent(3):KOKKOS_IMPL_CTOR_DEFAULT_ARG,
      original_view.rank>4?original_view.extent(4):KOKKOS_IMPL_CTOR_DEFAULT_ARG,
      original_view.rank>5?original_view.extent(5):KOKKOS_IMPL_CTOR_DEFAULT_ARG,
      original_view.rank>6?original_view.extent(6):KOKKOS_IMPL_CTOR_DEFAULT_ARG,
      KOKKOS_IMPL_CTOR_DEFAULT_ARG
    };
    // The duplicate dimension is the last dimension of the internal view.
    arg_N[internal_view_type::rank - 1] = unique_token.size();
    internal_view = internal_view_type(
        Kokkos::ViewAllocateWithoutInitializing(
          std::string("duplicated_") + original_view.label()),
        arg_N[0], arg_N[1], arg_N[2], arg_N[3],
        arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
    reset();
  }

  // Builds the duplicated storage from a label and runtime dimensions.
  // Extents start from the static extents of original_view_type; `dims` are
  // then passed to args_to_array starting at index 0 (presumably filling the
  // runtime extents -- confirm against args_to_array), and finally the last
  // internal extent is set to unique_token.size().
  template <typename ... Dims>
  ScatterView(std::string const& name, Dims ... dims) {
    original_view_type original_view;
    size_t arg_N[8] = {
      original_view.rank>0?original_view.static_extent(0):KOKKOS_IMPL_CTOR_DEFAULT_ARG,
      original_view.rank>1?original_view.static_extent(1):KOKKOS_IMPL_CTOR_DEFAULT_ARG,
      original_view.rank>2?original_view.static_extent(2):KOKKOS_IMPL_CTOR_DEFAULT_ARG,
      original_view.rank>3?original_view.static_extent(3):KOKKOS_IMPL_CTOR_DEFAULT_ARG,
      original_view.rank>4?original_view.static_extent(4):KOKKOS_IMPL_CTOR_DEFAULT_ARG,
      original_view.rank>5?original_view.static_extent(5):KOKKOS_IMPL_CTOR_DEFAULT_ARG,
      original_view.rank>6?original_view.static_extent(6):KOKKOS_IMPL_CTOR_DEFAULT_ARG,
      KOKKOS_IMPL_CTOR_DEFAULT_ARG
    };
    Kokkos::Impl::Experimental::args_to_array(arg_N,0,dims ...);
    arg_N[internal_view_type::rank - 1] = unique_token.size();
    internal_view = internal_view_type(Kokkos::ViewAllocateWithoutInitializing(name),
        arg_N[0], arg_N[1], arg_N[2], arg_N[3],
        arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
    reset();
  }

  // Creates the accessor object used to contribute values; the optional
  // template argument overrides the contribution mode for that accessor only.
  template <int override_contribution = contribution>
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutLeft, ScatterDuplicated, contribution, override_contribution>
  access() const {
    return ScatterAccess<DataType, Op, ExecSpace, Kokkos::LayoutLeft, ScatterDuplicated, contribution, override_contribution>{*this};
  }

  // Returns Slice<...>::get(internal_view, 0); presumably the slice holding
  // duplicate number 0 -- confirm against Slice.
  typename Kokkos::Impl::Experimental::Slice<
    Kokkos::LayoutLeft, internal_view_type::rank, internal_view_type>::value_type
  subview() const
  {
    return Kokkos::Impl::Experimental::Slice<
      Kokkos::LayoutLeft, internal_view_type::rank, internal_view_type>::get(internal_view, 0);
  }

  // Reduces the duplicates into `dest` via ReduceDuplicates.
  // `dest` must have the original (non-const) value type, LayoutLeft, and a
  // memory space accessible from ExecSpace (all checked at compile time).
  template <typename ... RP>
  void contribute_into(View<RP...> const& dest) const
  {
    typedef View<RP...> dest_type;
    static_assert(std::is_same<
        typename dest_type::value_type,
        typename original_view_type::non_const_value_type>::value,
        "ScatterView deep_copy destination has wrong value_type");
    static_assert(std::is_same<
        typename dest_type::array_layout,
        Kokkos::LayoutLeft>::value,
        "ScatterView deep_copy destination has different layout");
    static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
        typename ExecSpace::memory_space,
        typename dest_type::memory_space>::value,
        "ScatterView deep_copy destination memory space not accessible");
    // Number of duplicates = extent of the last internal dimension.
    auto extent = internal_view.extent(
        internal_view_type::rank - 1);
    // When dest starts at the internal buffer itself, start at duplicate 1
    // instead of 0 (presumably because duplicate 0 already lives in dest).
    bool is_equal = (dest.data() == internal_view.data());
    size_t start = is_equal ? 1 : 0;
    Kokkos::Impl::Experimental::ReduceDuplicates<ExecSpace, original_value_type, Op>(
        internal_view.data(),
        dest.data(),
        internal_view.stride(internal_view_type::rank - 1),
        start,
        extent,
        internal_view.label());
  }

  // Runs ResetDuplicates over the whole internal buffer.
  void reset() {
    Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
        internal_view.data(),
        internal_view.size(),
        internal_view.label());
  }

  // Like reset(), but if `view` starts at the internal buffer's address the
  // first view.size() elements are left untouched.
  template <typename DT, typename ... RP>
  void reset_except(View<DT, RP...> const& view) {
    if (view.data() != internal_view.data()) {
      reset();
      return;
    }
    Kokkos::Impl::Experimental::ResetDuplicates<ExecSpace, original_value_type, Op>(
        internal_view.data() + view.size(),
        internal_view.size() - view.size(),
        internal_view.label());
  }

  // Resizes the internal view; the entry for the last internal dimension is
  // overwritten with unique_token.size() regardless of what was passed.
  void resize(const size_t n0 = 0,
           const size_t n1 = 0,
           const size_t n2 = 0,
           const size_t n3 = 0,
           const size_t n4 = 0,
           const size_t n5 = 0,
           const size_t n6 = 0) {
    size_t arg_N[8] = {n0,n1,n2,n3,n4,n5,n6,0};
    const int i = internal_view.rank-1;
    arg_N[i] = unique_token.size();
    ::Kokkos::resize(internal_view,
        arg_N[0], arg_N[1], arg_N[2], arg_N[3],
        arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
  }

  // Reallocates the internal view; the entry for the last internal dimension
  // is overwritten with unique_token.size() regardless of what was passed.
  void realloc(const size_t n0 = 0,
           const size_t n1 = 0,
           const size_t n2 = 0,
           const size_t n3 = 0,
           const size_t n4 = 0,
           const size_t n5 = 0,
           const size_t n6 = 0) {
    size_t arg_N[8] = {n0,n1,n2,n3,n4,n5,n6,0};
    const int i = internal_view.rank-1;
    arg_N[i] = unique_token.size();
    ::Kokkos::realloc(internal_view,
        arg_N[0], arg_N[1], arg_N[2], arg_N[3],
        arg_N[4], arg_N[5], arg_N[6], arg_N[7]);
  }

protected:
  // Element accessor used by ScatterAccess: the caller's indices come first
  // and the duplicate id is appended as the last index.
  // Annotated with KOKKOS_FORCEINLINE_FUNCTION (not plain `inline`) because it
  // is called from ScatterAccess::operator(), which carries that annotation,
  // and to match the LayoutRight specialization's at().
  template <typename ... Args>
  KOKKOS_FORCEINLINE_FUNCTION
  original_reference_type at(int thread_id, Args ... args) const {
    return internal_view(args..., thread_id);
  }

protected:
  typedef Kokkos::Experimental::UniqueToken<
    ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type;

  // Supplies the number of duplicates (size()) and the ids ScatterAccess acquires.
  unique_token_type unique_token;
  // Backing storage holding every duplicate.
  internal_view_type internal_view;
};
/* This object has to be separate in order to store the thread ID, which cannot
be obtained until one is inside a parallel construct, and may be relatively
expensive to obtain at every contribution
(it calls a non-inlined function and looks up a thread-local variable).
Due to the expense, it is sensible to query it at most once per parallel iteration
(ideally once per thread, but parallel_for doesn't expose that)
and then store it in a stack variable.
ScatterAccess serves as a non-const object on the stack which can store the thread ID. */
/* Accessor for duplicated ScatterViews.
   Construction acquires an id from the view's unique_token; destruction
   releases it again unless the id was handed over to another accessor by the
   move constructor. Element access forwards to view.at(id, indices...). */
template <typename DataType
         ,int Op
         ,typename ExecSpace
         ,typename Layout
         ,int contribution
         ,int override_contribution
         >
class ScatterAccess<DataType
                   ,Op
                   ,ExecSpace
                   ,Layout
                   ,ScatterDuplicated
                   ,contribution
                   ,override_contribution>
{
public:
  typedef ScatterView<DataType, Layout, ExecSpace, Op, ScatterDuplicated, contribution> view_type;
  typedef typename view_type::original_value_type original_value_type;
  typedef Kokkos::Impl::Experimental::ScatterValue<
      original_value_type, Op, override_contribution> value_type;

private:
  typedef typename view_type::unique_token_type unique_token_type;
  typedef typename unique_token_type::size_type thread_id_type;

public:
  // Binds to the scatter view and acquires an id from its token.
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterAccess(view_type const& view_in)
    : view(view_in)
    , thread_id(view_in.unique_token.acquire())
  {
  }

  // Gives the id back, except when this object has been moved from
  // (marked by the all-ones id value).
  KOKKOS_FORCEINLINE_FUNCTION
  ~ScatterAccess() {
    if (thread_id == ~thread_id_type(0)) return;
    view.unique_token.release(thread_id);
  }

  // Multi-index access into the duplicate selected by this accessor's id.
  template <typename ... Args>
  KOKKOS_FORCEINLINE_FUNCTION
  value_type operator()(Args ... args) const {
    return view.at(thread_id, args...);
  }

  // Single-index access; only enabled for rank-1 original views and
  // integral index types.
  template <typename Arg>
  KOKKOS_FORCEINLINE_FUNCTION
  typename std::enable_if<view_type::original_view_type::rank == 1 &&
      std::is_integral<Arg>::value, value_type>::type
  operator[](Arg arg) const {
    return view.at(thread_id, arg);
  }

private:
  view_type const& view;

  // Copying and all assignment are disabled to keep the acquire/release
  // pairing simple.
  ScatterAccess(ScatterAccess const& other) = delete;
  ScatterAccess& operator=(ScatterAccess const& other) = delete;
  ScatterAccess& operator=(ScatterAccess&& other) = delete;

public:
  // Moving must remain possible for the common pattern
  //   auto b = a.access();
  // which the compiler turns into a move constructor call. The source object
  // is marked with the all-ones id so its destructor skips the release.
  KOKKOS_FORCEINLINE_FUNCTION
  ScatterAccess(ScatterAccess&& other)
    : view(other.view)
    , thread_id(other.thread_id)
  {
    other.thread_id = ~thread_id_type(0);
  }

private:
  thread_id_type thread_id;
};
// Factory that builds a ScatterView matching `original_view`.
// The data type, array layout and execution space are taken from the view's
// type (via ViewTraits). Op defaults to ScatterSum. When `duplication` or
// `contribution` is left at -1, the value comes from DefaultDuplication /
// DefaultContribution for the view's execution space; otherwise the supplied
// value is used as given.
// The ScatterView is produced by implicit conversion of `original_view` in the
// return statement.
template <int Op = Kokkos::Experimental::ScatterSum,
int duplication = -1,
int contribution = -1,
typename RT, typename ... RP>
ScatterView
< RT
, typename ViewTraits<RT, RP...>::array_layout
, typename ViewTraits<RT, RP...>::execution_space
, Op
/* just setting defaults if not specified... things got messy because the view type
does not come before the duplication/contribution settings in the
template parameter list */
, duplication == -1 ? Kokkos::Impl::Experimental::DefaultDuplication<typename ViewTraits<RT, RP...>::execution_space>::value : duplication
, contribution == -1 ?
Kokkos::Impl::Experimental::DefaultContribution<
typename ViewTraits<RT, RP...>::execution_space,
(duplication == -1 ?
Kokkos::Impl::Experimental::DefaultDuplication<
typename ViewTraits<RT, RP...>::execution_space
>::value
: duplication
)
>::value
: contribution
>
create_scatter_view(View<RT, RP...> const& original_view) {
return original_view; // implicit ScatterView constructor call
}
}} // namespace Kokkos::Experimental
namespace Kokkos {
namespace Experimental {
// Free-function form of ScatterView::contribute_into: forwards to
// src.contribute_into(dest).
template <typename DestData, typename SrcData, typename SrcLayout,
          typename SrcExec, int SrcOp, int SrcDuplication, int SrcContribution,
          typename ... DestProps>
void contribute(
    View<DestData, DestProps...>& dest,
    Kokkos::Experimental::ScatterView<SrcData, SrcLayout, SrcExec, SrcOp,
                                      SrcDuplication, SrcContribution> const& src)
{
  src.contribute_into(dest);
}
}} // namespace Kokkos::Experimental
namespace Kokkos {
// Kokkos::realloc overload for ScatterView: forwards the given extents to the
// scatter view's own realloc() member.
template <typename Data, typename Layout, typename Exec, int ScatterOp,
          int Duplication, int Contribution, typename ... Extents>
void realloc(
    Kokkos::Experimental::ScatterView<Data, Layout, Exec, ScatterOp,
                                      Duplication, Contribution>& scatter_view,
    Extents ... is)
{
  scatter_view.realloc(is ...);
}
// Kokkos::resize overload for ScatterView: forwards the given extents to the
// scatter view's own resize() member.
template <typename Data, typename Layout, typename Exec, int ScatterOp,
          int Duplication, int Contribution, typename ... Extents>
void resize(
    Kokkos::Experimental::ScatterView<Data, Layout, Exec, ScatterOp,
                                      Duplication, Contribution>& scatter_view,
    Extents ... is)
{
  scatter_view.resize(is ...);
}
} // namespace Kokkos
#endif