/* //@HEADER // ************************************************************************ // // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation // // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the Corporation nor the names of the // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER */ /// \file Kokkos_ScatterView.hpp /// \brief Declaration and definition of Kokkos::ScatterView. /// /// This header file declares and defines Kokkos::ScatterView and its /// related nonmember functions. #ifndef KOKKOS_SCATTER_VIEW_HPP #define KOKKOS_SCATTER_VIEW_HPP #include #include namespace Kokkos { namespace Experimental { /* * Reduction Type list * - These corresponds to subset of the reducers in parallel_reduce * - See Implementations of ScatterValue for details. */ enum : int { ScatterSum, ScatterProd, ScatterMax, ScatterMin, }; enum : int { ScatterNonDuplicated = 0, ScatterDuplicated = 1 }; enum : int { ScatterNonAtomic = 0, ScatterAtomic = 1 }; }} // Kokkos::Experimental namespace Kokkos { namespace Impl { namespace Experimental { template struct DefaultDuplication; template struct DefaultContribution; #ifdef KOKKOS_ENABLE_SERIAL template <> struct DefaultDuplication { enum : int { value = Kokkos::Experimental::ScatterNonDuplicated }; }; template <> struct DefaultContribution { enum : int { value = Kokkos::Experimental::ScatterNonAtomic }; }; template <> struct DefaultContribution { enum : int { value = Kokkos::Experimental::ScatterNonAtomic }; }; #endif #ifdef KOKKOS_ENABLE_OPENMP template <> struct DefaultDuplication { enum : int { value = Kokkos::Experimental::ScatterDuplicated }; }; template <> struct DefaultContribution { enum : int { value = Kokkos::Experimental::ScatterAtomic }; }; template <> struct DefaultContribution { enum : int { value = Kokkos::Experimental::ScatterNonAtomic }; }; #endif #ifdef KOKKOS_ENABLE_HPX template <> struct DefaultDuplication { enum : int { value = Kokkos::Experimental::ScatterDuplicated }; }; template <> struct DefaultContribution { enum : int { value = Kokkos::Experimental::ScatterAtomic }; }; template <> struct DefaultContribution { enum : int { value = Kokkos::Experimental::ScatterNonAtomic }; }; #endif #ifdef KOKKOS_ENABLE_THREADS template <> struct DefaultDuplication { enum : int { value = Kokkos::Experimental::ScatterDuplicated }; }; template <> struct DefaultContribution { enum : int { value = Kokkos::Experimental::ScatterAtomic }; }; template <> struct DefaultContribution { enum : int { value = Kokkos::Experimental::ScatterNonAtomic }; }; #endif #ifdef KOKKOS_ENABLE_CUDA template <> struct DefaultDuplication { enum : int { value = Kokkos::Experimental::ScatterNonDuplicated }; }; template <> struct DefaultContribution { enum : int { value = Kokkos::Experimental::ScatterAtomic }; }; template <> struct DefaultContribution { enum : int { value = Kokkos::Experimental::ScatterAtomic }; }; #endif /* ScatterValue is the object returned by the access operator() of ScatterAccess, This class inherits from the Sum<> reducer and it wraps join(dest, src) with convenient operator+=, etc. Note the addition of update(ValueType const& rhs) and reset() so that all reducers can have common functions See ReduceDuplicates and ResetDuplicates ) */ template struct ScatterValue; template struct ScatterValue : Sum { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : Sum(value_in) {} KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : Sum(other.reference()) {} KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) { this->join( this->reference(), rhs ); } KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) { this->join( this->reference(), -rhs ); } KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { this->join( this->reference(), rhs ); } KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init( this->reference() ); } }; /* ScatterValue is the object returned by the access operator() * of ScatterAccess, similar to that returned by an Atomic View, it wraps Kokkos::atomic_add with convenient operator+=, etc. This version also has the update(rhs) and reset() functions. */ template struct ScatterValue : Sum { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : Sum(value_in) {} KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) { this->join(this->reference(), rhs); } KOKKOS_FORCEINLINE_FUNCTION void operator-=(ValueType const& rhs) { this->join(this->reference(), -rhs); } KOKKOS_INLINE_FUNCTION void join(ValueType& dest, const ValueType& src) const { Kokkos::atomic_add(&dest, src); } KOKKOS_INLINE_FUNCTION void join(volatile ValueType& dest, const volatile ValueType& src) const { Kokkos::atomic_add(&dest, src); } KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { this->join( this->reference(), rhs ); } KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init( this->reference() ); } }; /* ScatterValue is the object returned by the access operator() of ScatterAccess, This class inherits from the Prod<> reducer and it wraps join(dest, src) with convenient operator*=, etc. Note the addition of update(ValueType const& rhs) and reset() so that all reducers can have common functions See ReduceDuplicates and ResetDuplicates ) */ template struct ScatterValue : Prod { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : Prod(value_in) {} KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : Prod(other.reference()) {} KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) { this->join( this->reference(), rhs ); } KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) { this->join( this->reference(), static_cast(1)/rhs ); } KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { this->join( this->reference(), rhs ); } KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init( this->reference() ); } }; /* ScatterValue is the object returned by the access operator() * of ScatterAccess, similar to that returned by an Atomic View, it wraps and atomic_prod with convenient operator*=, etc. atomic_prod uses the atomic_compare_exchange. This version also has the update(rhs) and reset() functions. */ template struct ScatterValue : Prod { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : Prod(value_in) {} KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) { this->join(this->reference(), rhs); } KOKKOS_FORCEINLINE_FUNCTION void operator/=(ValueType const& rhs) { this->join(this->reference(), static_cast(1)/rhs); } KOKKOS_FORCEINLINE_FUNCTION void atomic_prod(ValueType & dest, const ValueType& src) const { bool success = false; while(!success) { ValueType dest_old = dest; ValueType dest_new = dest_old * src; dest_new = Kokkos::atomic_compare_exchange(&dest,dest_old,dest_new); success = ( (dest_new - dest_old)/dest_old <= 1e-15 ); } } KOKKOS_INLINE_FUNCTION void join(ValueType& dest, const ValueType& src) const { atomic_prod(dest, src); } KOKKOS_INLINE_FUNCTION void join(volatile ValueType& dest, const volatile ValueType& src) const { atomic_prod(dest, src); } KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { this->join( this->reference(), rhs ); } KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init( this->reference() ); } }; /* ScatterValue is the object returned by the access operator() of ScatterAccess, This class inherits from the Min<> reducer and it wraps join(dest, src) with convenient update(rhs). Note the addition of update(ValueType const& rhs) and reset() are so that all reducers can have a common update function See ReduceDuplicates and ResetDuplicates ) */ template struct ScatterValue : Min { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : Min(value_in) {} KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : Min(other.reference()) {} KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { this->join( this->reference(), rhs ); } KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init( this->reference() ); } }; /* ScatterValue is the object returned by the access operator() * of ScatterAccess, similar to that returned by an Atomic View, it wraps and atomic_min with the update(rhs) function. atomic_min uses the atomic_compare_exchange. This version also has the reset() function */ template struct ScatterValue : Min { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : Min(value_in) {} KOKKOS_FORCEINLINE_FUNCTION void atomic_min(ValueType & dest, const ValueType& src) const { bool success = false; while(!success) { ValueType dest_old = dest; ValueType dest_new = ( dest_old > src ) ? src : dest_old; dest_new = Kokkos::atomic_compare_exchange(&dest,dest_old,dest_new); success = ( (dest_new - dest_old)/dest_old <= 1e-15 ); } } KOKKOS_INLINE_FUNCTION void join(ValueType& dest, const ValueType& src) const { atomic_min(dest, src); } KOKKOS_INLINE_FUNCTION void join(volatile ValueType& dest, const volatile ValueType& src) const { atomic_min(dest, src); } KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { this->join( this->reference(), rhs ); } KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init( this->reference() ); } }; /* ScatterValue is the object returned by the access operataor() of ScatterAccess, This class inherits from the Max<> reducer and it wraps join(dest, src) with convenient update(rhs). Note the addition of update(ValueType const& rhs) and reset() are so that all reducers can have a common update function See ReduceDuplicates and ResetDuplicates ) */ template struct ScatterValue : Max { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : Max(value_in) {} KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) : Max(other.reference()) {} KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { this->join( this->reference(), rhs ); } KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init( this->reference() ); } }; /* ScatterValue is the object returned by the access operator() * of ScatterAccess, similar to that returned by an Atomic View, it wraps and atomic_max with the update(rhs) function. atomic_max uses the atomic_compare_exchange. This version also has the reset() function */ template struct ScatterValue : Max { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) : Max(value_in) {} KOKKOS_FORCEINLINE_FUNCTION void atomic_max(ValueType & dest, const ValueType& src) const { bool success = false; while(!success) { ValueType dest_old = dest; ValueType dest_new = ( dest_old < src ) ? src : dest_old; dest_new = Kokkos::atomic_compare_exchange(&dest,dest_old,dest_new); success = ( (dest_new - dest_old)/dest_old <= 1e-15 ); } } KOKKOS_INLINE_FUNCTION void join(ValueType& dest, const ValueType& src) const { atomic_max(dest, src); } KOKKOS_INLINE_FUNCTION void join(volatile ValueType& dest, const volatile ValueType& src) const { atomic_max(dest, src); } KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { this->join( this->reference(), rhs ); } KOKKOS_FORCEINLINE_FUNCTION void reset() { this->init( this->reference() ); } }; /* DuplicatedDataType, given a View DataType, will create a new DataType that has a new runtime dimension which becomes the largest-stride dimension. In the case of LayoutLeft, due to the limitation induced by the design of DataType itself, it must convert any existing compile-time dimensions into runtime dimensions. */ template struct DuplicatedDataType; template struct DuplicatedDataType { typedef T* value_type; // For LayoutRight, add a star all the way on the left }; template struct DuplicatedDataType { typedef typename DuplicatedDataType::value_type value_type[N]; }; template struct DuplicatedDataType { typedef typename DuplicatedDataType::value_type value_type[]; }; template struct DuplicatedDataType { typedef typename DuplicatedDataType::value_type* value_type; }; template struct DuplicatedDataType { typedef T* value_type; }; template struct DuplicatedDataType { typedef typename DuplicatedDataType::value_type* value_type; }; template struct DuplicatedDataType { typedef typename DuplicatedDataType::value_type* value_type; }; template struct DuplicatedDataType { typedef typename DuplicatedDataType::value_type* value_type; }; /* Insert integer argument pack into array */ template void args_to_array(size_t* array, int pos, T dim0) { array[pos] = dim0; } template void args_to_array(size_t* array, int pos, T dim0, Dims ... dims) { array[pos] = dim0; args_to_array(array,pos+1,dims...); } /* Slice is just responsible for stuffing the correct number of Kokkos::ALL arguments on the correct side of the index in a call to subview() to get a subview where the index specified is the largest-stride one. */ template struct Slice { typedef Slice next; typedef typename next::value_type value_type; static value_type get(V const& src, const size_t i, Args ... args) { return next::get(src, i, Kokkos::ALL, args...); } }; template struct Slice { typedef typename Kokkos::Impl::ViewMapping < void , V , const size_t , Args ... >::type value_type; static value_type get(V const& src, const size_t i, Args ... args) { return Kokkos::subview(src, i, args...); } }; template struct Slice { typedef typename Kokkos::Impl::ViewMapping < void , V , Args ... , const size_t >::type value_type; static value_type get(V const& src, const size_t i, Args ... args) { return Kokkos::subview(src, args..., i); } }; template struct ReduceDuplicates; template struct ReduceDuplicatesBase { typedef ReduceDuplicates Derived; ValueType const* src; ValueType* dst; size_t stride; size_t start; size_t n; ReduceDuplicatesBase(ValueType const* src_in, ValueType* dest_in, size_t stride_in, size_t start_in, size_t n_in, std::string const& name) : src(src_in) , dst(dest_in) , stride(stride_in) , start(start_in) , n(n_in) { #if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0, &kpID); } #endif typedef RangePolicy policy_type; typedef Kokkos::Impl::ParallelFor closure_type; const closure_type closure(*(static_cast(this)), policy_type(0, stride)); closure.execute(); #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelFor(kpID); } #endif } }; /* ReduceDuplicates -- Perform reduction on destination array using strided source * Use ScatterValue<> specific to operation to wrap destination array so that * the reduction operation can be accessed via the update(rhs) function */ template struct ReduceDuplicates : public ReduceDuplicatesBase { typedef ReduceDuplicatesBase Base; ReduceDuplicates(ValueType const* src_in, ValueType* dst_in, size_t stride_in, size_t start_in, size_t n_in, std::string const& name): Base(src_in, dst_in, stride_in, start_in, n_in, name) {} KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const { for (size_t j = Base::start; j < Base::n; ++j) { ScatterValue sv(Base::dst[i]); sv.update( Base::src[i + Base::stride * j] ); } } }; template struct ResetDuplicates; template struct ResetDuplicatesBase { typedef ResetDuplicates Derived; ValueType* data; ResetDuplicatesBase(ValueType* data_in, size_t size_in, std::string const& name) : data(data_in) { #if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0, &kpID); } #endif typedef RangePolicy policy_type; typedef Kokkos::Impl::ParallelFor closure_type; const closure_type closure(*(static_cast(this)), policy_type(0, size_in)); closure.execute(); #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelFor(kpID); } #endif } }; /* ResetDuplicates -- Perform reset on destination array * Use ScatterValue<> specific to operation to wrap destination array so that * the reset operation can be accessed via the reset() function */ template struct ResetDuplicates : public ResetDuplicatesBase { typedef ResetDuplicatesBase Base; ResetDuplicates(ValueType* data_in, size_t size_in, std::string const& name): Base(data_in, size_in, name) {} KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const { ScatterValue sv(Base::data[i]); sv.reset(); } }; }}} // Kokkos::Impl::Experimental namespace Kokkos { namespace Experimental { template ::value ,int contribution = Kokkos::Impl::Experimental::DefaultContribution::value > class ScatterView; template class ScatterAccess; // non-duplicated implementation template class ScatterView { public: typedef Kokkos::View original_view_type; typedef typename original_view_type::value_type original_value_type; typedef typename original_view_type::reference_type original_reference_type; friend class ScatterAccess; friend class ScatterAccess; ScatterView() { } template ScatterView(View const& original_view) : internal_view(original_view) { } template ScatterView(std::string const& name, Dims ... dims) : internal_view(name, dims ...) { } template KOKKOS_FORCEINLINE_FUNCTION ScatterAccess access() const { return ScatterAccess{*this}; } original_view_type subview() const { return internal_view; } template void contribute_into(View const& dest) const { typedef View dest_type; static_assert(std::is_same< typename dest_type::array_layout, Layout>::value, "ScatterView contribute destination has different layout"); static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename ExecSpace::memory_space, typename dest_type::memory_space>::value, "ScatterView contribute destination memory space not accessible"); if (dest.data() == internal_view.data()) return; Kokkos::Impl::Experimental::ReduceDuplicates( internal_view.data(), dest.data(), 0, 0, 1, internal_view.label()); } void reset() { Kokkos::Impl::Experimental::ResetDuplicates( internal_view.data(), internal_view.size(), internal_view.label()); } template void reset_except(View const& view) { if (view.data() != internal_view.data()) reset(); } void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0, const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0, const size_t n6 = 0, const size_t n7 = 0) { ::Kokkos::resize(internal_view,n0,n1,n2,n3,n4,n5,n6,n7); } void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0, const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0, const size_t n6 = 0, const size_t n7 = 0) { ::Kokkos::realloc(internal_view,n0,n1,n2,n3,n4,n5,n6,n7); } protected: template KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(Args ... args) const { return internal_view(args...); } private: typedef original_view_type internal_view_type; internal_view_type internal_view; }; template class ScatterAccess { public: typedef ScatterView view_type; typedef typename view_type::original_value_type original_value_type; typedef Kokkos::Impl::Experimental::ScatterValue< original_value_type, Op, override_contribution> value_type; KOKKOS_INLINE_FUNCTION ScatterAccess() : view(view_type()) { } KOKKOS_INLINE_FUNCTION ScatterAccess(view_type const& view_in) : view(view_in) { } KOKKOS_INLINE_FUNCTION ~ScatterAccess() { } template KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args ... args) const { return view.at(args...); } template KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if::value, value_type>::type operator[](Arg arg) const { return view.at(arg); } private: view_type const& view; }; // duplicated implementation // LayoutLeft and LayoutRight are different enough that we'll just specialize each template class ScatterView { public: typedef Kokkos::View original_view_type; typedef typename original_view_type::value_type original_value_type; typedef typename original_view_type::reference_type original_reference_type; friend class ScatterAccess; friend class ScatterAccess; typedef typename Kokkos::Impl::Experimental::DuplicatedDataType data_type_info; typedef typename data_type_info::value_type internal_data_type; typedef Kokkos::View internal_view_type; ScatterView() { } template ScatterView(View const& original_view) : unique_token() , internal_view(Kokkos::ViewAllocateWithoutInitializing( std::string("duplicated_") + original_view.label()), unique_token.size(), #ifdef KOKKOS_ENABLE_DEPRECATED_CODE original_view.extent(0), original_view.extent(1), original_view.extent(2), original_view.extent(3), original_view.extent(4), original_view.extent(5), original_view.extent(6) ) #else original_view.rank_dynamic > 0 ? original_view.extent(0): KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank_dynamic > 1 ? original_view.extent(1): KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank_dynamic > 2 ? original_view.extent(2): KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank_dynamic > 3 ? original_view.extent(3): KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank_dynamic > 4 ? original_view.extent(4): KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank_dynamic > 5 ? original_view.extent(5): KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank_dynamic > 6 ? original_view.extent(6): KOKKOS_IMPL_CTOR_DEFAULT_ARG) #endif { reset(); } template ScatterView(std::string const& name, Dims ... dims) : internal_view(Kokkos::ViewAllocateWithoutInitializing(name), unique_token.size(), dims ...) { reset(); } template KOKKOS_FORCEINLINE_FUNCTION ScatterAccess access() const { return ScatterAccess{*this}; } typename Kokkos::Impl::Experimental::Slice< Kokkos::LayoutRight, internal_view_type::rank, internal_view_type>::value_type subview() const { return Kokkos::Impl::Experimental::Slice< Kokkos::LayoutRight, internal_view_type::Rank, internal_view_type>::get(internal_view, 0); } template void contribute_into(View const& dest) const { typedef View dest_type; static_assert(std::is_same< typename dest_type::array_layout, Kokkos::LayoutRight>::value, "ScatterView deep_copy destination has different layout"); static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename ExecSpace::memory_space, typename dest_type::memory_space>::value, "ScatterView deep_copy destination memory space not accessible"); bool is_equal = (dest.data() == internal_view.data()); size_t start = is_equal ? 1 : 0; Kokkos::Impl::Experimental::ReduceDuplicates( internal_view.data(), dest.data(), internal_view.stride(0), start, internal_view.extent(0), internal_view.label()); } void reset() { Kokkos::Impl::Experimental::ResetDuplicates( internal_view.data(), internal_view.size(), internal_view.label()); } template void reset_except(View const& view) { if (view.data() != internal_view.data()) { reset(); return; } Kokkos::Impl::Experimental::ResetDuplicates( internal_view.data() + view.size(), internal_view.size() - view.size(), internal_view.label()); } void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0, const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0, const size_t n6 = 0) { ::Kokkos::resize(internal_view,unique_token.size(),n0,n1,n2,n3,n4,n5,n6); } void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0, const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0, const size_t n6 = 0) { ::Kokkos::realloc(internal_view,unique_token.size(),n0,n1,n2,n3,n4,n5,n6); } protected: template KOKKOS_FORCEINLINE_FUNCTION original_reference_type at(int rank, Args ... args) const { return internal_view(rank, args...); } protected: typedef Kokkos::Experimental::UniqueToken< ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type; unique_token_type unique_token; internal_view_type internal_view; }; template class ScatterView { public: typedef Kokkos::View original_view_type; typedef typename original_view_type::value_type original_value_type; typedef typename original_view_type::reference_type original_reference_type; friend class ScatterAccess; friend class ScatterAccess; typedef typename Kokkos::Impl::Experimental::DuplicatedDataType data_type_info; typedef typename data_type_info::value_type internal_data_type; typedef Kokkos::View internal_view_type; ScatterView() { } template ScatterView(View const& original_view) : unique_token() { size_t arg_N[8] = { original_view.rank>0?original_view.extent(0):KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank>1?original_view.extent(1):KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank>2?original_view.extent(2):KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank>3?original_view.extent(3):KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank>4?original_view.extent(4):KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank>5?original_view.extent(5):KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank>6?original_view.extent(6):KOKKOS_IMPL_CTOR_DEFAULT_ARG, KOKKOS_IMPL_CTOR_DEFAULT_ARG }; arg_N[internal_view_type::rank - 1] = unique_token.size(); internal_view = internal_view_type( Kokkos::ViewAllocateWithoutInitializing( std::string("duplicated_") + original_view.label()), arg_N[0], arg_N[1], arg_N[2], arg_N[3], arg_N[4], arg_N[5], arg_N[6], arg_N[7]); reset(); } template ScatterView(std::string const& name, Dims ... dims) { original_view_type original_view; size_t arg_N[8] = { original_view.rank>0?original_view.static_extent(0):KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank>1?original_view.static_extent(1):KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank>2?original_view.static_extent(2):KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank>3?original_view.static_extent(3):KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank>4?original_view.static_extent(4):KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank>5?original_view.static_extent(5):KOKKOS_IMPL_CTOR_DEFAULT_ARG, original_view.rank>6?original_view.static_extent(6):KOKKOS_IMPL_CTOR_DEFAULT_ARG, KOKKOS_IMPL_CTOR_DEFAULT_ARG }; Kokkos::Impl::Experimental::args_to_array(arg_N,0,dims ...); arg_N[internal_view_type::rank - 1] = unique_token.size(); internal_view = internal_view_type(Kokkos::ViewAllocateWithoutInitializing(name), arg_N[0], arg_N[1], arg_N[2], arg_N[3], arg_N[4], arg_N[5], arg_N[6], arg_N[7]); reset(); } template KOKKOS_FORCEINLINE_FUNCTION ScatterAccess access() const { return ScatterAccess{*this}; } typename Kokkos::Impl::Experimental::Slice< Kokkos::LayoutLeft, internal_view_type::rank, internal_view_type>::value_type subview() const { return Kokkos::Impl::Experimental::Slice< Kokkos::LayoutLeft, internal_view_type::rank, internal_view_type>::get(internal_view, 0); } template void contribute_into(View const& dest) const { typedef View dest_type; static_assert(std::is_same< typename dest_type::value_type, typename original_view_type::non_const_value_type>::value, "ScatterView deep_copy destination has wrong value_type"); static_assert(std::is_same< typename dest_type::array_layout, Kokkos::LayoutLeft>::value, "ScatterView deep_copy destination has different layout"); static_assert(Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< typename ExecSpace::memory_space, typename dest_type::memory_space>::value, "ScatterView deep_copy destination memory space not accessible"); auto extent = internal_view.extent( internal_view_type::rank - 1); bool is_equal = (dest.data() == internal_view.data()); size_t start = is_equal ? 1 : 0; Kokkos::Impl::Experimental::ReduceDuplicates( internal_view.data(), dest.data(), internal_view.stride(internal_view_type::rank - 1), start, extent, internal_view.label()); } void reset() { Kokkos::Impl::Experimental::ResetDuplicates( internal_view.data(), internal_view.size(), internal_view.label()); } template void reset_except(View const& view) { if (view.data() != internal_view.data()) { reset(); return; } Kokkos::Impl::Experimental::ResetDuplicates( internal_view.data() + view.size(), internal_view.size() - view.size(), internal_view.label()); } void resize(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0, const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0, const size_t n6 = 0) { size_t arg_N[8] = {n0,n1,n2,n3,n4,n5,n6,0}; const int i = internal_view.rank-1; arg_N[i] = unique_token.size(); ::Kokkos::resize(internal_view, arg_N[0], arg_N[1], arg_N[2], arg_N[3], arg_N[4], arg_N[5], arg_N[6], arg_N[7]); } void realloc(const size_t n0 = 0, const size_t n1 = 0, const size_t n2 = 0, const size_t n3 = 0, const size_t n4 = 0, const size_t n5 = 0, const size_t n6 = 0) { size_t arg_N[8] = {n0,n1,n2,n3,n4,n5,n6,0}; const int i = internal_view.rank-1; arg_N[i] = unique_token.size(); ::Kokkos::realloc(internal_view, arg_N[0], arg_N[1], arg_N[2], arg_N[3], arg_N[4], arg_N[5], arg_N[6], arg_N[7]); } protected: template inline original_reference_type at(int thread_id, Args ... args) const { return internal_view(args..., thread_id); } protected: typedef Kokkos::Experimental::UniqueToken< ExecSpace, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type; unique_token_type unique_token; internal_view_type internal_view; }; /* This object has to be separate in order to store the thread ID, which cannot be obtained until one is inside a parallel construct, and may be relatively expensive to obtain at every contribution (calls a non-inlined function, looks up a thread-local variable). Due to the expense, it is sensible to query it at most once per parallel iterate (ideally once per thread, but parallel_for doesn't expose that) and then store it in a stack variable. ScatterAccess serves as a non-const object on the stack which can store the thread ID */ template class ScatterAccess { public: typedef ScatterView view_type; typedef typename view_type::original_value_type original_value_type; typedef Kokkos::Impl::Experimental::ScatterValue< original_value_type, Op, override_contribution> value_type; KOKKOS_FORCEINLINE_FUNCTION ScatterAccess(view_type const& view_in) : view(view_in) , thread_id(view_in.unique_token.acquire()) { } KOKKOS_FORCEINLINE_FUNCTION ~ScatterAccess() { if (thread_id != ~thread_id_type(0)) view.unique_token.release(thread_id); } template KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args ... args) const { return view.at(thread_id, args...); } template KOKKOS_FORCEINLINE_FUNCTION typename std::enable_if::value, value_type>::type operator[](Arg arg) const { return view.at(thread_id, arg); } private: view_type const& view; // simplify RAII by disallowing copies ScatterAccess(ScatterAccess const& other) = delete; ScatterAccess& operator=(ScatterAccess const& other) = delete; ScatterAccess& operator=(ScatterAccess&& other) = delete; public: // do need to allow moves though, for the common // auto b = a.access(); // that assignments turns into a move constructor call KOKKOS_FORCEINLINE_FUNCTION ScatterAccess(ScatterAccess&& other) : view(other.view) , thread_id(other.thread_id) { other.thread_id = ~thread_id_type(0); } private: typedef typename view_type::unique_token_type unique_token_type; typedef typename unique_token_type::size_type thread_id_type; thread_id_type thread_id; }; template ScatterView < RT , typename ViewTraits::array_layout , typename ViewTraits::execution_space , Op /* just setting defaults if not specified... things got messy because the view type does not come before the duplication/contribution settings in the template parameter list */ , duplication == -1 ? Kokkos::Impl::Experimental::DefaultDuplication::execution_space>::value : duplication , contribution == -1 ? Kokkos::Impl::Experimental::DefaultContribution< typename ViewTraits::execution_space, (duplication == -1 ? Kokkos::Impl::Experimental::DefaultDuplication< typename ViewTraits::execution_space >::value : duplication ) >::value : contribution > create_scatter_view(View const& original_view) { return original_view; // implicit ScatterView constructor call } }} // namespace Kokkos::Experimental namespace Kokkos { namespace Experimental { template void contribute(View& dest, Kokkos::Experimental::ScatterView const& src) { src.contribute_into(dest); } }} // namespace Kokkos::Experimental namespace Kokkos { template void realloc(Kokkos::Experimental::ScatterView& scatter_view, IS ... is) { scatter_view.realloc(is ...); } template void resize(Kokkos::Experimental::ScatterView& scatter_view, IS ... is) { scatter_view.resize(is ...); } } // namespace Kokkos #endif