/* //@HEADER // ************************************************************************ // // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation // // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the Corporation nor the names of the // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) // // ************************************************************************ //@HEADER */ /// \file Kokkos_Parallel.hpp /// \brief Declaration of parallel operators #ifndef KOKKOS_PARALLEL_HPP #define KOKKOS_PARALLEL_HPP #include #include #include #include #ifdef KOKKOSP_ENABLE_PROFILING #include #include #endif #include #include #include #include #ifdef KOKKOS_HAVE_DEBUG #include #endif //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { namespace Impl { //---------------------------------------------------------------------------- /** \brief Given a Functor and Execution Policy query an execution space. * * if the Policy has an execution space use that * else if the Functor has an execution_space use that * else if the Functor has a device_type use that for backward compatibility * else use the default */ template< class Functor , class Policy , class EnableFunctor = void , class EnablePolicy = void > struct FunctorPolicyExecutionSpace { typedef Kokkos::DefaultExecutionSpace execution_space ; }; template< class Functor , class Policy > struct FunctorPolicyExecutionSpace < Functor , Policy , typename enable_if_type< typename Functor::device_type >::type , typename enable_if_type< typename Policy ::execution_space >::type > { typedef typename Policy ::execution_space execution_space ; }; template< class Functor , class Policy > struct FunctorPolicyExecutionSpace < Functor , Policy , typename enable_if_type< typename Functor::execution_space >::type , typename enable_if_type< typename Policy ::execution_space >::type > { typedef typename Policy ::execution_space execution_space ; }; template< class Functor , class Policy , class EnableFunctor > struct FunctorPolicyExecutionSpace < Functor , Policy , EnableFunctor , typename enable_if_type< typename Policy::execution_space >::type > { typedef typename Policy ::execution_space execution_space ; }; template< class Functor , class Policy , class EnablePolicy > struct FunctorPolicyExecutionSpace < Functor , Policy , typename enable_if_type< typename Functor::device_type >::type , EnablePolicy > { typedef typename Functor::device_type execution_space ; }; template< class Functor , class Policy , class EnablePolicy > struct FunctorPolicyExecutionSpace < Functor , Policy , typename enable_if_type< typename Functor::execution_space >::type , EnablePolicy > { typedef typename Functor::execution_space execution_space ; }; //---------------------------------------------------------------------------- /// \class ParallelFor /// \brief Implementation of the ParallelFor operator that has a /// partial specialization for the device. /// /// This is an implementation detail of parallel_for. Users should /// skip this and go directly to the nonmember function parallel_for. template< class FunctorType , class ExecPolicy > class ParallelFor ; /// \class ParallelReduce /// \brief Implementation detail of parallel_reduce. /// /// This is an implementation detail of parallel_reduce. Users should /// skip this and go directly to the nonmember function parallel_reduce. template< class FunctorType , class ExecPolicy > class ParallelReduce ; /// \class ParallelScan /// \brief Implementation detail of parallel_scan. /// /// This is an implementation detail of parallel_scan. Users should /// skip this and go directly to the documentation of the nonmember /// template function Kokkos::parallel_scan. template< class FunctorType , class ExecPolicy > class ParallelScan ; } // namespace Impl } // namespace Kokkos //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { /** \brief Execute \c functor in parallel according to the execution \c policy. * * A "functor" is a class containing the function to execute in parallel, * data needed for that execution, and an optional \c execution_space * typedef. Here is an example functor for parallel_for: * * \code * class FunctorType { * public: * typedef ... execution_space ; * void operator() ( WorkType iwork ) const ; * }; * \endcode * * In the above example, \c WorkType is any integer type for which a * valid conversion from \c size_t to \c IntType exists. Its * operator() method defines the operation to parallelize, * over the range of integer indices iwork=[0,work_count-1]. * This compares to a single iteration \c iwork of a \c for loop. * If \c execution_space is not defined DefaultExecutionSpace will be used. */ template< class ExecPolicy , class FunctorType > inline void parallel_for( const ExecPolicy & policy , const FunctorType & functor , const std::string& str = "" , typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0 ) { #ifdef KOKKOSP_ENABLE_PROFILING uint64_t kpID = 0; if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::beginParallelFor("" == str ? typeid(FunctorType).name() : str, 0, &kpID); } #endif (void) Impl::ParallelFor< FunctorType , ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy ); #ifdef KOKKOSP_ENABLE_PROFILING if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::endParallelFor(kpID); } #endif } template< class FunctorType > inline void parallel_for( const size_t work_count , const FunctorType & functor , const std::string& str = "" ) { typedef typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space execution_space ; typedef RangePolicy< execution_space > policy ; #ifdef KOKKOSP_ENABLE_PROFILING uint64_t kpID = 0; if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::beginParallelFor("" == str ? typeid(FunctorType).name() : str, 0, &kpID); } #endif (void) Impl::ParallelFor< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) ); #ifdef KOKKOSP_ENABLE_PROFILING if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::endParallelFor(kpID); } #endif } template< class ExecPolicy , class FunctorType > inline void parallel_for( const std::string & str , const ExecPolicy & policy , const FunctorType & functor ) { #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES Kokkos::fence(); std::cout << "KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl; #endif parallel_for(policy,functor,str); #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES Kokkos::fence(); std::cout << "KOKKOS_DEBUG End parallel_for kernel: " << str << std::endl; #endif (void) str; } //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- /** \brief Parallel reduction * * Example of a parallel_reduce functor for a POD (plain old data) value type: * \code * class FunctorType { // For POD value type * public: * typedef ... execution_space ; * typedef value_type ; * void operator()( iwork , & update ) const ; * void init( & update ) const ; * void join( volatile & update , * volatile const & input ) const ; * * typedef true_type has_final ; * void final( & update ) const ; * }; * \endcode * * Example of a parallel_reduce functor for an array of POD (plain old data) values: * \code * class FunctorType { // For array of POD value * public: * typedef ... execution_space ; * typedef value_type[] ; * void operator()( , update[] ) const ; * void init( update[] ) const ; * void join( volatile update[] , * volatile const input[] ) const ; * * typedef true_type has_final ; * void final( update[] ) const ; * }; * \endcode */ template< class ExecPolicy , class FunctorType > inline void parallel_reduce( const ExecPolicy & policy , const FunctorType & functor , const std::string& str = "" , typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0 ) { // typedef typename // Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space // execution_space ; typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag > ValueTraits ; typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0) , typename ValueTraits::value_type , typename ValueTraits::pointer_type >::type value_type ; Kokkos::View< value_type , HostSpace , Kokkos::MemoryUnmanaged > result_view ; #ifdef KOKKOSP_ENABLE_PROFILING uint64_t kpID = 0; if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID); } #endif (void) Impl::ParallelReduce< FunctorType , ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy , result_view ); #ifdef KOKKOSP_ENABLE_PROFILING if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::endParallelReduce(kpID); } #endif } // integral range policy template< class FunctorType > inline void parallel_reduce( const size_t work_count , const FunctorType & functor , const std::string& str = "" ) { typedef typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space execution_space ; typedef RangePolicy< execution_space > policy ; typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0) , typename ValueTraits::value_type , typename ValueTraits::pointer_type >::type value_type ; Kokkos::View< value_type , HostSpace , Kokkos::MemoryUnmanaged > result_view ; #ifdef KOKKOSP_ENABLE_PROFILING uint64_t kpID = 0; if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID); } #endif (void) Impl::ParallelReduce< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) , result_view ); #ifdef KOKKOSP_ENABLE_PROFILING if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::endParallelReduce(kpID); } #endif } // general policy and view ouput template< class ExecPolicy , class FunctorType , class ViewType > inline void parallel_reduce( const ExecPolicy & policy , const FunctorType & functor , const ViewType & result_view , const std::string& str = "" , typename Impl::enable_if< ( Impl::is_view::value && ! Impl::is_integral< ExecPolicy >::value #ifdef KOKKOS_HAVE_CUDA && ! Impl::is_same::value #endif )>::type * = 0 ) { #ifdef KOKKOSP_ENABLE_PROFILING uint64_t kpID = 0; if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID); } #endif (void) Impl::ParallelReduce< FunctorType, ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy , Impl::CopyWithoutTracking::apply(result_view) ); #ifdef KOKKOSP_ENABLE_PROFILING if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::endParallelReduce(kpID); } #endif } // general policy and pod or array of pod output template< class ExecPolicy , class FunctorType > void parallel_reduce( const ExecPolicy & policy , const FunctorType & functor #ifdef KOKKOS_HAVE_CUDA , typename Impl::enable_if< ( ! Impl::is_integral< ExecPolicy >::value && ! Impl::is_same::value ) , typename Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag >::reference_type>::type result_ref , const std::string& str = "" , typename Impl::enable_if::value >::type* = 0 ) #else , typename Impl::enable_if< ( ! Impl::is_integral< ExecPolicy >::value) , typename Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag >::reference_type >::type result_ref , const std::string& str = "" ) #endif { typedef Kokkos::Impl::FunctorValueTraits< FunctorType , typename ExecPolicy::work_tag > ValueTraits ; typedef Kokkos::Impl::FunctorValueOps< FunctorType , typename ExecPolicy::work_tag > ValueOps ; // Wrap the result output request in a view to inform the implementation // of the type and memory space. typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0) , typename ValueTraits::value_type , typename ValueTraits::pointer_type >::type value_type ; Kokkos::View< value_type , HostSpace , Kokkos::MemoryUnmanaged > result_view( ValueOps::pointer( result_ref ) , ValueTraits::value_count( functor ) ); #ifdef KOKKOSP_ENABLE_PROFILING uint64_t kpID = 0; if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID); } #endif (void) Impl::ParallelReduce< FunctorType, ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , policy , Impl::CopyWithoutTracking::apply(result_view) ); #ifdef KOKKOSP_ENABLE_PROFILING if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::endParallelReduce(kpID); } #endif } // integral range policy and view ouput template< class FunctorType , class ViewType > inline void parallel_reduce( const size_t work_count , const FunctorType & functor , const ViewType & result_view , const std::string& str = "" , typename Impl::enable_if<( Impl::is_view::value #ifdef KOKKOS_HAVE_CUDA && ! Impl::is_same< typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space, Kokkos::Cuda>::value #endif )>::type * = 0 ) { typedef typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space execution_space ; typedef RangePolicy< execution_space > ExecPolicy ; #ifdef KOKKOSP_ENABLE_PROFILING uint64_t kpID = 0; if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID); } #endif (void) Impl::ParallelReduce< FunctorType, ExecPolicy >( Impl::CopyWithoutTracking::apply(functor) , ExecPolicy(0,work_count) , Impl::CopyWithoutTracking::apply(result_view) ); #ifdef KOKKOSP_ENABLE_PROFILING if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::endParallelReduce(kpID); } #endif } // integral range policy and pod or array of pod output template< class FunctorType > inline void parallel_reduce( const size_t work_count , const FunctorType & functor , typename Kokkos::Impl::FunctorValueTraits< typename Impl::if_c::value || Impl::is_integral::value, void,FunctorType>::type , void >::reference_type result , const std::string& str = "" , typename Impl::enable_if< true #ifdef KOKKOS_HAVE_CUDA && ! Impl::is_same< typename Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space, Kokkos::Cuda>::value #endif >::type * = 0 ) { typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; typedef Kokkos::Impl::FunctorValueOps< FunctorType , void > ValueOps ; typedef typename Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space execution_space ; typedef Kokkos::RangePolicy< execution_space > policy ; // Wrap the result output request in a view to inform the implementation // of the type and memory space. typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0) , typename ValueTraits::value_type , typename ValueTraits::pointer_type >::type value_type ; Kokkos::View< value_type , HostSpace , Kokkos::MemoryUnmanaged > result_view( ValueOps::pointer( result ) , ValueTraits::value_count( functor ) ); #ifdef KOKKOSP_ENABLE_PROFILING uint64_t kpID = 0; if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::beginParallelReduce("" == str ? typeid(FunctorType).name() : str, 0, &kpID); } #endif (void) Impl::ParallelReduce< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) , Impl::CopyWithoutTracking::apply(result_view) ); #ifdef KOKKOSP_ENABLE_PROFILING if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::endParallelReduce(kpID); } #endif } template< class ExecPolicy , class FunctorType , class ResultType > inline void parallel_reduce( const std::string & str , const ExecPolicy & policy , const FunctorType & functor , ResultType * result) { #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES Kokkos::fence(); std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl; #endif parallel_reduce(policy,functor,result,str); #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES Kokkos::fence(); std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl; #endif (void) str; } template< class ExecPolicy , class FunctorType , class ResultType > inline void parallel_reduce( const std::string & str , const ExecPolicy & policy , const FunctorType & functor , ResultType & result) { #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES Kokkos::fence(); std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl; #endif parallel_reduce(policy,functor,result,str); #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES Kokkos::fence(); std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl; #endif (void) str; } template< class ExecPolicy , class FunctorType > inline void parallel_reduce( const std::string & str , const ExecPolicy & policy , const FunctorType & functor) { #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES Kokkos::fence(); std::cout << "KOKKOS_DEBUG Start parallel_reduce kernel: " << str << std::endl; #endif parallel_reduce(policy,functor,str); #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES Kokkos::fence(); std::cout << "KOKKOS_DEBUG End parallel_reduce kernel: " << str << std::endl; #endif (void) str; } } // namespace Kokkos //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { /// \fn parallel_scan /// \tparam ExecutionPolicy The execution policy type. /// \tparam FunctorType The scan functor type. /// /// \param policy [in] The execution policy. /// \param functor [in] The scan functor. /// /// This function implements a parallel scan pattern. The scan can /// be either inclusive or exclusive, depending on how you implement /// the scan functor. /// /// A scan functor looks almost exactly like a reduce functor, except /// that its operator() takes a third \c bool argument, \c final_pass, /// which indicates whether this is the last pass of the scan /// operation. We will show below how to use the \c final_pass /// argument to control whether the scan is inclusive or exclusive. /// /// Here is the minimum required interface of a scan functor for a POD /// (plain old data) value type \c PodType. That is, the result is a /// View of zero or more PodType. It is also possible for the result /// to be an array of (same-sized) arrays of PodType, but we do not /// show the required interface for that here. /// \code /// template< class ExecPolicy , class FunctorType > /// class ScanFunctor { /// public: /// // The Kokkos device type /// typedef ... execution_space; /// // Type of an entry of the array containing the result; /// // also the type of each of the entries combined using /// // operator() or join(). /// typedef PodType value_type; /// /// void operator () (const ExecPolicy::member_type & i, value_type& update, const bool final_pass) const; /// void init (value_type& update) const; /// void join (volatile value_type& update, volatile const value_type& input) const /// }; /// \endcode /// /// Here is an example of a functor which computes an inclusive plus-scan /// of an array of \c int, in place. If given an array [1, 2, 3, 4], this /// scan will overwrite that array with [1, 3, 6, 10]. /// /// \code /// template /// class InclScanFunctor { /// public: /// typedef SpaceType execution_space; /// typedef int value_type; /// typedef typename SpaceType::size_type size_type; /// /// InclScanFunctor( Kokkos::View x /// , Kokkos::View y ) : m_x(x), m_y(y) {} /// /// void operator () (const size_type i, value_type& update, const bool final_pass) const { /// update += m_x(i); /// if (final_pass) { /// m_y(i) = update; /// } /// } /// void init (value_type& update) const { /// update = 0; /// } /// void join (volatile value_type& update, volatile const value_type& input) const { /// update += input; /// } /// /// private: /// Kokkos::View m_x; /// Kokkos::View m_y; /// }; /// \endcode /// /// Here is an example of a functor which computes an exclusive /// scan of an array of \c int, in place. In operator(), note both /// that the final_pass test and the update have switched places, and /// the use of a temporary. If given an array [1, 2, 3, 4], this scan /// will overwrite that array with [0, 1, 3, 6]. /// /// \code /// template /// class ExclScanFunctor { /// public: /// typedef SpaceType execution_space; /// typedef int value_type; /// typedef typename SpaceType::size_type size_type; /// /// ExclScanFunctor (Kokkos::View x) : x_ (x) {} /// /// void operator () (const size_type i, value_type& update, const bool final_pass) const { /// const value_type x_i = x_(i); /// if (final_pass) { /// x_(i) = update; /// } /// update += x_i; /// } /// void init (value_type& update) const { /// update = 0; /// } /// void join (volatile value_type& update, volatile const value_type& input) const { /// update += input; /// } /// /// private: /// Kokkos::View x_; /// }; /// \endcode /// /// Here is an example of a functor which builds on the above /// exclusive scan example, to compute an offsets array from a /// population count array, in place. We assume that the pop count /// array has an extra entry at the end to store the final count. If /// given an array [1, 2, 3, 4, 0], this scan will overwrite that /// array with [0, 1, 3, 6, 10]. /// /// \code /// template /// class OffsetScanFunctor { /// public: /// typedef SpaceType execution_space; /// typedef int value_type; /// typedef typename SpaceType::size_type size_type; /// /// // lastIndex_ is the last valid index (zero-based) of x. /// // If x has length zero, then lastIndex_ won't be used anyway. /// OffsetScanFunctor( Kokkos::View x /// , Kokkos::View y ) /// : m_x(x), m_y(y), last_index_ (x.dimension_0 () == 0 ? 0 : x.dimension_0 () - 1) /// {} /// /// void operator () (const size_type i, int& update, const bool final_pass) const { /// if (final_pass) { /// m_y(i) = update; /// } /// update += m_x(i); /// // The last entry of m_y gets the final sum. /// if (final_pass && i == last_index_) { /// m_y(i+1) = update; /// } /// } /// void init (value_type& update) const { /// update = 0; /// } /// void join (volatile value_type& update, volatile const value_type& input) const { /// update += input; /// } /// /// private: /// Kokkos::View m_x; /// Kokkos::View m_y; /// const size_type last_index_; /// }; /// \endcode /// template< class ExecutionPolicy , class FunctorType > inline void parallel_scan( const ExecutionPolicy & policy , const FunctorType & functor , const std::string& str = "" , typename Impl::enable_if< ! Impl::is_integral< ExecutionPolicy >::value >::type * = 0 ) { #ifdef KOKKOSP_ENABLE_PROFILING uint64_t kpID = 0; if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::beginParallelScan("" == str ? typeid(FunctorType).name() : str, 0, &kpID); } #endif Impl::ParallelScan< FunctorType , ExecutionPolicy > scan( Impl::CopyWithoutTracking::apply(functor) , policy ); #ifdef KOKKOSP_ENABLE_PROFILING if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::endParallelScan(kpID); } #endif } template< class FunctorType > inline void parallel_scan( const size_t work_count , const FunctorType & functor , const std::string& str = "" ) { typedef typename Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space execution_space ; typedef Kokkos::RangePolicy< execution_space > policy ; #ifdef KOKKOSP_ENABLE_PROFILING uint64_t kpID = 0; if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::beginParallelScan("" == str ? typeid(FunctorType).name() : str, 0, &kpID); } #endif (void) Impl::ParallelScan< FunctorType , policy >( Impl::CopyWithoutTracking::apply(functor) , policy(0,work_count) ); #ifdef KOKKOSP_ENABLE_PROFILING if(Kokkos::Experimental::profileLibraryLoaded()) { Kokkos::Experimental::endParallelScan(kpID); } #endif } template< class ExecutionPolicy , class FunctorType > inline void parallel_scan( const std::string& str , const ExecutionPolicy & policy , const FunctorType & functor) { #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES Kokkos::fence(); std::cout << "KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl; #endif parallel_scan(policy,functor,str); #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES Kokkos::fence(); std::cout << "KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl; #endif (void) str; } } // namespace Kokkos //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { namespace Impl { template< class FunctorType , class Enable = void > struct FunctorTeamShmemSize { static inline size_t value( const FunctorType & , int ) { return 0 ; } }; template< class FunctorType > struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::team_shmem_size ) >::type > { static inline size_t value( const FunctorType & f , int team_size ) { return f.team_shmem_size( team_size ) ; } }; template< class FunctorType > struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::shmem_size ) >::type > { static inline size_t value( const FunctorType & f , int team_size ) { return f.shmem_size( team_size ) ; } }; } // namespace Impl } // namespace Kokkos //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- #endif /* KOKKOS_PARALLEL_HPP */