/* //@HEADER // ************************************************************************ // // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation // // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the Corporation nor the names of the // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) // // ************************************************************************ //@HEADER */ #include #include #include #include #include namespace Test { template< typename ScalarType, class DeviceType > class ReduceFunctor { public: typedef DeviceType execution_space; typedef typename execution_space::size_type size_type; struct value_type { ScalarType value[3]; }; const size_type nwork; ReduceFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {} ReduceFunctor( const ReduceFunctor & rhs ) : nwork( rhs.nwork ) {} /* KOKKOS_INLINE_FUNCTION void init( value_type & dst ) const { dst.value[0] = 0; dst.value[1] = 0; dst.value[2] = 0; } */ KOKKOS_INLINE_FUNCTION void join( volatile value_type & dst, const volatile value_type & src ) const { dst.value[0] += src.value[0]; dst.value[1] += src.value[1]; dst.value[2] += src.value[2]; } KOKKOS_INLINE_FUNCTION void operator()( size_type iwork, value_type & dst ) const { dst.value[0] += 1; dst.value[1] += iwork + 1; dst.value[2] += nwork - iwork; } }; template< class DeviceType > class ReduceFunctorFinal : public ReduceFunctor< long, DeviceType > { public: typedef typename ReduceFunctor< long, DeviceType >::value_type value_type; ReduceFunctorFinal( const size_t n ) : ReduceFunctor< long, DeviceType >( n ) {} KOKKOS_INLINE_FUNCTION void final( value_type & dst ) const { dst.value[0] = -dst.value[0]; dst.value[1] = -dst.value[1]; dst.value[2] = -dst.value[2]; } }; template< typename ScalarType, class DeviceType > class RuntimeReduceFunctor { public: // Required for functor: typedef DeviceType execution_space; typedef ScalarType value_type[]; const unsigned value_count; // Unit test details: typedef typename execution_space::size_type size_type; const size_type nwork; RuntimeReduceFunctor( const size_type arg_nwork, const size_type arg_count ) : value_count( arg_count ) , nwork( arg_nwork ) {} KOKKOS_INLINE_FUNCTION void init( ScalarType dst[] ) const { for ( unsigned i = 0; i < value_count; ++i ) dst[i] = 0; } KOKKOS_INLINE_FUNCTION void join( volatile ScalarType dst[], const volatile ScalarType src[] ) const { for ( unsigned i = 0; i < value_count; ++i ) dst[i] += src[i]; } KOKKOS_INLINE_FUNCTION void operator()( size_type iwork, ScalarType dst[] ) const { const size_type tmp[3] = { 1, iwork + 1, nwork - iwork }; for ( size_type i = 0; i < value_count; ++i ) { dst[i] += tmp[ i % 3 ]; } } }; template< typename ScalarType, class DeviceType > class RuntimeReduceMinMax { public: // Required for functor: typedef DeviceType execution_space; typedef ScalarType value_type[]; const unsigned value_count; // Unit test details: typedef typename execution_space::size_type size_type; const size_type nwork; const ScalarType amin; const ScalarType amax; RuntimeReduceMinMax( const size_type arg_nwork, const size_type arg_count ) : value_count( arg_count ) , nwork( arg_nwork ) , amin( std::numeric_limits< ScalarType >::min() ) , amax( std::numeric_limits< ScalarType >::max() ) {} KOKKOS_INLINE_FUNCTION void init( ScalarType dst[] ) const { for ( unsigned i = 0; i < value_count; ++i ) { dst[i] = i % 2 ? amax : amin; } } KOKKOS_INLINE_FUNCTION void join( volatile ScalarType dst[], const volatile ScalarType src[] ) const { for ( unsigned i = 0; i < value_count; ++i ) { dst[i] = i % 2 ? ( dst[i] < src[i] ? dst[i] : src[i] ) // min : ( dst[i] > src[i] ? dst[i] : src[i] ); // max } } KOKKOS_INLINE_FUNCTION void operator()( size_type iwork, ScalarType dst[] ) const { const ScalarType tmp[2] = { ScalarType( iwork + 1 ) , ScalarType( nwork - iwork ) }; for ( size_type i = 0; i < value_count; ++i ) { dst[i] = i % 2 ? ( dst[i] < tmp[i % 2] ? dst[i] : tmp[i % 2] ) : ( dst[i] > tmp[i % 2] ? dst[i] : tmp[i % 2] ); } } }; template< class DeviceType > class RuntimeReduceFunctorFinal : public RuntimeReduceFunctor< long, DeviceType > { public: typedef RuntimeReduceFunctor< long, DeviceType > base_type; typedef typename base_type::value_type value_type; typedef long scalar_type; RuntimeReduceFunctorFinal( const size_t theNwork, const size_t count ) : base_type( theNwork, count ) {} KOKKOS_INLINE_FUNCTION void final( value_type dst ) const { for ( unsigned i = 0; i < base_type::value_count; ++i ) { dst[i] = -dst[i]; } } }; } // namespace Test namespace { template< typename ScalarType, class DeviceType > class TestReduce { public: typedef DeviceType execution_space; typedef typename execution_space::size_type size_type; TestReduce( const size_type & nwork ) { run_test( nwork ); run_test_final( nwork ); } void run_test( const size_type & nwork ) { typedef Test::ReduceFunctor< ScalarType, execution_space > functor_type; typedef typename functor_type::value_type value_type; enum { Count = 3 }; enum { Repeat = 100 }; value_type result[ Repeat ]; const unsigned long nw = nwork; const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 ) : ( nw / 2 ) * ( nw + 1 ); for ( unsigned i = 0; i < Repeat; ++i ) { Kokkos::parallel_reduce( nwork, functor_type( nwork ), result[i] ); } for ( unsigned i = 0; i < Repeat; ++i ) { for ( unsigned j = 0; j < Count; ++j ) { const unsigned long correct = 0 == j % 3 ? nw : nsum; ASSERT_EQ( (ScalarType) correct, result[i].value[j] ); } } } void run_test_final( const size_type & nwork ) { typedef Test::ReduceFunctorFinal< execution_space > functor_type; typedef typename functor_type::value_type value_type; enum { Count = 3 }; enum { Repeat = 100 }; value_type result[ Repeat ]; const unsigned long nw = nwork; const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 ) : ( nw / 2 ) * ( nw + 1 ); for ( unsigned i = 0; i < Repeat; ++i ) { if ( i % 2 == 0 ) { Kokkos::parallel_reduce( nwork, functor_type( nwork ), result[i] ); } else { Kokkos::parallel_reduce( "Reduce", nwork, functor_type( nwork ), result[i] ); } } for ( unsigned i = 0; i < Repeat; ++i ) { for ( unsigned j = 0; j < Count; ++j ) { const unsigned long correct = 0 == j % 3 ? nw : nsum; ASSERT_EQ( (ScalarType) correct, -result[i].value[j] ); } } } }; template< typename ScalarType, class DeviceType > class TestReduceDynamic { public: typedef DeviceType execution_space; typedef typename execution_space::size_type size_type; TestReduceDynamic( const size_type nwork ) { run_test_dynamic( nwork ); run_test_dynamic_minmax( nwork ); run_test_dynamic_final( nwork ); } void run_test_dynamic( const size_type nwork ) { typedef Test::RuntimeReduceFunctor< ScalarType, execution_space > functor_type; enum { Count = 3 }; enum { Repeat = 100 }; ScalarType result[ Repeat ][ Count ]; const unsigned long nw = nwork; const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 ) : ( nw / 2 ) * ( nw + 1 ); for ( unsigned i = 0; i < Repeat; ++i ) { if ( i % 2 == 0 ) { Kokkos::parallel_reduce( nwork, functor_type( nwork, Count ), result[i] ); } else { Kokkos::parallel_reduce( "Reduce", nwork, functor_type( nwork, Count ), result[i] ); } } for ( unsigned i = 0; i < Repeat; ++i ) { for ( unsigned j = 0; j < Count; ++j ) { const unsigned long correct = 0 == j % 3 ? nw : nsum; ASSERT_EQ( (ScalarType) correct, result[i][j] ); } } } void run_test_dynamic_minmax( const size_type nwork ) { typedef Test::RuntimeReduceMinMax< ScalarType, execution_space > functor_type; enum { Count = 2 }; enum { Repeat = 100 }; ScalarType result[ Repeat ][ Count ]; for ( unsigned i = 0; i < Repeat; ++i ) { if ( i % 2 == 0 ) { Kokkos::parallel_reduce( nwork, functor_type( nwork, Count ), result[i] ); } else { Kokkos::parallel_reduce( "Reduce", nwork, functor_type( nwork, Count ), result[i] ); } } for ( unsigned i = 0; i < Repeat; ++i ) { for ( unsigned j = 0; j < Count; ++j ) { if ( nwork == 0 ) { ScalarType amin( std::numeric_limits< ScalarType >::min() ); ScalarType amax( std::numeric_limits< ScalarType >::max() ); const ScalarType correct = ( j % 2 ) ? amax : amin; ASSERT_EQ( (ScalarType) correct, result[i][j] ); } else { const unsigned long correct = j % 2 ? 1 : nwork; ASSERT_EQ( (ScalarType) correct, result[i][j] ); } } } } void run_test_dynamic_final( const size_type nwork ) { typedef Test::RuntimeReduceFunctorFinal< execution_space > functor_type; enum { Count = 3 }; enum { Repeat = 100 }; typename functor_type::scalar_type result[ Repeat ][ Count ]; const unsigned long nw = nwork; const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 ) : ( nw / 2 ) * ( nw + 1 ); for ( unsigned i = 0; i < Repeat; ++i ) { if ( i % 2 == 0 ) { Kokkos::parallel_reduce( nwork, functor_type( nwork, Count ), result[i] ); } else { Kokkos::parallel_reduce( "TestKernelReduce", nwork, functor_type( nwork, Count ), result[i] ); } } for ( unsigned i = 0; i < Repeat; ++i ) { for ( unsigned j = 0; j < Count; ++j ) { const unsigned long correct = 0 == j % 3 ? nw : nsum; ASSERT_EQ( (ScalarType) correct, -result[i][j] ); } } } }; template< typename ScalarType, class DeviceType > class TestReduceDynamicView { public: typedef DeviceType execution_space; typedef typename execution_space::size_type size_type; TestReduceDynamicView( const size_type nwork ) { run_test_dynamic_view( nwork ); } void run_test_dynamic_view( const size_type nwork ) { typedef Test::RuntimeReduceFunctor< ScalarType, execution_space > functor_type; typedef Kokkos::View< ScalarType*, DeviceType > result_type; typedef typename result_type::HostMirror result_host_type; const unsigned CountLimit = 23; const unsigned long nw = nwork; const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 ) : ( nw / 2 ) * ( nw + 1 ); for ( unsigned count = 0; count < CountLimit; ++count ) { result_type result( "result", count ); result_host_type host_result = Kokkos::create_mirror( result ); // Test result to host pointer: std::string str( "TestKernelReduce" ); if ( count % 2 == 0 ) { Kokkos::parallel_reduce( nw, functor_type( nw, count ), host_result.ptr_on_device() ); } else { Kokkos::parallel_reduce( str, nw, functor_type( nw, count ), host_result.ptr_on_device() ); } for ( unsigned j = 0; j < count; ++j ) { const unsigned long correct = 0 == j % 3 ? nw : nsum; ASSERT_EQ( host_result( j ), (ScalarType) correct ); host_result( j ) = 0; } } } }; } // namespace // Computes y^T*A*x // ( modified from kokkos-tutorials/GTC2016/Exercises/ThreeLevelPar ) #if ( ! defined( KOKKOS_ENABLE_CUDA ) ) || defined( KOKKOS_ENABLE_CUDA_LAMBDA ) template< typename ScalarType, class DeviceType > class TestTripleNestedReduce { public: typedef DeviceType execution_space; typedef typename execution_space::size_type size_type; TestTripleNestedReduce( const size_type & nrows, const size_type & ncols , const size_type & team_size, const size_type & vector_length ) { run_test( nrows, ncols, team_size, vector_length ); } void run_test( const size_type & nrows, const size_type & ncols , const size_type & team_size, const size_type & vector_length ) { //typedef Kokkos::LayoutLeft Layout; typedef Kokkos::LayoutRight Layout; typedef Kokkos::View< ScalarType*, DeviceType > ViewVector; typedef Kokkos::View< ScalarType**, Layout, DeviceType > ViewMatrix; ViewVector y( "y", nrows ); ViewVector x( "x", ncols ); ViewMatrix A( "A", nrows, ncols ); typedef Kokkos::RangePolicy range_policy; // Initialize y vector. Kokkos::parallel_for( range_policy( 0, nrows ), KOKKOS_LAMBDA ( const int i ) { y( i ) = 1; } ); // Initialize x vector. Kokkos::parallel_for( range_policy( 0, ncols ), KOKKOS_LAMBDA ( const int i ) { x( i ) = 1; } ); typedef Kokkos::TeamPolicy< DeviceType > team_policy; typedef typename Kokkos::TeamPolicy< DeviceType >::member_type member_type; // Initialize A matrix, note 2D indexing computation. Kokkos::parallel_for( team_policy( nrows, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type & teamMember ) { const int j = teamMember.league_rank(); Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, ncols ), [&] ( const int i ) { A( j, i ) = 1; } ); } ); // Three level parallelism kernel to force caching of vector x. ScalarType result = 0.0; int chunk_size = 128; Kokkos::parallel_reduce( team_policy( nrows / chunk_size, team_size, vector_length ), KOKKOS_LAMBDA ( const member_type & teamMember, double & update ) { const int row_start = teamMember.league_rank() * chunk_size; const int row_end = row_start + chunk_size; Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, row_start, row_end ), [&] ( const int i ) { ScalarType sum_i = 0.0; Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( teamMember, ncols ), [&] ( const int j, ScalarType &innerUpdate ) { innerUpdate += A( i, j ) * x( j ); }, sum_i ); Kokkos::single( Kokkos::PerThread( teamMember ), [&] () { update += y( i ) * sum_i; } ); } ); }, result ); const ScalarType solution = (ScalarType) nrows * (ScalarType) ncols; ASSERT_EQ( solution, result ); } }; #else // #if ( ! defined( KOKKOS_ENABLE_CUDA ) ) || defined( KOKKOS_ENABLE_CUDA_LAMBDA ) template< typename ScalarType, class DeviceType > class TestTripleNestedReduce { public: typedef DeviceType execution_space; typedef typename execution_space::size_type size_type; TestTripleNestedReduce( const size_type &, const size_type , const size_type &, const size_type ) {} }; #endif //-------------------------------------------------------------------------- namespace Test { namespace ReduceCombinatorical { template< class Scalar, class Space = Kokkos::HostSpace > struct AddPlus { public: // Required. typedef AddPlus reducer_type; typedef Scalar value_type; typedef Kokkos::View< value_type, Space, Kokkos::MemoryTraits > result_view_type; private: result_view_type result; public: AddPlus( value_type & result_ ) : result( &result_ ) {} // Required. KOKKOS_INLINE_FUNCTION void join( value_type & dest, const value_type & src ) const { dest += src + 1; } KOKKOS_INLINE_FUNCTION void join( volatile value_type & dest, const volatile value_type & src ) const { dest += src + 1; } // Optional. KOKKOS_INLINE_FUNCTION void init( value_type & val ) const { val = value_type(); } result_view_type result_view() const { return result; } }; template< int ISTEAM > struct FunctorScalar; template<> struct FunctorScalar< 0 > { Kokkos::View< double > result; FunctorScalar( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION void operator()( const int & i, double & update ) const { update += i; } }; template<> struct FunctorScalar< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; Kokkos::View< double > result; FunctorScalar( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION void operator()( const team_type & team, double & update ) const { update += 1.0 / team.team_size() * team.league_rank(); } }; template< int ISTEAM > struct FunctorScalarInit; template<> struct FunctorScalarInit< 0 > { Kokkos::View< double > result; FunctorScalarInit( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION void init( double & update ) const { update = 0.0; } }; template<> struct FunctorScalarInit< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; Kokkos::View< double > result; FunctorScalarInit( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION void operator()( const team_type & team, double & update ) const { update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION void init( double & update ) const { update = 0.0; } }; template< int ISTEAM > struct FunctorScalarFinal; template<> struct FunctorScalarFinal< 0 > { Kokkos::View result; FunctorScalarFinal( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION void final( double & update ) const { result() = update; } }; template<> struct FunctorScalarFinal< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; Kokkos::View< double > result; FunctorScalarFinal( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION void operator()( const team_type & team, double & update ) const { update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION void final( double & update ) const { result() = update; } }; template< int ISTEAM > struct FunctorScalarJoin; template<> struct FunctorScalarJoin< 0 > { Kokkos::View result; FunctorScalarJoin( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION void join( volatile double & dst, const volatile double & update ) const { dst += update; } }; template<> struct FunctorScalarJoin< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; Kokkos::View< double > result; FunctorScalarJoin( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION void operator()( const team_type & team, double & update ) const { update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION void join( volatile double & dst, const volatile double & update ) const { dst += update; } }; template< int ISTEAM > struct FunctorScalarJoinFinal; template<> struct FunctorScalarJoinFinal< 0 > { Kokkos::View< double > result; FunctorScalarJoinFinal( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION void final( double & update ) const { result() = update; } }; template<> struct FunctorScalarJoinFinal< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; Kokkos::View< double > result; FunctorScalarJoinFinal( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION void operator()( const team_type & team, double & update ) const { update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION void final( double & update ) const { result() = update; } }; template< int ISTEAM > struct FunctorScalarJoinInit; template<> struct FunctorScalarJoinInit< 0 > { Kokkos::View< double > result; FunctorScalarJoinInit( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION void init( double & update ) const { update = 0.0; } }; template<> struct FunctorScalarJoinInit< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; Kokkos::View< double > result; FunctorScalarJoinInit( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION void operator()( const team_type & team, double & update ) const { update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION void init( double & update ) const { update = 0.0; } }; template< int ISTEAM > struct FunctorScalarJoinFinalInit; template<> struct FunctorScalarJoinFinalInit< 0 > { Kokkos::View result; FunctorScalarJoinFinalInit( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION void final( double & update ) const { result() = update; } KOKKOS_INLINE_FUNCTION void init( double & update ) const { update = 0.0; } }; template<> struct FunctorScalarJoinFinalInit< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; Kokkos::View< double > result; FunctorScalarJoinFinalInit( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION void operator()( const team_type & team, double & update ) const { update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION void final( double & update ) const { result() = update; } KOKKOS_INLINE_FUNCTION void init( double & update ) const { update = 0.0; } }; struct Functor1 { KOKKOS_INLINE_FUNCTION void operator()( const int & i, double & update ) const { update += i; } }; struct Functor2 { typedef double value_type[]; const unsigned value_count; Functor2( unsigned n ) : value_count( n ) {} KOKKOS_INLINE_FUNCTION void operator()( const unsigned & i, double update[] ) const { for ( unsigned j = 0; j < value_count; j++ ) { update[j] += i; } } KOKKOS_INLINE_FUNCTION void init( double dst[] ) const { for ( unsigned i = 0; i < value_count; ++i ) dst[i] = 0; } KOKKOS_INLINE_FUNCTION void join( volatile double dst[], const volatile double src[] ) const { for ( unsigned i = 0; i < value_count; ++i ) dst[i] += src[i]; } }; } // namespace ReduceCombinatorical } // namespace Test namespace Test { template< class ExecSpace = Kokkos::DefaultExecutionSpace > struct TestReduceCombinatoricalInstantiation { template< class ... Args > static void CallParallelReduce( Args... args ) { Kokkos::parallel_reduce( args... ); } template< class ... Args > static void AddReturnArgument( Args... args ) { Kokkos::View< double, Kokkos::HostSpace > result_view( "ResultView" ); double expected_result = 1000.0 * 999.0 / 2.0; double value = 0; Kokkos::parallel_reduce( args..., value ); ASSERT_EQ( expected_result, value ); result_view() = 0; CallParallelReduce( args..., result_view ); ASSERT_EQ( expected_result, result_view() ); value = 0; CallParallelReduce( args..., Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryTraits >( &value ) ); ASSERT_EQ( expected_result, value ); result_view() = 0; const Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryTraits > result_view_const_um = result_view; CallParallelReduce( args..., result_view_const_um ); ASSERT_EQ( expected_result, result_view_const_um() ); value = 0; CallParallelReduce( args..., Test::ReduceCombinatorical::AddPlus< double >( value ) ); if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) && ( ExecSpace::concurrency() > 1 ) ) { ASSERT_TRUE( expected_result < value ); } else if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) || ( ExecSpace::concurrency() > 1 ) ) { ASSERT_TRUE( expected_result <= value ); } else { ASSERT_EQ( expected_result, value ); } value = 0; Test::ReduceCombinatorical::AddPlus< double > add( value ); CallParallelReduce( args..., add ); if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) && ( ExecSpace::concurrency() > 1 ) ) { ASSERT_TRUE( expected_result < value ); } else if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) || ( ExecSpace::concurrency() > 1 ) ) { ASSERT_TRUE( expected_result <= value ); } else { ASSERT_EQ( expected_result, value ); } } template< class ... Args > static void AddLambdaRange( void*, Args... args ) { AddReturnArgument( args..., KOKKOS_LAMBDA ( const int & i, double & lsum ) { lsum += i; }); } template< class ... Args > static void AddLambdaTeam( void*, Args... args ) { AddReturnArgument( args..., KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type & team, double & update ) { update += 1.0 / team.team_size() * team.league_rank(); }); } template< class ... Args > static void AddLambdaRange( Kokkos::InvalidType, Args... args ) {} template< class ... Args > static void AddLambdaTeam( Kokkos::InvalidType, Args... args ) {} template< int ISTEAM, class ... Args > static void AddFunctor( Args... args ) { Kokkos::View< double > result_view( "FunctorView" ); auto h_r = Kokkos::create_mirror_view( result_view ); Test::ReduceCombinatorical::FunctorScalar< ISTEAM > functor( result_view ); double expected_result = 1000.0 * 999.0 / 2.0; AddReturnArgument( args..., functor ); AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalar< ISTEAM >( result_view ) ); AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarInit< ISTEAM >( result_view ) ); AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarJoin< ISTEAM >( result_view ) ); AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarJoinInit< ISTEAM >( result_view ) ); h_r() = 0; Kokkos::deep_copy( result_view, h_r ); CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarFinal< ISTEAM >( result_view ) ); Kokkos::deep_copy( h_r, result_view ); ASSERT_EQ( expected_result, h_r() ); h_r() = 0; Kokkos::deep_copy( result_view, h_r ); CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarJoinFinal< ISTEAM >( result_view ) ); Kokkos::deep_copy( h_r, result_view ); ASSERT_EQ( expected_result, h_r() ); h_r() = 0; Kokkos::deep_copy( result_view, h_r ); CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarJoinFinalInit< ISTEAM >( result_view ) ); Kokkos::deep_copy( h_r, result_view ); ASSERT_EQ( expected_result, h_r() ); } template< class ... Args > static void AddFunctorLambdaRange( Args... args ) { AddFunctor< 0, Args... >( args... ); #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA AddLambdaRange( typename std::conditional< std::is_same::value, void*, Kokkos::InvalidType >::type(), args... ); #endif } template< class ... Args > static void AddFunctorLambdaTeam( Args... args ) { AddFunctor< 1, Args... >( args... ); #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA AddLambdaTeam( typename std::conditional< std::is_same::value, void*, Kokkos::InvalidType >::type(), args... ); #endif } template< class ... Args > static void AddPolicy( Args... args ) { int N = 1000; Kokkos::RangePolicy< ExecSpace > policy( 0, N ); AddFunctorLambdaRange( args..., 1000 ); AddFunctorLambdaRange( args..., N ); AddFunctorLambdaRange( args..., policy ); AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace >( 0, N ) ); AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule >( 0, N ) ); AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule >( 0, N ).set_chunk_size( 10 ) ); AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule >( 0, N ).set_chunk_size( 10 ) ); AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace >( N, Kokkos::AUTO ) ); AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace, Kokkos::Schedule >( N, Kokkos::AUTO ) ); AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace, Kokkos::Schedule >( N, Kokkos::AUTO ).set_chunk_size( 10 ) ); AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace, Kokkos::Schedule >( N, Kokkos::AUTO ).set_chunk_size( 10 ) ); } static void execute_a() { AddPolicy(); } static void execute_b() { std::string s( "Std::String" ); AddPolicy( s.c_str() ); AddPolicy( "Char Constant" ); } static void execute_c() { std::string s( "Std::String" ); AddPolicy( s ); } }; template< class Scalar, class ExecSpace = Kokkos::DefaultExecutionSpace > struct TestReducers { struct SumFunctor { Kokkos::View< const Scalar*, ExecSpace > values; KOKKOS_INLINE_FUNCTION void operator()( const int & i, Scalar & value ) const { value += values( i ); } }; struct ProdFunctor { Kokkos::View< const Scalar*, ExecSpace > values; KOKKOS_INLINE_FUNCTION void operator()( const int & i, Scalar & value ) const { value *= values( i ); } }; struct MinFunctor { Kokkos::View< const Scalar*, ExecSpace > values; KOKKOS_INLINE_FUNCTION void operator()( const int & i, Scalar & value ) const { if ( values( i ) < value ) value = values( i ); } }; struct MaxFunctor { Kokkos::View< const Scalar*, ExecSpace > values; KOKKOS_INLINE_FUNCTION void operator()( const int & i, Scalar & value ) const { if ( values( i ) > value ) value = values( i ); } }; struct MinLocFunctor { Kokkos::View< const Scalar*, ExecSpace > values; KOKKOS_INLINE_FUNCTION void operator()( const int & i, typename Kokkos::Experimental::MinLoc< Scalar, int >::value_type & value ) const { if ( values( i ) < value.val ) { value.val = values( i ); value.loc = i; } } }; struct MaxLocFunctor { Kokkos::View< const Scalar*, ExecSpace > values; KOKKOS_INLINE_FUNCTION void operator()( const int & i, typename Kokkos::Experimental::MaxLoc< Scalar, int >::value_type & value ) const { if ( values( i ) > value.val ) { value.val = values( i ); value.loc = i; } } }; struct MinMaxLocFunctor { Kokkos::View< const Scalar*, ExecSpace > values; KOKKOS_INLINE_FUNCTION void operator()( const int & i, typename Kokkos::Experimental::MinMaxLoc< Scalar, int >::value_type & value ) const { if ( values( i ) > value.max_val ) { value.max_val = values( i ); value.max_loc = i; } if ( values( i ) < value.min_val ) { value.min_val = values( i ); value.min_loc = i; } } }; struct BAndFunctor { Kokkos::View< const Scalar*, ExecSpace > values; KOKKOS_INLINE_FUNCTION void operator()( const int & i, Scalar & value ) const { value = value & values( i ); } }; struct BOrFunctor { Kokkos::View< const Scalar*, ExecSpace > values; KOKKOS_INLINE_FUNCTION void operator()( const int & i, Scalar & value ) const { value = value | values( i ); } }; struct BXorFunctor { Kokkos::View< const Scalar*, ExecSpace > values; KOKKOS_INLINE_FUNCTION void operator()( const int & i, Scalar & value ) const { value = value ^ values( i ); } }; struct LAndFunctor { Kokkos::View< const Scalar*, ExecSpace > values; KOKKOS_INLINE_FUNCTION void operator()( const int & i, Scalar & value ) const { value = value && values( i ); } }; struct LOrFunctor { Kokkos::View< const Scalar*, ExecSpace > values; KOKKOS_INLINE_FUNCTION void operator()( const int & i, Scalar & value ) const { value = value || values( i ); } }; struct LXorFunctor { Kokkos::View< const Scalar*, ExecSpace > values; KOKKOS_INLINE_FUNCTION void operator()( const int & i, Scalar & value ) const { value = value ? ( !values( i ) ) : values( i ); } }; static void test_sum( int N ) { Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_sum = 0; for ( int i = 0; i < N; i++ ) { h_values( i ) = (Scalar) ( rand() % 100 ); reference_sum += h_values( i ); } Kokkos::deep_copy( values, h_values ); SumFunctor f; f.values = values; Scalar init = 0; { Scalar sum_scalar = init; Kokkos::Experimental::Sum< Scalar > reducer_scalar( sum_scalar ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( sum_scalar, reference_sum ); Scalar sum_scalar_view = reducer_scalar.result_view()(); ASSERT_EQ( sum_scalar_view, reference_sum ); } { Scalar sum_scalar_init = init; Kokkos::Experimental::Sum< Scalar > reducer_scalar_init( sum_scalar_init, init ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); ASSERT_EQ( sum_scalar_init, reference_sum ); Scalar sum_scalar_init_view = reducer_scalar_init.result_view()(); ASSERT_EQ( sum_scalar_init_view, reference_sum ); } { Kokkos::View< Scalar, Kokkos::HostSpace> sum_view( "View" ); sum_view() = init; Kokkos::Experimental::Sum< Scalar > reducer_view( sum_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); Scalar sum_view_scalar = sum_view(); ASSERT_EQ( sum_view_scalar, reference_sum ); Scalar sum_view_view = reducer_view.result_view()(); ASSERT_EQ( sum_view_view, reference_sum ); } { Kokkos::View< Scalar, Kokkos::HostSpace > sum_view_init( "View" ); sum_view_init() = init; Kokkos::Experimental::Sum< Scalar > reducer_view_init( sum_view_init, init ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); Scalar sum_view_init_scalar = sum_view_init(); ASSERT_EQ( sum_view_init_scalar, reference_sum ); Scalar sum_view_init_view = reducer_view_init.result_view()(); ASSERT_EQ( sum_view_init_view, reference_sum ); } } static void test_prod( int N ) { Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_prod = 1; for ( int i = 0; i < N; i++ ) { h_values( i ) = (Scalar) ( rand() % 4 + 1 ); reference_prod *= h_values( i ); } Kokkos::deep_copy( values, h_values ); ProdFunctor f; f.values = values; Scalar init = 1; if ( std::is_arithmetic< Scalar >::value ) { Scalar prod_scalar = init; Kokkos::Experimental::Prod< Scalar > reducer_scalar( prod_scalar ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( prod_scalar, reference_prod ); Scalar prod_scalar_view = reducer_scalar.result_view()(); ASSERT_EQ( prod_scalar_view, reference_prod ); } { Scalar prod_scalar_init = init; Kokkos::Experimental::Prod< Scalar > reducer_scalar_init( prod_scalar_init, init ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); ASSERT_EQ( prod_scalar_init, reference_prod ); Scalar prod_scalar_init_view = reducer_scalar_init.result_view()(); ASSERT_EQ( prod_scalar_init_view, reference_prod ); } if ( std::is_arithmetic< Scalar >::value ) { Kokkos::View< Scalar, Kokkos::HostSpace > prod_view( "View" ); prod_view() = init; Kokkos::Experimental::Prod< Scalar > reducer_view( prod_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); Scalar prod_view_scalar = prod_view(); ASSERT_EQ( prod_view_scalar, reference_prod ); Scalar prod_view_view = reducer_view.result_view()(); ASSERT_EQ( prod_view_view, reference_prod ); } { Kokkos::View< Scalar, Kokkos::HostSpace > prod_view_init( "View" ); prod_view_init() = init; Kokkos::Experimental::Prod< Scalar > reducer_view_init( prod_view_init, init ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); Scalar prod_view_init_scalar = prod_view_init(); ASSERT_EQ( prod_view_init_scalar, reference_prod ); Scalar prod_view_init_view = reducer_view_init.result_view()(); ASSERT_EQ( prod_view_init_view, reference_prod ); } } static void test_min( int N ) { Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_min = std::numeric_limits< Scalar >::max(); for ( int i = 0; i < N; i++ ) { h_values( i ) = (Scalar) ( rand() % 100000 ); if ( h_values( i ) < reference_min ) reference_min = h_values( i ); } Kokkos::deep_copy( values, h_values ); MinFunctor f; f.values = values; Scalar init = std::numeric_limits< Scalar >::max(); { Scalar min_scalar = init; Kokkos::Experimental::Min< Scalar > reducer_scalar( min_scalar ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( min_scalar, reference_min ); Scalar min_scalar_view = reducer_scalar.result_view()(); ASSERT_EQ( min_scalar_view, reference_min ); } { Scalar min_scalar_init = init; Kokkos::Experimental::Min< Scalar > reducer_scalar_init( min_scalar_init, init ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); ASSERT_EQ( min_scalar_init, reference_min ); Scalar min_scalar_init_view = reducer_scalar_init.result_view()(); ASSERT_EQ( min_scalar_init_view, reference_min ); } { Kokkos::View< Scalar, Kokkos::HostSpace > min_view( "View" ); min_view() = init; Kokkos::Experimental::Min< Scalar > reducer_view( min_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); Scalar min_view_scalar = min_view(); ASSERT_EQ( min_view_scalar, reference_min ); Scalar min_view_view = reducer_view.result_view()(); ASSERT_EQ( min_view_view, reference_min ); } { Kokkos::View< Scalar, Kokkos::HostSpace > min_view_init( "View" ); min_view_init() = init; Kokkos::Experimental::Min< Scalar > reducer_view_init( min_view_init, init ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); Scalar min_view_init_scalar = min_view_init(); ASSERT_EQ( min_view_init_scalar, reference_min ); Scalar min_view_init_view = reducer_view_init.result_view()(); ASSERT_EQ( min_view_init_view, reference_min ); } } static void test_max( int N ) { Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_max = std::numeric_limits< Scalar >::min(); for ( int i = 0; i < N; i++ ) { h_values( i ) = (Scalar) ( rand() % 100000 + 1 ); if ( h_values( i ) > reference_max ) reference_max = h_values( i ); } Kokkos::deep_copy( values, h_values ); MaxFunctor f; f.values = values; Scalar init = std::numeric_limits< Scalar >::min(); { Scalar max_scalar = init; Kokkos::Experimental::Max< Scalar > reducer_scalar( max_scalar ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( max_scalar, reference_max ); Scalar max_scalar_view = reducer_scalar.result_view()(); ASSERT_EQ( max_scalar_view, reference_max ); } { Scalar max_scalar_init = init; Kokkos::Experimental::Max< Scalar > reducer_scalar_init( max_scalar_init, init ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); ASSERT_EQ( max_scalar_init, reference_max ); Scalar max_scalar_init_view = reducer_scalar_init.result_view()(); ASSERT_EQ( max_scalar_init_view, reference_max ); } { Kokkos::View< Scalar, Kokkos::HostSpace > max_view( "View" ); max_view() = init; Kokkos::Experimental::Max< Scalar > reducer_view( max_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); Scalar max_view_scalar = max_view(); ASSERT_EQ( max_view_scalar, reference_max ); Scalar max_view_view = reducer_view.result_view()(); ASSERT_EQ( max_view_view, reference_max ); } { Kokkos::View< Scalar, Kokkos::HostSpace > max_view_init( "View" ); max_view_init() = init; Kokkos::Experimental::Max< Scalar > reducer_view_init( max_view_init, init ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); Scalar max_view_init_scalar = max_view_init(); ASSERT_EQ( max_view_init_scalar, reference_max ); Scalar max_view_init_view = reducer_view_init.result_view()(); ASSERT_EQ( max_view_init_view, reference_max ); } } static void test_minloc( int N ) { typedef typename Kokkos::Experimental::MinLoc< Scalar, int >::value_type value_type; Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_min = std::numeric_limits< Scalar >::max(); int reference_loc = -1; for ( int i = 0; i < N; i++ ) { h_values( i ) = (Scalar) ( rand() % 100000 ); if ( h_values( i ) < reference_min ) { reference_min = h_values( i ); reference_loc = i; } else if ( h_values( i ) == reference_min ) { // Make min unique. h_values( i ) += std::numeric_limits< Scalar >::epsilon(); } } Kokkos::deep_copy( values, h_values ); MinLocFunctor f; f.values = values; Scalar init = std::numeric_limits< Scalar >::max(); { value_type min_scalar; Kokkos::Experimental::MinLoc< Scalar, int > reducer_scalar( min_scalar ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( min_scalar.val, reference_min ); ASSERT_EQ( min_scalar.loc, reference_loc ); value_type min_scalar_view = reducer_scalar.result_view()(); ASSERT_EQ( min_scalar_view.val, reference_min ); ASSERT_EQ( min_scalar_view.loc, reference_loc ); } { value_type min_scalar_init; Kokkos::Experimental::MinLoc< Scalar, int > reducer_scalar_init( min_scalar_init, init ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); ASSERT_EQ( min_scalar_init.val, reference_min ); ASSERT_EQ( min_scalar_init.loc, reference_loc ); value_type min_scalar_init_view = reducer_scalar_init.result_view()(); ASSERT_EQ( min_scalar_init_view.val, reference_min ); ASSERT_EQ( min_scalar_init_view.loc, reference_loc ); } { Kokkos::View< value_type, Kokkos::HostSpace > min_view( "View" ); Kokkos::Experimental::MinLoc< Scalar, int > reducer_view( min_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); value_type min_view_scalar = min_view(); ASSERT_EQ( min_view_scalar.val, reference_min ); ASSERT_EQ( min_view_scalar.loc, reference_loc ); value_type min_view_view = reducer_view.result_view()(); ASSERT_EQ( min_view_view.val, reference_min ); ASSERT_EQ( min_view_view.loc, reference_loc ); } { Kokkos::View< value_type, Kokkos::HostSpace > min_view_init( "View" ); Kokkos::Experimental::MinLoc< Scalar, int > reducer_view_init( min_view_init, init ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); value_type min_view_init_scalar = min_view_init(); ASSERT_EQ( min_view_init_scalar.val, reference_min ); ASSERT_EQ( min_view_init_scalar.loc, reference_loc ); value_type min_view_init_view = reducer_view_init.result_view()(); ASSERT_EQ( min_view_init_view.val, reference_min ); ASSERT_EQ( min_view_init_view.loc, reference_loc ); } } static void test_maxloc( int N ) { typedef typename Kokkos::Experimental::MaxLoc< Scalar, int >::value_type value_type; Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_max = std::numeric_limits< Scalar >::min(); int reference_loc = -1; for ( int i = 0; i < N; i++ ) { h_values( i ) = (Scalar) ( rand() % 100000 ); if ( h_values( i ) > reference_max ) { reference_max = h_values( i ); reference_loc = i; } else if ( h_values( i ) == reference_max ) { // Make max unique. h_values( i ) -= std::numeric_limits< Scalar >::epsilon(); } } Kokkos::deep_copy( values, h_values ); MaxLocFunctor f; f.values = values; Scalar init = std::numeric_limits< Scalar >::min(); { value_type max_scalar; Kokkos::Experimental::MaxLoc< Scalar, int > reducer_scalar( max_scalar ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( max_scalar.val, reference_max ); ASSERT_EQ( max_scalar.loc, reference_loc ); value_type max_scalar_view = reducer_scalar.result_view()(); ASSERT_EQ( max_scalar_view.val, reference_max ); ASSERT_EQ( max_scalar_view.loc, reference_loc ); } { value_type max_scalar_init; Kokkos::Experimental::MaxLoc< Scalar, int > reducer_scalar_init( max_scalar_init, init ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); ASSERT_EQ( max_scalar_init.val, reference_max ); ASSERT_EQ( max_scalar_init.loc, reference_loc ); value_type max_scalar_init_view = reducer_scalar_init.result_view()(); ASSERT_EQ( max_scalar_init_view.val, reference_max ); ASSERT_EQ( max_scalar_init_view.loc, reference_loc ); } { Kokkos::View< value_type, Kokkos::HostSpace > max_view( "View" ); Kokkos::Experimental::MaxLoc< Scalar, int > reducer_view( max_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); value_type max_view_scalar = max_view(); ASSERT_EQ( max_view_scalar.val, reference_max ); ASSERT_EQ( max_view_scalar.loc, reference_loc ); value_type max_view_view = reducer_view.result_view()(); ASSERT_EQ( max_view_view.val, reference_max ); ASSERT_EQ( max_view_view.loc, reference_loc ); } { Kokkos::View< value_type, Kokkos::HostSpace > max_view_init( "View" ); Kokkos::Experimental::MaxLoc< Scalar, int > reducer_view_init( max_view_init, init ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); value_type max_view_init_scalar = max_view_init(); ASSERT_EQ( max_view_init_scalar.val, reference_max ); ASSERT_EQ( max_view_init_scalar.loc, reference_loc ); value_type max_view_init_view = reducer_view_init.result_view()(); ASSERT_EQ( max_view_init_view.val, reference_max ); ASSERT_EQ( max_view_init_view.loc, reference_loc ); } } static void test_minmaxloc( int N ) { typedef typename Kokkos::Experimental::MinMaxLoc< Scalar, int >::value_type value_type; Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_max = std::numeric_limits< Scalar >::min(); Scalar reference_min = std::numeric_limits< Scalar >::max(); int reference_minloc = -1; int reference_maxloc = -1; for ( int i = 0; i < N; i++ ) { h_values( i ) = (Scalar) ( rand() % 100000 ); } for ( int i = 0; i < N; i++ ) { if ( h_values( i ) > reference_max ) { reference_max = h_values( i ); reference_maxloc = i; } else if ( h_values( i ) == reference_max ) { // Make max unique. h_values( i ) -= std::numeric_limits< Scalar >::epsilon(); } } for ( int i = 0; i < N; i++ ) { if ( h_values( i ) < reference_min ) { reference_min = h_values( i ); reference_minloc = i; } else if ( h_values( i ) == reference_min ) { // Make min unique. h_values( i ) += std::numeric_limits< Scalar >::epsilon(); } } Kokkos::deep_copy( values, h_values ); MinMaxLocFunctor f; f.values = values; Scalar init_min = std::numeric_limits< Scalar >::max(); Scalar init_max = std::numeric_limits< Scalar >::min(); { value_type minmax_scalar; Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_scalar( minmax_scalar ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( minmax_scalar.min_val, reference_min ); for ( int i = 0; i < N; i++ ) { if ( ( i == minmax_scalar.min_loc ) && ( h_values( i ) == reference_min ) ) { reference_minloc = i; } } ASSERT_EQ( minmax_scalar.min_loc, reference_minloc ); ASSERT_EQ( minmax_scalar.max_val, reference_max ); for ( int i = 0; i < N; i++ ) { if ( ( i == minmax_scalar.max_loc ) && ( h_values( i ) == reference_max ) ) { reference_maxloc = i; } } ASSERT_EQ( minmax_scalar.max_loc, reference_maxloc ); value_type minmax_scalar_view = reducer_scalar.result_view()(); ASSERT_EQ( minmax_scalar_view.min_val, reference_min ); ASSERT_EQ( minmax_scalar_view.min_loc, reference_minloc ); ASSERT_EQ( minmax_scalar_view.max_val, reference_max ); ASSERT_EQ( minmax_scalar_view.max_loc, reference_maxloc ); } { value_type minmax_scalar_init; Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_scalar_init( minmax_scalar_init, init_min, init_max ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); ASSERT_EQ( minmax_scalar_init.min_val, reference_min ); ASSERT_EQ( minmax_scalar_init.min_loc, reference_minloc ); ASSERT_EQ( minmax_scalar_init.max_val, reference_max ); ASSERT_EQ( minmax_scalar_init.max_loc, reference_maxloc ); value_type minmax_scalar_init_view = reducer_scalar_init.result_view()(); ASSERT_EQ( minmax_scalar_init_view.min_val, reference_min ); ASSERT_EQ( minmax_scalar_init_view.min_loc, reference_minloc ); ASSERT_EQ( minmax_scalar_init_view.max_val, reference_max ); ASSERT_EQ( minmax_scalar_init_view.max_loc, reference_maxloc ); } { Kokkos::View< value_type, Kokkos::HostSpace > minmax_view( "View" ); Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_view( minmax_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); value_type minmax_view_scalar = minmax_view(); ASSERT_EQ( minmax_view_scalar.min_val, reference_min ); ASSERT_EQ( minmax_view_scalar.min_loc, reference_minloc ); ASSERT_EQ( minmax_view_scalar.max_val, reference_max ); ASSERT_EQ( minmax_view_scalar.max_loc, reference_maxloc ); value_type minmax_view_view = reducer_view.result_view()(); ASSERT_EQ( minmax_view_view.min_val, reference_min ); ASSERT_EQ( minmax_view_view.min_loc, reference_minloc ); ASSERT_EQ( minmax_view_view.max_val, reference_max ); ASSERT_EQ( minmax_view_view.max_loc, reference_maxloc ); } { Kokkos::View< value_type, Kokkos::HostSpace > minmax_view_init( "View" ); Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_view_init( minmax_view_init, init_min, init_max ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); value_type minmax_view_init_scalar = minmax_view_init(); ASSERT_EQ( minmax_view_init_scalar.min_val, reference_min ); ASSERT_EQ( minmax_view_init_scalar.min_loc, reference_minloc ); ASSERT_EQ( minmax_view_init_scalar.max_val, reference_max ); ASSERT_EQ( minmax_view_init_scalar.max_loc, reference_maxloc ); value_type minmax_view_init_view = reducer_view_init.result_view()(); ASSERT_EQ( minmax_view_init_view.min_val, reference_min ); ASSERT_EQ( minmax_view_init_view.min_loc, reference_minloc ); ASSERT_EQ( minmax_view_init_view.max_val, reference_max ); ASSERT_EQ( minmax_view_init_view.max_loc, reference_maxloc ); } } static void test_BAnd( int N ) { Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_band = Scalar() | ( ~Scalar() ); for ( int i = 0; i < N; i++ ) { h_values( i ) = (Scalar) ( rand() % 100000 + 1 ); reference_band = reference_band & h_values( i ); } Kokkos::deep_copy( values, h_values ); BAndFunctor f; f.values = values; Scalar init = Scalar() | ( ~Scalar() ); { Scalar band_scalar = init; Kokkos::Experimental::BAnd< Scalar > reducer_scalar( band_scalar ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( band_scalar, reference_band ); Scalar band_scalar_view = reducer_scalar.result_view()(); ASSERT_EQ( band_scalar_view, reference_band ); } { Kokkos::View< Scalar, Kokkos::HostSpace > band_view( "View" ); band_view() = init; Kokkos::Experimental::BAnd< Scalar > reducer_view( band_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); Scalar band_view_scalar = band_view(); ASSERT_EQ( band_view_scalar, reference_band ); Scalar band_view_view = reducer_view.result_view()(); ASSERT_EQ( band_view_view, reference_band ); } } static void test_BOr( int N ) { Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_bor = Scalar() & ( ~Scalar() ); for ( int i = 0; i < N; i++ ) { h_values( i ) = (Scalar) ( ( rand() % 100000 + 1 ) * 2 ); reference_bor = reference_bor | h_values( i ); } Kokkos::deep_copy( values, h_values ); BOrFunctor f; f.values = values; Scalar init = Scalar() & ( ~Scalar() ); { Scalar bor_scalar = init; Kokkos::Experimental::BOr< Scalar > reducer_scalar( bor_scalar ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( bor_scalar, reference_bor ); Scalar bor_scalar_view = reducer_scalar.result_view()(); ASSERT_EQ( bor_scalar_view, reference_bor ); } { Kokkos::View< Scalar, Kokkos::HostSpace > bor_view( "View" ); bor_view() = init; Kokkos::Experimental::BOr< Scalar > reducer_view( bor_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); Scalar bor_view_scalar = bor_view(); ASSERT_EQ( bor_view_scalar, reference_bor ); Scalar bor_view_view = reducer_view.result_view()(); ASSERT_EQ( bor_view_view, reference_bor ); } } static void test_BXor( int N ) { Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_bxor = Scalar() & ( ~Scalar() ); for ( int i = 0; i < N; i++ ) { h_values( i ) = (Scalar) ( ( rand() % 100000 + 1 ) * 2 ); reference_bxor = reference_bxor ^ h_values( i ); } Kokkos::deep_copy( values, h_values ); BXorFunctor f; f.values = values; Scalar init = Scalar() & ( ~Scalar() ); { Scalar bxor_scalar = init; Kokkos::Experimental::BXor< Scalar > reducer_scalar( bxor_scalar ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( bxor_scalar, reference_bxor ); Scalar bxor_scalar_view = reducer_scalar.result_view()(); ASSERT_EQ( bxor_scalar_view, reference_bxor ); } { Kokkos::View< Scalar, Kokkos::HostSpace > bxor_view( "View" ); bxor_view() = init; Kokkos::Experimental::BXor< Scalar > reducer_view( bxor_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); Scalar bxor_view_scalar = bxor_view(); ASSERT_EQ( bxor_view_scalar, reference_bxor ); Scalar bxor_view_view = reducer_view.result_view()(); ASSERT_EQ( bxor_view_view, reference_bxor ); } } static void test_LAnd( int N ) { Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_land = 1; for ( int i = 0; i < N; i++ ) { h_values( i ) = (Scalar) ( rand() % 2 ); reference_land = reference_land && h_values( i ); } Kokkos::deep_copy( values, h_values ); LAndFunctor f; f.values = values; Scalar init = 1; { Scalar land_scalar = init; Kokkos::Experimental::LAnd< Scalar > reducer_scalar( land_scalar ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( land_scalar, reference_land ); Scalar land_scalar_view = reducer_scalar.result_view()(); ASSERT_EQ( land_scalar_view, reference_land ); } { Kokkos::View< Scalar, Kokkos::HostSpace > land_view( "View" ); land_view() = init; Kokkos::Experimental::LAnd< Scalar > reducer_view( land_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); Scalar land_view_scalar = land_view(); ASSERT_EQ( land_view_scalar, reference_land ); Scalar land_view_view = reducer_view.result_view()(); ASSERT_EQ( land_view_view, reference_land ); } } static void test_LOr( int N ) { Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_lor = 0; for ( int i = 0; i < N; i++ ) { h_values( i ) = (Scalar) ( rand() % 2 ); reference_lor = reference_lor || h_values( i ); } Kokkos::deep_copy( values, h_values ); LOrFunctor f; f.values = values; Scalar init = 0; { Scalar lor_scalar = init; Kokkos::Experimental::LOr< Scalar > reducer_scalar( lor_scalar ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( lor_scalar, reference_lor ); Scalar lor_scalar_view = reducer_scalar.result_view()(); ASSERT_EQ( lor_scalar_view, reference_lor ); } { Kokkos::View< Scalar, Kokkos::HostSpace > lor_view( "View" ); lor_view() = init; Kokkos::Experimental::LOr< Scalar > reducer_view( lor_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); Scalar lor_view_scalar = lor_view(); ASSERT_EQ( lor_view_scalar, reference_lor ); Scalar lor_view_view = reducer_view.result_view()(); ASSERT_EQ( lor_view_view, reference_lor ); } } static void test_LXor( int N ) { Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_lxor = 0; for ( int i = 0; i < N; i++ ) { h_values( i ) = (Scalar) ( rand() % 2 ); reference_lxor = reference_lxor ? ( !h_values( i ) ) : h_values( i ); } Kokkos::deep_copy( values, h_values ); LXorFunctor f; f.values = values; Scalar init = 0; { Scalar lxor_scalar = init; Kokkos::Experimental::LXor< Scalar > reducer_scalar( lxor_scalar ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( lxor_scalar, reference_lxor ); Scalar lxor_scalar_view = reducer_scalar.result_view()(); ASSERT_EQ( lxor_scalar_view, reference_lxor ); } { Kokkos::View< Scalar, Kokkos::HostSpace > lxor_view( "View" ); lxor_view() = init; Kokkos::Experimental::LXor< Scalar > reducer_view( lxor_view ); Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); Scalar lxor_view_scalar = lxor_view(); ASSERT_EQ( lxor_view_scalar, reference_lxor ); Scalar lxor_view_view = reducer_view.result_view()(); ASSERT_EQ( lxor_view_view, reference_lxor ); } } static void execute_float() { test_sum( 10001 ); test_prod( 35 ); test_min( 10003 ); test_minloc( 10003 ); test_max( 10007 ); test_maxloc( 10007 ); test_minmaxloc( 10007 ); } static void execute_integer() { test_sum( 10001 ); test_prod( 35 ); test_min( 10003 ); test_minloc( 10003 ); test_max( 10007 ); test_maxloc( 10007 ); test_minmaxloc( 10007 ); test_BAnd( 35 ); test_BOr( 35 ); test_BXor( 35 ); test_LAnd( 35 ); test_LOr( 35 ); test_LXor( 35 ); } static void execute_basic() { test_sum( 10001 ); test_prod( 35 ); } }; } // namespace Test