/* //@HEADER // ************************************************************************ // // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation // // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the Corporation nor the names of the // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) // // ************************************************************************ //@HEADER */ #include #include #include #include #include /*--------------------------------------------------------------------------*/ namespace Test { template< typename ScalarType , class DeviceType > class ReduceFunctor { public: typedef DeviceType execution_space ; typedef typename execution_space::size_type size_type ; struct value_type { ScalarType value[3] ; }; const size_type nwork ; ReduceFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {} ReduceFunctor( const ReduceFunctor & rhs ) : nwork( rhs.nwork ) {} /* KOKKOS_INLINE_FUNCTION void init( value_type & dst ) const { dst.value[0] = 0 ; dst.value[1] = 0 ; dst.value[2] = 0 ; } */ KOKKOS_INLINE_FUNCTION void join( volatile value_type & dst , const volatile value_type & src ) const { dst.value[0] += src.value[0] ; dst.value[1] += src.value[1] ; dst.value[2] += src.value[2] ; } KOKKOS_INLINE_FUNCTION void operator()( size_type iwork , value_type & dst ) const { dst.value[0] += 1 ; dst.value[1] += iwork + 1 ; dst.value[2] += nwork - iwork ; } }; template< class DeviceType > class ReduceFunctorFinal : public ReduceFunctor< long , DeviceType > { public: typedef typename ReduceFunctor< long , DeviceType >::value_type value_type ; ReduceFunctorFinal( const size_t n ) : ReduceFunctor(n) {} KOKKOS_INLINE_FUNCTION void final( value_type & dst ) const { dst.value[0] = - dst.value[0] ; dst.value[1] = - dst.value[1] ; dst.value[2] = - dst.value[2] ; } }; template< typename ScalarType , class DeviceType > class RuntimeReduceFunctor { public: // Required for functor: typedef DeviceType execution_space ; typedef ScalarType value_type[] ; const unsigned value_count ; // Unit test details: typedef typename execution_space::size_type size_type ; const size_type nwork ; RuntimeReduceFunctor( const size_type arg_nwork , const size_type arg_count ) : value_count( arg_count ) , nwork( arg_nwork ) {} KOKKOS_INLINE_FUNCTION void init( ScalarType dst[] ) const { for ( unsigned i = 0 ; i < value_count ; ++i ) dst[i] = 0 ; } KOKKOS_INLINE_FUNCTION void join( volatile ScalarType dst[] , const volatile ScalarType src[] ) const { for ( unsigned i = 0 ; i < value_count ; ++i ) dst[i] += src[i] ; } KOKKOS_INLINE_FUNCTION void operator()( size_type iwork , ScalarType dst[] ) const { const size_type tmp[3] = { 1 , iwork + 1 , nwork - iwork }; for ( size_type i = 0 ; i < value_count ; ++i ) { dst[i] += tmp[ i % 3 ]; } } }; template< typename ScalarType , class DeviceType > class RuntimeReduceMinMax { public: // Required for functor: typedef DeviceType execution_space ; typedef ScalarType value_type[] ; const unsigned value_count ; // Unit test details: typedef typename execution_space::size_type size_type ; const size_type nwork ; const ScalarType amin ; const ScalarType amax ; RuntimeReduceMinMax( const size_type arg_nwork , const size_type arg_count ) : value_count( arg_count ) , nwork( arg_nwork ) , amin( std::numeric_limits::min() ) , amax( std::numeric_limits::max() ) {} KOKKOS_INLINE_FUNCTION void init( ScalarType dst[] ) const { for ( unsigned i = 0 ; i < value_count ; ++i ) { dst[i] = i % 2 ? amax : amin ; } } KOKKOS_INLINE_FUNCTION void join( volatile ScalarType dst[] , const volatile ScalarType src[] ) const { for ( unsigned i = 0 ; i < value_count ; ++i ) { dst[i] = i % 2 ? ( dst[i] < src[i] ? dst[i] : src[i] ) // min : ( dst[i] > src[i] ? dst[i] : src[i] ); // max } } KOKKOS_INLINE_FUNCTION void operator()( size_type iwork , ScalarType dst[] ) const { const ScalarType tmp[2] = { ScalarType(iwork + 1) , ScalarType(nwork - iwork) }; for ( size_type i = 0 ; i < value_count ; ++i ) { dst[i] = i % 2 ? ( dst[i] < tmp[i%2] ? dst[i] : tmp[i%2] ) : ( dst[i] > tmp[i%2] ? dst[i] : tmp[i%2] ); } } }; template< class DeviceType > class RuntimeReduceFunctorFinal : public RuntimeReduceFunctor< long , DeviceType > { public: typedef RuntimeReduceFunctor< long , DeviceType > base_type ; typedef typename base_type::value_type value_type ; typedef long scalar_type ; RuntimeReduceFunctorFinal( const size_t theNwork , const size_t count ) : base_type(theNwork,count) {} KOKKOS_INLINE_FUNCTION void final( value_type dst ) const { for ( unsigned i = 0 ; i < base_type::value_count ; ++i ) { dst[i] = - dst[i] ; } } }; } // namespace Test namespace { template< typename ScalarType , class DeviceType > class TestReduce { public: typedef DeviceType execution_space ; typedef typename execution_space::size_type size_type ; //------------------------------------ TestReduce( const size_type & nwork ) { run_test(nwork); run_test_final(nwork); } void run_test( const size_type & nwork ) { typedef Test::ReduceFunctor< ScalarType , execution_space > functor_type ; typedef typename functor_type::value_type value_type ; enum { Count = 3 }; enum { Repeat = 100 }; value_type result[ Repeat ]; const unsigned long nw = nwork ; const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) : (nw/2) * ( nw + 1 ); for ( unsigned i = 0 ; i < Repeat ; ++i ) { Kokkos::parallel_reduce( nwork , functor_type(nwork) , result[i] ); } for ( unsigned i = 0 ; i < Repeat ; ++i ) { for ( unsigned j = 0 ; j < Count ; ++j ) { const unsigned long correct = 0 == j % 3 ? nw : nsum ; ASSERT_EQ( (ScalarType) correct , result[i].value[j] ); } } } void run_test_final( const size_type & nwork ) { typedef Test::ReduceFunctorFinal< execution_space > functor_type ; typedef typename functor_type::value_type value_type ; enum { Count = 3 }; enum { Repeat = 100 }; value_type result[ Repeat ]; const unsigned long nw = nwork ; const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) : (nw/2) * ( nw + 1 ); for ( unsigned i = 0 ; i < Repeat ; ++i ) { if(i%2==0) Kokkos::parallel_reduce( nwork , functor_type(nwork) , result[i] ); else Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork) , result[i] ); } for ( unsigned i = 0 ; i < Repeat ; ++i ) { for ( unsigned j = 0 ; j < Count ; ++j ) { const unsigned long correct = 0 == j % 3 ? nw : nsum ; ASSERT_EQ( (ScalarType) correct , - result[i].value[j] ); } } } }; template< typename ScalarType , class DeviceType > class TestReduceDynamic { public: typedef DeviceType execution_space ; typedef typename execution_space::size_type size_type ; //------------------------------------ TestReduceDynamic( const size_type nwork ) { run_test_dynamic(nwork); run_test_dynamic_minmax(nwork); run_test_dynamic_final(nwork); } void run_test_dynamic( const size_type nwork ) { typedef Test::RuntimeReduceFunctor< ScalarType , execution_space > functor_type ; enum { Count = 3 }; enum { Repeat = 100 }; ScalarType result[ Repeat ][ Count ] ; const unsigned long nw = nwork ; const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) : (nw/2) * ( nw + 1 ); for ( unsigned i = 0 ; i < Repeat ; ++i ) { if(i%2==0) Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[i] ); else Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork,Count) , result[i] ); } for ( unsigned i = 0 ; i < Repeat ; ++i ) { for ( unsigned j = 0 ; j < Count ; ++j ) { const unsigned long correct = 0 == j % 3 ? nw : nsum ; ASSERT_EQ( (ScalarType) correct , result[i][j] ); } } } void run_test_dynamic_minmax( const size_type nwork ) { typedef Test::RuntimeReduceMinMax< ScalarType , execution_space > functor_type ; enum { Count = 2 }; enum { Repeat = 100 }; ScalarType result[ Repeat ][ Count ] ; for ( unsigned i = 0 ; i < Repeat ; ++i ) { if(i%2==0) Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[i] ); else Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork,Count) , result[i] ); } for ( unsigned i = 0 ; i < Repeat ; ++i ) { for ( unsigned j = 0 ; j < Count ; ++j ) { const unsigned long correct = j % 2 ? 1 : nwork ; ASSERT_EQ( (ScalarType) correct , result[i][j] ); } } } void run_test_dynamic_final( const size_type nwork ) { typedef Test::RuntimeReduceFunctorFinal< execution_space > functor_type ; enum { Count = 3 }; enum { Repeat = 100 }; typename functor_type::scalar_type result[ Repeat ][ Count ] ; const unsigned long nw = nwork ; const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) : (nw/2) * ( nw + 1 ); for ( unsigned i = 0 ; i < Repeat ; ++i ) { if(i%2==0) Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[i] ); else Kokkos::parallel_reduce( "TestKernelReduce" , nwork , functor_type(nwork,Count) , result[i] ); } for ( unsigned i = 0 ; i < Repeat ; ++i ) { for ( unsigned j = 0 ; j < Count ; ++j ) { const unsigned long correct = 0 == j % 3 ? nw : nsum ; ASSERT_EQ( (ScalarType) correct , - result[i][j] ); } } } }; template< typename ScalarType , class DeviceType > class TestReduceDynamicView { public: typedef DeviceType execution_space ; typedef typename execution_space::size_type size_type ; //------------------------------------ TestReduceDynamicView( const size_type nwork ) { run_test_dynamic_view(nwork); } void run_test_dynamic_view( const size_type nwork ) { typedef Test::RuntimeReduceFunctor< ScalarType , execution_space > functor_type ; typedef Kokkos::View< ScalarType* , DeviceType > result_type ; typedef typename result_type::HostMirror result_host_type ; const unsigned CountLimit = 23 ; const unsigned long nw = nwork ; const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) : (nw/2) * ( nw + 1 ); for ( unsigned count = 0 ; count < CountLimit ; ++count ) { result_type result("result",count); result_host_type host_result = Kokkos::create_mirror( result ); // Test result to host pointer: std::string str("TestKernelReduce"); if(count%2==0) Kokkos::parallel_reduce( nw , functor_type(nw,count) , host_result.ptr_on_device() ); else Kokkos::parallel_reduce( str , nw , functor_type(nw,count) , host_result.ptr_on_device() ); for ( unsigned j = 0 ; j < count ; ++j ) { const unsigned long correct = 0 == j % 3 ? nw : nsum ; ASSERT_EQ( host_result(j), (ScalarType) correct ); host_result(j) = 0 ; } } } }; } /*--------------------------------------------------------------------------*/