/* //@HEADER // ************************************************************************ // // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation // // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the Corporation nor the names of the // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) // // ************************************************************************ //@HEADER */ #include #include #include #include #include /*--------------------------------------------------------------------------*/ namespace Test { namespace { template< class ExecSpace, class ScheduleType > struct TestTeamPolicy { typedef typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type team_member ; typedef Kokkos::View view_type ; view_type m_flags ; TestTeamPolicy( const size_t league_size ) : m_flags( Kokkos::ViewAllocateWithoutInitializing("flags") , Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( *this ) , league_size ) {} struct VerifyInitTag {}; KOKKOS_INLINE_FUNCTION void operator()( const team_member & member ) const { const int tid = member.team_rank() + member.team_size() * member.league_rank(); m_flags( member.team_rank() , member.league_rank() ) = tid ; } KOKKOS_INLINE_FUNCTION void operator()( const VerifyInitTag & , const team_member & member ) const { const int tid = member.team_rank() + member.team_size() * member.league_rank(); if ( tid != m_flags( member.team_rank() , member.league_rank() ) ) { printf("TestTeamPolicy member(%d,%d) error %d != %d\n" , member.league_rank() , member.team_rank() , tid , m_flags( member.team_rank() , member.league_rank() ) ); } } // included for test_small_league_size TestTeamPolicy() : m_flags() {} // included for test_small_league_size struct NoOpTag {} ; KOKKOS_INLINE_FUNCTION void operator()( const NoOpTag & , const team_member & member ) const {} static void test_small_league_size() { int bs = 8; // batch size (number of elements per batch) int ns = 16; // total number of "problems" to process // calculate total scratch memory space size const int level = 0; int mem_size = 960; const int num_teams = ns/bs; const Kokkos::TeamPolicy< ExecSpace, NoOpTag > policy(num_teams, Kokkos::AUTO()); Kokkos::parallel_for ( policy.set_scratch_size(level, Kokkos::PerTeam(mem_size), Kokkos::PerThread(0)) , TestTeamPolicy() ); } static void test_for( const size_t league_size ) { TestTeamPolicy functor( league_size ); const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor ); Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size , team_size ) , functor ); Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace , VerifyInitTag >( league_size , team_size ) , functor ); test_small_league_size(); } struct ReduceTag {}; typedef long value_type ; KOKKOS_INLINE_FUNCTION void operator()( const team_member & member , value_type & update ) const { update += member.team_rank() + member.team_size() * member.league_rank(); } KOKKOS_INLINE_FUNCTION void operator()( const ReduceTag & , const team_member & member , value_type & update ) const { update += 1 + member.team_rank() + member.team_size() * member.league_rank(); } static void test_reduce( const size_t league_size ) { TestTeamPolicy functor( league_size ); const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor ); const long N = team_size * league_size ; long total = 0 ; Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size , team_size ) , functor , total ); ASSERT_EQ( size_t((N-1)*(N))/2 , size_t(total) ); Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace , ReduceTag >( league_size , team_size ) , functor , total ); ASSERT_EQ( (size_t(N)*size_t(N+1))/2 , size_t(total) ); } }; } } /*--------------------------------------------------------------------------*/ namespace Test { template< typename ScalarType , class DeviceType, class ScheduleType > class ReduceTeamFunctor { public: typedef DeviceType execution_space ; typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; typedef typename execution_space::size_type size_type ; struct value_type { ScalarType value[3] ; }; const size_type nwork ; ReduceTeamFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {} ReduceTeamFunctor( const ReduceTeamFunctor & rhs ) : nwork( rhs.nwork ) {} KOKKOS_INLINE_FUNCTION void init( value_type & dst ) const { dst.value[0] = 0 ; dst.value[1] = 0 ; dst.value[2] = 0 ; } KOKKOS_INLINE_FUNCTION void join( volatile value_type & dst , const volatile value_type & src ) const { dst.value[0] += src.value[0] ; dst.value[1] += src.value[1] ; dst.value[2] += src.value[2] ; } KOKKOS_INLINE_FUNCTION void operator()( const typename policy_type::member_type ind , value_type & dst ) const { const int thread_rank = ind.team_rank() + ind.team_size() * ind.league_rank(); const int thread_size = ind.team_size() * ind.league_size(); const int chunk = ( nwork + thread_size - 1 ) / thread_size ; size_type iwork = chunk * thread_rank ; const size_type iwork_end = iwork + chunk < nwork ? iwork + chunk : nwork ; for ( ; iwork < iwork_end ; ++iwork ) { dst.value[0] += 1 ; dst.value[1] += iwork + 1 ; dst.value[2] += nwork - iwork ; } } }; } // namespace Test namespace { template< typename ScalarType , class DeviceType, class ScheduleType > class TestReduceTeam { public: typedef DeviceType execution_space ; typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; typedef typename execution_space::size_type size_type ; //------------------------------------ TestReduceTeam( const size_type & nwork ) { run_test(nwork); } void run_test( const size_type & nwork ) { typedef Test::ReduceTeamFunctor< ScalarType , execution_space , ScheduleType> functor_type ; typedef typename functor_type::value_type value_type ; typedef Kokkos::View< value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type ; enum { Count = 3 }; enum { Repeat = 100 }; value_type result[ Repeat ]; const unsigned long nw = nwork ; const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) : (nw/2) * ( nw + 1 ); const unsigned team_size = policy_type::team_size_recommended( functor_type(nwork) ); const unsigned league_size = ( nwork + team_size - 1 ) / team_size ; policy_type team_exec( league_size , team_size ); for ( unsigned i = 0 ; i < Repeat ; ++i ) { result_type tmp( & result[i] ); Kokkos::parallel_reduce( team_exec , functor_type(nwork) , tmp ); } execution_space::fence(); for ( unsigned i = 0 ; i < Repeat ; ++i ) { for ( unsigned j = 0 ; j < Count ; ++j ) { const unsigned long correct = 0 == j % 3 ? nw : nsum ; ASSERT_EQ( (ScalarType) correct , result[i].value[j] ); } } } }; } /*--------------------------------------------------------------------------*/ namespace Test { template< class DeviceType, class ScheduleType > class ScanTeamFunctor { public: typedef DeviceType execution_space ; typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; typedef long int value_type ; Kokkos::View< value_type , execution_space > accum ; Kokkos::View< value_type , execution_space > total ; ScanTeamFunctor() : accum("accum"), total("total") {} KOKKOS_INLINE_FUNCTION void init( value_type & error ) const { error = 0 ; } KOKKOS_INLINE_FUNCTION void join( value_type volatile & error , value_type volatile const & input ) const { if ( input ) error = 1 ; } struct JoinMax { typedef long int value_type ; KOKKOS_INLINE_FUNCTION void join( value_type volatile & dst , value_type volatile const & input ) const { if ( dst < input ) dst = input ; } }; KOKKOS_INLINE_FUNCTION void operator()( const typename policy_type::member_type ind , value_type & error ) const { if ( 0 == ind.league_rank() && 0 == ind.team_rank() ) { const long int thread_count = ind.league_size() * ind.team_size(); total() = ( thread_count * ( thread_count + 1 ) ) / 2 ; } // Team max: const int long m = ind.team_reduce( (long int) ( ind.league_rank() + ind.team_rank() ) , JoinMax() ); if ( m != ind.league_rank() + ( ind.team_size() - 1 ) ) { printf("ScanTeamFunctor[%d.%d of %d.%d] reduce_max_answer(%ld) != reduce_max(%ld)\n" , ind.league_rank(), ind.team_rank() , ind.league_size(), ind.team_size() , (long int)(ind.league_rank() + ( ind.team_size() - 1 )) , m ); } // Scan: const long int answer = ( ind.league_rank() + 1 ) * ind.team_rank() + ( ind.team_rank() * ( ind.team_rank() + 1 ) ) / 2 ; const long int result = ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 ); const long int result2 = ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 ); if ( answer != result || answer != result2 ) { printf("ScanTeamFunctor[%d.%d of %d.%d] answer(%ld) != scan_first(%ld) or scan_second(%ld)\n", ind.league_rank(), ind.team_rank(), ind.league_size(), ind.team_size(), answer,result,result2); error = 1 ; } const long int thread_rank = ind.team_rank() + ind.team_size() * ind.league_rank(); ind.team_scan( 1 + thread_rank , accum.ptr_on_device() ); } }; template< class DeviceType, class ScheduleType > class TestScanTeam { public: typedef DeviceType execution_space ; typedef long int value_type ; typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; typedef Test::ScanTeamFunctor functor_type ; //------------------------------------ TestScanTeam( const size_t nteam ) { run_test(nteam); } void run_test( const size_t nteam ) { typedef Kokkos::View< long int , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; const unsigned REPEAT = 100000 ; unsigned Repeat; if ( nteam == 0 ) { Repeat = 1; } else { Repeat = ( REPEAT + nteam - 1 ) / nteam ; //error here } functor_type functor ; policy_type team_exec( nteam , policy_type::team_size_max( functor ) ); for ( unsigned i = 0 ; i < Repeat ; ++i ) { long int accum = 0 ; long int total = 0 ; long int error = 0 ; Kokkos::deep_copy( functor.accum , total ); Kokkos::parallel_reduce( team_exec , functor , result_type( & error ) ); DeviceType::fence(); Kokkos::deep_copy( accum , functor.accum ); Kokkos::deep_copy( total , functor.total ); ASSERT_EQ( error , 0 ); ASSERT_EQ( total , accum ); } execution_space::fence(); } }; } // namespace Test /*--------------------------------------------------------------------------*/ namespace Test { template< class ExecSpace, class ScheduleType > struct SharedTeamFunctor { typedef ExecSpace execution_space ; typedef int value_type ; typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; enum { SHARED_COUNT = 1000 }; typedef typename ExecSpace::scratch_memory_space shmem_space ; // tbd: MemoryUnmanaged should be the default for shared memory space typedef Kokkos::View shared_int_array_type ; // Tell how much shared memory will be required by this functor: inline unsigned team_shmem_size( int team_size ) const { return shared_int_array_type::shmem_size( SHARED_COUNT ) + shared_int_array_type::shmem_size( SHARED_COUNT ); } KOKKOS_INLINE_FUNCTION void operator()( const typename policy_type::member_type & ind , value_type & update ) const { const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT ); const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT ); if ((shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0) || (shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0)) { printf ("Failed to allocate shared memory of size %lu\n", static_cast (SHARED_COUNT)); ++update; // failure to allocate is an error } else { for ( int i = ind.team_rank() ; i < SHARED_COUNT ; i += ind.team_size() ) { shared_A[i] = i + ind.league_rank(); shared_B[i] = 2 * i + ind.league_rank(); } ind.team_barrier(); if ( ind.team_rank() + 1 == ind.team_size() ) { for ( int i = 0 ; i < SHARED_COUNT ; ++i ) { if ( shared_A[i] != i + ind.league_rank() ) { ++update ; } if ( shared_B[i] != 2 * i + ind.league_rank() ) { ++update ; } } } } } }; } namespace { template< class ExecSpace, class ScheduleType > struct TestSharedTeam { TestSharedTeam() { run(); } void run() { typedef Test::SharedTeamFunctor Functor ; typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() ); Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size ); typename Functor::value_type error_count = 0 ; Kokkos::parallel_reduce( team_exec , Functor() , result_type( & error_count ) ); ASSERT_EQ( error_count , 0 ); } }; } namespace Test { #if defined (KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) template< class MemorySpace, class ExecSpace, class ScheduleType > struct TestLambdaSharedTeam { TestLambdaSharedTeam() { run(); } void run() { typedef Test::SharedTeamFunctor Functor ; //typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; typedef Kokkos::View< typename Functor::value_type , MemorySpace, Kokkos::MemoryUnmanaged > result_type ; typedef typename ExecSpace::scratch_memory_space shmem_space ; // tbd: MemoryUnmanaged should be the default for shared memory space typedef Kokkos::View shared_int_array_type ; const int SHARED_COUNT = 1000; int team_size = 1; #ifdef KOKKOS_HAVE_CUDA if(std::is_same::value) team_size = 128; #endif Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size); team_exec = team_exec.set_scratch_size(0,Kokkos::PerTeam(SHARED_COUNT*2*sizeof(int))); typename Functor::value_type error_count = 0 ; Kokkos::parallel_reduce( team_exec , KOKKOS_LAMBDA ( const typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type & ind , int & update ) { const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT ); const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT ); if ((shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0) || (shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0)) { printf ("Failed to allocate shared memory of size %lu\n", static_cast (SHARED_COUNT)); ++update; // failure to allocate is an error } else { for ( int i = ind.team_rank() ; i < SHARED_COUNT ; i += ind.team_size() ) { shared_A[i] = i + ind.league_rank(); shared_B[i] = 2 * i + ind.league_rank(); } ind.team_barrier(); if ( ind.team_rank() + 1 == ind.team_size() ) { for ( int i = 0 ; i < SHARED_COUNT ; ++i ) { if ( shared_A[i] != i + ind.league_rank() ) { ++update ; } if ( shared_B[i] != 2 * i + ind.league_rank() ) { ++update ; } } } } }, result_type( & error_count ) ); ASSERT_EQ( error_count , 0 ); } }; #endif } namespace Test { template< class ExecSpace, class ScheduleType > struct ScratchTeamFunctor { typedef ExecSpace execution_space ; typedef int value_type ; typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; enum { SHARED_TEAM_COUNT = 100 }; enum { SHARED_THREAD_COUNT = 10 }; typedef typename ExecSpace::scratch_memory_space shmem_space ; // tbd: MemoryUnmanaged should be the default for shared memory space typedef Kokkos::View shared_int_array_type ; KOKKOS_INLINE_FUNCTION void operator()( const typename policy_type::member_type & ind , value_type & update ) const { const shared_int_array_type scratch_ptr( ind.team_scratch(1) , 3*ind.team_size() ); const shared_int_array_type scratch_A( ind.team_scratch(1) , SHARED_TEAM_COUNT ); const shared_int_array_type scratch_B( ind.thread_scratch(1) , SHARED_THREAD_COUNT ); if ((scratch_ptr.ptr_on_device () == NULL ) || (scratch_A. ptr_on_device () == NULL && SHARED_TEAM_COUNT > 0) || (scratch_B. ptr_on_device () == NULL && SHARED_THREAD_COUNT > 0)) { printf ("Failed to allocate shared memory of size %lu\n", static_cast (SHARED_TEAM_COUNT)); ++update; // failure to allocate is an error } else { Kokkos::parallel_for(Kokkos::TeamThreadRange(ind,0,(int)SHARED_TEAM_COUNT),[&] (const int &i) { scratch_A[i] = i + ind.league_rank(); }); for(int i=0; i struct TestScratchTeam { TestScratchTeam() { run(); } void run() { typedef Test::ScratchTeamFunctor Functor ; typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() ); Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size ); typename Functor::value_type error_count = 0 ; int team_scratch_size = Functor::shared_int_array_type::shmem_size(Functor::SHARED_TEAM_COUNT) + Functor::shared_int_array_type::shmem_size(3*team_size); int thread_scratch_size = Functor::shared_int_array_type::shmem_size(Functor::SHARED_THREAD_COUNT); Kokkos::parallel_reduce( team_exec.set_scratch_size(0,Kokkos::PerTeam(team_scratch_size), Kokkos::PerThread(thread_scratch_size)) , Functor() , result_type( & error_count ) ); ASSERT_EQ( error_count , 0 ); } }; } namespace Test { template< class ExecSpace> KOKKOS_INLINE_FUNCTION int test_team_mulit_level_scratch_loop_body(const typename Kokkos::TeamPolicy::member_type& team) { Kokkos::View> a_team1(team.team_scratch(0),128); Kokkos::View> a_thread1(team.thread_scratch(0),16); Kokkos::View> a_team2(team.team_scratch(0),128); Kokkos::View> a_thread2(team.thread_scratch(0),16); Kokkos::View> b_team1(team.team_scratch(1),128000); Kokkos::View> b_thread1(team.thread_scratch(1),16000); Kokkos::View> b_team2(team.team_scratch(1),128000); Kokkos::View> b_thread2(team.thread_scratch(1),16000); Kokkos::View> a_team3(team.team_scratch(0),128); Kokkos::View> a_thread3(team.thread_scratch(0),16); Kokkos::View> b_team3(team.team_scratch(1),128000); Kokkos::View> b_thread3(team.thread_scratch(1),16000); // The explicit types for 0 and 128 are here to test TeamThreadRange accepting different // types for begin and end. Kokkos::parallel_for(Kokkos::TeamThreadRange(team,int(0),unsigned(128)), [&] (const int& i) { a_team1(i) = 1000000 + i; a_team2(i) = 2000000 + i; a_team3(i) = 3000000 + i; }); team.team_barrier(); Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i) { a_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i; a_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i; a_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i; }); Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) { b_team1(i) = 1000000 + i; b_team2(i) = 2000000 + i; b_team3(i) = 3000000 + i; }); team.team_barrier(); Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i) { b_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i; b_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i; b_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i; }); team.team_barrier(); int error = 0; Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& i) { if(a_team1(i) != 1000000 + i) error++; if(a_team2(i) != 2000000 + i) error++; if(a_team3(i) != 3000000 + i) error++; }); team.team_barrier(); Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i) { if(a_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++; if(a_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++; if(a_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++; }); Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) { if(b_team1(i) != 1000000 + i) error++; if(b_team2(i) != 2000000 + i) error++; if(b_team3(i) != 3000000 + i) error++; }); team.team_barrier(); Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i) { if(b_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++; if(b_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++; if( b_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++; }); return error; } struct TagReduce {}; struct TagFor {}; template< class ExecSpace, class ScheduleType > struct ClassNoShmemSizeFunction { Kokkos::View > errors; KOKKOS_INLINE_FUNCTION void operator() (const TagFor&, const typename Kokkos::TeamPolicy::member_type& team) const { int error = test_team_mulit_level_scratch_loop_body(team); errors() += error; } KOKKOS_INLINE_FUNCTION void operator() (const TagReduce&, const typename Kokkos::TeamPolicy::member_type& team, int& error) const { error += test_team_mulit_level_scratch_loop_body(team); } void run() { Kokkos::View d_errors = Kokkos::View("Errors"); errors = d_errors; const int per_team0 = 3*Kokkos::View>::shmem_size(128); const int per_thread0 = 3*Kokkos::View>::shmem_size(16); const int per_team1 = 3*Kokkos::View>::shmem_size(128000); const int per_thread1 = 3*Kokkos::View>::shmem_size(16000); { Kokkos::TeamPolicy policy(10,8,16); Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), *this); Kokkos::fence(); typename Kokkos::View::HostMirror h_errors = Kokkos::create_mirror_view(d_errors); Kokkos::deep_copy(h_errors,d_errors); ASSERT_EQ(h_errors(),0); } { int error = 0; Kokkos::TeamPolicy policy(10,8,16); Kokkos::parallel_reduce(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), *this,error); Kokkos::fence(); ASSERT_EQ(error,0); } }; }; template< class ExecSpace, class ScheduleType > struct ClassWithShmemSizeFunction { Kokkos::View > errors; KOKKOS_INLINE_FUNCTION void operator() (const TagFor&, const typename Kokkos::TeamPolicy::member_type& team) const { int error = test_team_mulit_level_scratch_loop_body(team); errors() += error; } KOKKOS_INLINE_FUNCTION void operator() (const TagReduce&, const typename Kokkos::TeamPolicy::member_type& team, int& error) const { error += test_team_mulit_level_scratch_loop_body(team); } void run() { Kokkos::View d_errors = Kokkos::View("Errors"); errors = d_errors; const int per_team1 = 3*Kokkos::View>::shmem_size(128000); const int per_thread1 = 3*Kokkos::View>::shmem_size(16000); { Kokkos::TeamPolicy policy(10,8,16); Kokkos::parallel_for(policy.set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), *this); Kokkos::fence(); typename Kokkos::View::HostMirror h_errors= Kokkos::create_mirror_view(d_errors); Kokkos::deep_copy(h_errors,d_errors); ASSERT_EQ(h_errors(),0); } { int error = 0; Kokkos::TeamPolicy policy(10,8,16); Kokkos::parallel_reduce(policy.set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), *this,error); Kokkos::fence(); ASSERT_EQ(error,0); } }; unsigned team_shmem_size(int team_size) const { const int per_team0 = 3*Kokkos::View>::shmem_size(128); const int per_thread0 = 3*Kokkos::View>::shmem_size(16); return per_team0 + team_size * per_thread0; } }; template< class ExecSpace, class ScheduleType > void test_team_mulit_level_scratch_test_lambda() { #ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA Kokkos::View > errors; Kokkos::View d_errors("Errors"); errors = d_errors; const int per_team0 = 3*Kokkos::View>::shmem_size(128); const int per_thread0 = 3*Kokkos::View>::shmem_size(16); const int per_team1 = 3*Kokkos::View>::shmem_size(128000); const int per_thread1 = 3*Kokkos::View>::shmem_size(16000); Kokkos::TeamPolicy policy(10,8,16); Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy::member_type& team) { int error = test_team_mulit_level_scratch_loop_body(team); errors() += error; }); Kokkos::fence(); typename Kokkos::View::HostMirror h_errors= Kokkos::create_mirror_view(errors); Kokkos::deep_copy(h_errors,d_errors); ASSERT_EQ(h_errors(),0); int error = 0; Kokkos::parallel_reduce(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy::member_type& team, int& count) { count += test_team_mulit_level_scratch_loop_body(team); },error); ASSERT_EQ(error,0); Kokkos::fence(); #endif } } namespace { template< class ExecSpace, class ScheduleType > struct TestMultiLevelScratchTeam { TestMultiLevelScratchTeam() { run(); } void run() { #ifdef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA Test::test_team_mulit_level_scratch_test_lambda(); #endif Test::ClassNoShmemSizeFunction c1; c1.run(); Test::ClassWithShmemSizeFunction c2; c2.run(); } }; } namespace Test { template< class ExecSpace > struct TestShmemSize { TestShmemSize() { run(); } void run() { typedef Kokkos::View< long***, ExecSpace > view_type; size_t d1 = 5; size_t d2 = 6; size_t d3 = 7; size_t size = view_type::shmem_size( d1, d2, d3 ); ASSERT_EQ( size, d1 * d2 * d3 * sizeof(long) ); } }; } /*--------------------------------------------------------------------------*/