Files
lammps/lib/kokkos/core/unit_test/TestTeam.hpp

467 lines
14 KiB
C++
Executable File

/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <stdio.h>
#include <stdexcept>
#include <sstream>
#include <iostream>
#include <Kokkos_Core.hpp>
/*--------------------------------------------------------------------------*/
namespace Test {
namespace {
template< class ExecSpace >
struct TestTeamPolicy {
typedef typename Kokkos::TeamPolicy< ExecSpace >::member_type team_member ;
typedef Kokkos::View<int**,ExecSpace> view_type ;
view_type m_flags ;
TestTeamPolicy( const size_t league_size )
: m_flags( Kokkos::ViewAllocateWithoutInitializing("flags")
, Kokkos::TeamPolicy< ExecSpace >::team_size_max( *this )
, league_size )
{}
struct VerifyInitTag {};
KOKKOS_INLINE_FUNCTION
void operator()( const team_member & member ) const
{
const int tid = member.team_rank() + member.team_size() * member.league_rank();
m_flags( member.team_rank() , member.league_rank() ) = tid ;
}
KOKKOS_INLINE_FUNCTION
void operator()( const VerifyInitTag & , const team_member & member ) const
{
const int tid = member.team_rank() + member.team_size() * member.league_rank();
if ( tid != m_flags( member.team_rank() , member.league_rank() ) ) {
printf("TestTeamPolicy member(%d,%d) error %d != %d\n"
, member.league_rank() , member.team_rank()
, tid , m_flags( member.team_rank() , member.league_rank() ) );
}
}
static void test_for( const size_t league_size )
{
TestTeamPolicy functor( league_size );
const int team_size = Kokkos::TeamPolicy< ExecSpace >::team_size_max( functor );
Kokkos::parallel_for( Kokkos::TeamPolicy< ExecSpace >( league_size , team_size ) , functor );
Kokkos::parallel_for( Kokkos::TeamPolicy< ExecSpace , VerifyInitTag >( league_size , team_size ) , functor );
}
struct ReduceTag {};
typedef long value_type ;
KOKKOS_INLINE_FUNCTION
void operator()( const team_member & member , value_type & update ) const
{
update += member.team_rank() + member.team_size() * member.league_rank();
}
KOKKOS_INLINE_FUNCTION
void operator()( const ReduceTag & , const team_member & member , value_type & update ) const
{
update += 1 + member.team_rank() + member.team_size() * member.league_rank();
}
static void test_reduce( const size_t league_size )
{
TestTeamPolicy functor( league_size );
const int team_size = Kokkos::TeamPolicy< ExecSpace >::team_size_max( functor );
const long N = team_size * league_size ;
long total = 0 ;
Kokkos::parallel_reduce( Kokkos::TeamPolicy< ExecSpace >( league_size , team_size ) , functor , total );
ASSERT_EQ( size_t((N-1)*(N))/2 , size_t(total) );
Kokkos::parallel_reduce( Kokkos::TeamPolicy< ExecSpace , ReduceTag >( league_size , team_size ) , functor , total );
ASSERT_EQ( (size_t(N)*size_t(N+1))/2 , size_t(total) );
}
};
}
}
/*--------------------------------------------------------------------------*/
namespace Test {
// Reduction functor for Kokkos::TeamPolicy: the index range [0,nwork) is cut
// into contiguous chunks, one per thread, and every thread accumulates three
// quantities over its chunk.
template< typename ScalarType , class DeviceType >
class ReduceTeamFunctor
{
public:

  typedef DeviceType execution_space ;
  typedef Kokkos::TeamPolicy< execution_space > policy_type ;
  typedef typename execution_space::size_type size_type ;

  // Reduction value: three independent accumulators.
  struct value_type {
    ScalarType value[3] ;
  };

  // Total number of work items.
  const size_type nwork ;

  ReduceTeamFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {}

  ReduceTeamFunctor( const ReduceTeamFunctor & rhs )
    : nwork( rhs.nwork ) {}

  // Zero all three accumulators.
  KOKKOS_INLINE_FUNCTION
  void init( value_type & dst ) const
    {
      for ( int k = 0 ; k < 3 ; ++k ) {
        dst.value[k] = 0 ;
      }
    }

  // Component-wise sum of src into dst.
  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & dst ,
             const volatile value_type & src ) const
    {
      for ( int k = 0 ; k < 3 ; ++k ) {
        dst.value[k] += src.value[k] ;
      }
    }

  // Per-thread contribution.  For each work item i of this thread's chunk:
  //   value[0] += 1          (item count)
  //   value[1] += i + 1
  //   value[2] += nwork - i
  KOKKOS_INLINE_FUNCTION
  void operator()( const typename policy_type::member_type ind , value_type & dst ) const
    {
      // Global thread id and global thread count across the whole league.
      const int global_rank  = ind.team_rank() + ind.team_size() * ind.league_rank();
      const int global_count = ind.team_size() * ind.league_size();

      // Items per thread, rounded up so that all of [0,nwork) is covered.
      const int per_thread = ( nwork + global_count - 1 ) / global_count ;

      // Half-open chunk [first,last), clipped to nwork.
      const size_type first = per_thread * global_rank ;
      const size_type last  = first + per_thread < nwork ? first + per_thread : nwork ;

      for ( size_type i = first ; i < last ; ++i ) {
        dst.value[0] += 1 ;
        dst.value[1] += i + 1 ;
        dst.value[2] += nwork - i ;
      }
    }
};
} // namespace Test
namespace {
// Host-side driver: runs Test::ReduceTeamFunctor repeatedly under a
// TeamPolicy and checks every result against its closed-form value.
// Constructing the object runs the test.
template< typename ScalarType , class DeviceType >
class TestReduceTeam
{
public:

  typedef DeviceType execution_space ;
  typedef Kokkos::TeamPolicy< execution_space > policy_type ;
  typedef typename execution_space::size_type size_type ;

  //------------------------------------

  TestReduceTeam( const size_type & nwork )
    {
      run_test(nwork);
    }

  // Perform Repeat reductions over nwork items, then verify all of them.
  // Expected: value[0] == nwork and value[1] == value[2] == nwork*(nwork+1)/2.
  void run_test( const size_type & nwork )
    {
      typedef Test::ReduceTeamFunctor< ScalarType , execution_space > functor_type ;
      typedef typename functor_type::value_type value_type ;
      typedef Kokkos::View< value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type ;

      enum { Count = 3 };
      enum { Repeat = 100 };

      // One host-resident result slot per repetition.
      value_type result[ Repeat ];

      const unsigned long nw = nwork ;

      // nw*(nw+1)/2, halving whichever factor is even before multiplying.
      unsigned long half_product ;
      if ( nw % 2 ) {
        half_product = nw * (( nw + 1 )/2 );
      }
      else {
        half_product = (nw/2) * ( nw + 1 );
      }
      const unsigned long nsum = half_product ;

      // Enough teams of the recommended size to provide at least nwork threads.
      const unsigned team_size   = policy_type::team_size_recommended( functor_type(nwork) );
      const unsigned league_size = ( nwork + team_size - 1 ) / team_size ;

      policy_type team_exec( league_size , team_size );

      for ( unsigned i = 0 ; i < Repeat ; ++i ) {
        // Unmanaged host view wrapping this repetition's result slot.
        result_type tmp( & result[i] );
        Kokkos::parallel_reduce( team_exec , functor_type(nwork) , tmp );
      }

      execution_space::fence();

      for ( unsigned i = 0 ; i < Repeat ; ++i ) {
        for ( unsigned j = 0 ; j < Count ; ++j ) {
          // Slot 0 holds the item count; slots 1 and 2 hold the triangular sum.
          const unsigned long correct = ( j % 3 == 0 ) ? nw : nsum ;
          ASSERT_EQ( (ScalarType) correct , result[i].value[j] );
        }
      }
    }
};
}
/*--------------------------------------------------------------------------*/
namespace Test {
// Reduction functor that exercises the team collectives team_reduce() and
// team_scan() of a Kokkos::TeamPolicy member.  The reduction value is an
// error flag: 0 when every check passed, 1 when any thread saw a mismatch.
template< class DeviceType >
class ScanTeamFunctor
{
public:

  typedef DeviceType execution_space ;
  typedef Kokkos::TeamPolicy< execution_space > policy_type ;
  typedef long int value_type ;

  // Passed by pointer to the final team_scan call below.
  Kokkos::View< value_type , execution_space > accum ;
  // Written by thread ( league 0 , rank 0 ): thread_count*(thread_count+1)/2.
  Kokkos::View< value_type , execution_space > total ;

  ScanTeamFunctor() : accum("accum"), total("total") {}

  // Start with "no error".
  KOKKOS_INLINE_FUNCTION
  void init( value_type & error ) const { error = 0 ; }

  // The joined flag becomes 1 as soon as any contribution is non-zero.
  KOKKOS_INLINE_FUNCTION
  void join( value_type volatile & error ,
             value_type volatile const & input ) const
    { if ( input ) error = 1 ; }

  // Join operation handed to team_reduce: keeps the larger value.
  struct JoinMax {
    typedef long int value_type ;
    KOKKOS_INLINE_FUNCTION
    void join( value_type volatile & dst
             , value_type volatile const & input ) const
      { if ( dst < input ) dst = input ; }
  };

  // Per-thread body.
  //   ind    team member handle
  //   error  reduction value; set to 1 when a collective returns an
  //          unexpected result
  KOKKOS_INLINE_FUNCTION
  void operator()( const typename policy_type::member_type ind , value_type & error ) const
    {
      // A single thread records the reference total for the whole league.
      if ( 0 == ind.league_rank() && 0 == ind.team_rank() ) {
        const long int thread_count = ind.league_size() * ind.team_size();
        total() = ( thread_count * ( thread_count + 1 ) ) / 2 ;
      }

      // Team max: every member contributes league_rank + team_rank, so the
      // maximum over the team is league_rank + ( team_size - 1 ).
      const long int m = ind.team_reduce( (long int) ( ind.league_rank() + ind.team_rank() ) , JoinMax() );

      if ( m != ind.league_rank() + ( ind.team_size() - 1 ) ) {
        printf("ScanTeamFunctor[%d.%d of %d.%d] reduce_max_answer(%ld) != reduce_max(%ld)\n"
              , ind.league_rank(), ind.team_rank()
              , ind.league_size(), ind.team_size()
              , (long int)(ind.league_rank() + ( ind.team_size() - 1 )) , m );
        // Report the mismatch through the reduction value as well, so that the
        // host-side check fails instead of only logging the problem.
        error = 1 ;
      }

      // Scan: every member contributes league_rank + 1 + team_rank + 1.
      // 'answer' equals the sum of the contributions of all members whose
      // team rank is below this member's rank.
      const long int answer =
        ( ind.league_rank() + 1 ) * ind.team_rank() +
        ( ind.team_rank() * ( ind.team_rank() + 1 ) ) / 2 ;

      // The same scan is issued twice and both results are checked.
      const long int result =
        ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 );

      const long int result2 =
        ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 );

      if ( answer != result || answer != result2 ) {
        printf("ScanTeamFunctor[%d.%d of %d.%d] answer(%ld) != scan_first(%ld) or scan_second(%ld)\n",
               ind.league_rank(), ind.team_rank(),
               ind.league_size(), ind.team_size(),
               answer,result,result2);
        error = 1 ;
      }

      // Scan of ( 1 + global thread id ) that is additionally given a pointer
      // to 'accum'.
      // NOTE(review): presumably each team's total is added into accum, which
      // the host then compares with 'total' -- confirm against team_scan docs.
      const long int thread_rank = ind.team_rank() +
                                   ind.team_size() * ind.league_rank();
      ind.team_scan( 1 + thread_rank , accum.ptr_on_device() );
    }
};
// Host-side driver for Test::ScanTeamFunctor.  Constructing the object runs
// the test.
template< class DeviceType >
class TestScanTeam
{
public:

  typedef DeviceType execution_space ;
  typedef long int value_type ;
  typedef Kokkos::TeamPolicy< execution_space > policy_type ;
  typedef Test::ScanTeamFunctor<DeviceType> functor_type ;

  //------------------------------------

  TestScanTeam( const size_t nteam )
    {
      run_test(nteam);
    }

  // Launch the functor with nteam teams of maximum size, repeated
  // ceil( 100000 / nteam ) times, and after every launch require that no
  // error was flagged and that the functor's 'accum' equals its 'total'.
  void run_test( const size_t nteam )
    {
      typedef Kokkos::View< long int , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ;

      const unsigned team_run_budget = 100000 ;
      const unsigned pass_count      = ( team_run_budget + nteam - 1 ) / nteam ;

      functor_type functor ;

      policy_type team_exec( nteam , policy_type::team_size_max( functor ) );

      for ( unsigned pass = 0 ; pass < pass_count ; ++pass ) {
        long int accum = 0 ;
        long int total = 0 ;
        long int error = 0 ;

        // 'total' is still zero here, so this resets the functor's accumulator.
        Kokkos::deep_copy( functor.accum , total );

        // The error flag is reduced straight into the host variable.
        Kokkos::parallel_reduce( team_exec , functor , result_type( & error ) );
        execution_space::fence();

        // Fetch what the functor wrote.
        Kokkos::deep_copy( accum , functor.accum );
        Kokkos::deep_copy( total , functor.total );

        ASSERT_EQ( error , 0 );
        ASSERT_EQ( total , accum );
      }

      execution_space::fence();
    }
};
} // namespace Test
/*--------------------------------------------------------------------------*/
namespace Test {
// Reduction functor that checks team scratch memory: each team fills two
// arrays of SHARED_COUNT ints obtained from team_shmem(), and the last team
// member re-reads them after a team barrier.  The reduction value counts the
// errors found.
template< class ExecSpace >
struct SharedTeamFunctor {

  typedef ExecSpace execution_space ;
  typedef int value_type ;
  typedef Kokkos::TeamPolicy< execution_space > policy_type ;

  enum { SHARED_COUNT = 1000 };

  typedef typename ExecSpace::scratch_memory_space shmem_space ;

  // TODO: MemoryUnmanaged should become the default for the shared memory space.
  typedef Kokkos::View<int*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ;

  // Shared memory required by this functor: two arrays of SHARED_COUNT ints.
  inline
  unsigned team_shmem_size( int /* team_size */ ) const
    {
      return 2 * shared_int_array_type::shmem_size( SHARED_COUNT );
    }

  // Per-thread body.
  //   ind     team member handle
  //   update  error counter contributed to the reduction
  KOKKOS_INLINE_FUNCTION
  void operator()( const typename policy_type::member_type & ind , value_type & update ) const
    {
      const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT );
      const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT );

      const bool missing_A = ( shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0 );
      const bool missing_B = ( shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0 );

      if ( missing_A || missing_B ) {
        printf ("Failed to allocate shared memory of size %lu\n",
                static_cast<unsigned long> (SHARED_COUNT));
        ++update; // failure to allocate is an error
        return ;
      }

      const int league_rank = ind.league_rank();
      const int team_size   = ind.team_size();

      // Fill: entries are dealt out to the team members with stride team_size.
      for ( int i = ind.team_rank() ; i < SHARED_COUNT ; i += team_size ) {
        shared_A[i] = i + league_rank ;
        shared_B[i] = 2 * i + league_rank ;
      }

      ind.team_barrier();

      // Only the member with the highest rank performs the verification.
      if ( ind.team_rank() + 1 != team_size ) {
        return ;
      }

      for ( int i = 0 ; i < SHARED_COUNT ; ++i ) {
        const int expect_A = i + league_rank ;
        const int expect_B = 2 * i + league_rank ;

        if ( shared_A[i] != expect_A ) { ++update ; }
        if ( shared_B[i] != expect_B ) { ++update ; }
      }
    }
};
}
namespace {
template< class ExecSpace >
struct TestSharedTeam {
TestSharedTeam()
{ run(); }
void run()
{
typedef Test::SharedTeamFunctor<ExecSpace> Functor ;
typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ;
const size_t team_size = Kokkos::TeamPolicy< ExecSpace >::team_size_max( Functor() );
Kokkos::TeamPolicy< ExecSpace > team_exec( 8192 / team_size , team_size );
typename Functor::value_type error_count = 0 ;
Kokkos::parallel_reduce( team_exec , Functor() , result_type( & error_count ) );
ASSERT_EQ( error_count , 0 );
}
};
}
/*--------------------------------------------------------------------------*/