554 lines
15 KiB
C++
554 lines
15 KiB
C++
/*
|
|
//@HEADER
|
|
// ************************************************************************
|
|
//
|
|
// Kokkos v. 2.0
|
|
// Copyright (2014) Sandia Corporation
|
|
//
|
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
// the U.S. Government retains certain rights in this software.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// 1. Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
//
|
|
// 2. Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
//
|
|
// 3. Neither the name of the Corporation nor the names of the
|
|
// contributors may be used to endorse or promote products derived from
|
|
// this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
|
//
|
|
// ************************************************************************
|
|
//@HEADER
|
|
*/
|
|
|
|
#include <Kokkos_Core.hpp>
|
|
|
|
namespace TestAtomic {
|
|
|
|
// Struct for testing arbitrary size atomics.
|
|
|
|
// Aggregate of N doubles with the arithmetic, comparison and (volatile)
// copy operations the atomic tests in this file apply to their value type.
//
// Conversion from a double `x` (constructor and `operator+=( double )`) uses
// ( i + 1 ) * x for element i, so the elements carry distinct values.
//
// The read-only operators (`+`, `==`, `!=`) are const-qualified so they can be
// used on const objects and temporaries bound to const references.
template< int N >
struct SuperScalar {
  double val[N];

  // Default construction zeroes every element.
  KOKKOS_INLINE_FUNCTION
  SuperScalar() {
    for ( int i = 0; i < N; i++ ) {
      val[i] = 0.0;
    }
  }

  // Element-wise copy from a non-volatile source.
  KOKKOS_INLINE_FUNCTION
  SuperScalar( const SuperScalar & src ) {
    for ( int i = 0; i < N; i++ ) {
      val[i] = src.val[i];
    }
  }

  // Element-wise copy from a volatile source.
  KOKKOS_INLINE_FUNCTION
  SuperScalar( const volatile SuperScalar & src ) {
    for ( int i = 0; i < N; i++ ) {
      val[i] = src.val[i];
    }
  }

  // Element-wise assignment from a non-volatile source.
  KOKKOS_INLINE_FUNCTION
  SuperScalar& operator=( const SuperScalar & src ) {
    for ( int i = 0; i < N; i++ ) {
      val[i] = src.val[i];
    }
    return *this;
  }

  // Element-wise assignment from a volatile source.
  KOKKOS_INLINE_FUNCTION
  SuperScalar& operator=( const volatile SuperScalar & src ) {
    for ( int i = 0; i < N; i++ ) {
      val[i] = src.val[i];
    }
    return *this;
  }

  // Element-wise assignment into a volatile destination; returns nothing.
  KOKKOS_INLINE_FUNCTION
  void operator=( const SuperScalar & src ) volatile {
    for ( int i = 0; i < N; i++ ) {
      val[i] = src.val[i];
    }
  }

  // Element-wise sum; neither operand is modified.
  KOKKOS_INLINE_FUNCTION
  SuperScalar operator+( const SuperScalar & src ) const {
    SuperScalar tmp = *this;
    for ( int i = 0; i < N; i++ ) {
      tmp.val[i] += src.val[i];
    }
    return tmp;
  }

  // Adds ( i + 1 ) * src to element i, matching the double constructor.
  KOKKOS_INLINE_FUNCTION
  SuperScalar& operator+=( const double & src ) {
    for ( int i = 0; i < N; i++ ) {
      val[i] += 1.0 * ( i + 1 ) * src;
    }
    return *this;
  }

  // Element-wise in-place sum.
  KOKKOS_INLINE_FUNCTION
  SuperScalar& operator+=( const SuperScalar & src ) {
    for ( int i = 0; i < N; i++ ) {
      val[i] += src.val[i];
    }
    return *this;
  }

  // True only when every pair of elements compares equal.
  KOKKOS_INLINE_FUNCTION
  bool operator==( const SuperScalar & src ) const {
    bool compare = true;
    for ( int i = 0; i < N; i++ ) {
      compare = compare && ( val[i] == src.val[i] );
    }
    return compare;
  }

  // Exact negation of operator==.
  KOKKOS_INLINE_FUNCTION
  bool operator!=( const SuperScalar & src ) const {
    return !( *this == src );
  }

  // Converting constructor: element i becomes ( i + 1 ) * src.
  // Intentionally not explicit; callers in this file rely on the implicit
  // conversion from numeric literals (e.g. `data[0] = 0`, `return 0`).
  KOKKOS_INLINE_FUNCTION
  SuperScalar( const double & src ) {
    for ( int i = 0; i < N; i++ ) {
      val[i] = 1.0 * ( i + 1 ) * src;
    }
  }
};
|
|
|
|
template< int N >
|
|
std::ostream & operator<<( std::ostream & os, const SuperScalar< N > & dt )
|
|
{
|
|
os << "{ ";
|
|
for ( int i = 0; i < N - 1; i++ ) {
|
|
os << dt.val[i] << ", ";
|
|
}
|
|
os << dt.val[N-1] << "}";
|
|
|
|
return os;
|
|
}
|
|
|
|
template< class T, class DEVICE_TYPE >
|
|
struct ZeroFunctor {
|
|
typedef DEVICE_TYPE execution_space;
|
|
typedef typename Kokkos::View< T, execution_space > type;
|
|
typedef typename Kokkos::View< T, execution_space >::HostMirror h_type;
|
|
|
|
type data;
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( int ) const {
|
|
data() = 0;
|
|
}
|
|
};
|
|
|
|
//---------------------------------------------------
|
|
//--------------atomic_fetch_add---------------------
|
|
//---------------------------------------------------
|
|
|
|
template< class T, class DEVICE_TYPE >
|
|
struct AddFunctor {
|
|
typedef DEVICE_TYPE execution_space;
|
|
typedef Kokkos::View< T, execution_space > type;
|
|
|
|
type data;
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( int ) const {
|
|
Kokkos::atomic_fetch_add( &data(), (T) 1 );
|
|
}
|
|
};
|
|
|
|
template< class T, class DEVICE_TYPE >
|
|
struct AddFunctorReduce {
|
|
typedef DEVICE_TYPE execution_space;
|
|
typedef Kokkos::View< T, execution_space > type;
|
|
|
|
type data;
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( int , int& ) const {
|
|
Kokkos::atomic_fetch_add( &data(), (T) 1 );
|
|
}
|
|
};
|
|
|
|
template< class T, class execution_space >
|
|
T AddLoop( int loop ) {
|
|
struct ZeroFunctor< T, execution_space > f_zero;
|
|
typename ZeroFunctor< T, execution_space >::type data( "Data" );
|
|
typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" );
|
|
|
|
f_zero.data = data;
|
|
|
|
Kokkos::parallel_for( 1, f_zero );
|
|
execution_space().fence();
|
|
|
|
struct AddFunctor< T, execution_space > f_add;
|
|
|
|
f_add.data = data;
|
|
Kokkos::parallel_for( loop, f_add );
|
|
execution_space().fence();
|
|
|
|
Kokkos::deep_copy( h_data, data );
|
|
T val = h_data();
|
|
|
|
struct AddFunctorReduce< T, execution_space > f_add_red;
|
|
f_add_red.data = data;
|
|
int dummy_result;
|
|
Kokkos::parallel_reduce( loop, f_add_red , dummy_result );
|
|
execution_space().fence();
|
|
|
|
return val;
|
|
}
|
|
|
|
// Serial reference for AddLoop: starts from 0, adds T( 1 ) `loop` times and
// returns the total. A non-positive `loop` yields T's zero value.
//
// The accumulator lives in automatic storage; there is no heap allocation to
// release, so nothing can leak if an operation on T throws.
template< class T >
T AddLoopSerial( int loop ) {
  // Default-construct, then assign 0, so T only needs a default constructor
  // and assignment from 0.
  T data;
  data = 0;

  for ( int i = 0; i < loop; i++ ) {
    data += static_cast< T >( 1 );
  }

  return data;
}
|
|
|
|
//------------------------------------------------------
|
|
//--------------atomic_compare_exchange-----------------
|
|
//------------------------------------------------------
|
|
|
|
template< class T, class DEVICE_TYPE >
|
|
struct CASFunctor {
|
|
typedef DEVICE_TYPE execution_space;
|
|
typedef Kokkos::View< T, execution_space > type;
|
|
|
|
type data;
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( int ) const {
|
|
T old = data();
|
|
T newval, assumed;
|
|
|
|
do {
|
|
assumed = old;
|
|
newval = assumed + (T) 1;
|
|
old = Kokkos::atomic_compare_exchange( &data(), assumed, newval );
|
|
} while( old != assumed );
|
|
}
|
|
};
|
|
|
|
template< class T, class DEVICE_TYPE >
|
|
struct CASFunctorReduce {
|
|
typedef DEVICE_TYPE execution_space;
|
|
typedef Kokkos::View< T, execution_space > type;
|
|
|
|
type data;
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( int , int& ) const {
|
|
T old = data();
|
|
T newval, assumed;
|
|
|
|
do {
|
|
assumed = old;
|
|
newval = assumed + (T) 1;
|
|
old = Kokkos::atomic_compare_exchange( &data(), assumed, newval );
|
|
} while( old != assumed );
|
|
}
|
|
};
|
|
|
|
template< class T, class execution_space >
|
|
T CASLoop( int loop ) {
|
|
struct ZeroFunctor< T, execution_space > f_zero;
|
|
typename ZeroFunctor< T, execution_space >::type data( "Data" );
|
|
typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" );
|
|
|
|
f_zero.data = data;
|
|
Kokkos::parallel_for( 1, f_zero );
|
|
execution_space().fence();
|
|
|
|
struct CASFunctor< T, execution_space > f_cas;
|
|
f_cas.data = data;
|
|
Kokkos::parallel_for( loop, f_cas );
|
|
execution_space().fence();
|
|
|
|
Kokkos::deep_copy( h_data, data );
|
|
T val = h_data();
|
|
|
|
struct CASFunctorReduce< T, execution_space > f_cas_red;
|
|
f_cas_red.data = data;
|
|
int dummy_result;
|
|
Kokkos::parallel_reduce( loop, f_cas_red , dummy_result );
|
|
execution_space().fence();
|
|
|
|
return val;
|
|
}
|
|
|
|
// Serial reference for CASLoop: starts from 0 and performs `loop` increments
// by T( 1 ), each written in the read / compute / re-read / store shape of a
// compare-and-exchange retry loop. Returns the final value.
//
// The value lives in automatic storage; there is no heap allocation to
// release, so nothing can leak if an operation on T throws.
template< class T >
T CASLoopSerial( int loop ) {
  // Default-construct, then assign 0, so T only needs a default constructor
  // and assignment from 0.
  T data;
  data = 0;

  for ( int i = 0; i < loop; i++ ) {
    T assumed;
    T newval;
    T old;

    // With a single thread nothing modifies `data` between the two reads,
    // so `old` is read from the same unchanged value as `assumed`.
    do {
      assumed = data;
      newval = assumed + static_cast< T >( 1 );
      old = data;
      data = newval;
    } while ( !( assumed == old ) );
  }

  return data;
}
|
|
|
|
//----------------------------------------------
|
|
//--------------atomic_exchange-----------------
|
|
//----------------------------------------------
|
|
|
|
template< class T, class DEVICE_TYPE >
|
|
struct ExchFunctor {
|
|
typedef DEVICE_TYPE execution_space;
|
|
typedef Kokkos::View< T, execution_space > type;
|
|
|
|
type data, data2;
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( int i ) const {
|
|
T old = Kokkos::atomic_exchange( &data(), (T) i );
|
|
Kokkos::atomic_fetch_add( &data2(), old );
|
|
}
|
|
};
|
|
|
|
template< class T, class DEVICE_TYPE >
|
|
struct ExchFunctorReduce {
|
|
typedef DEVICE_TYPE execution_space;
|
|
typedef Kokkos::View< T, execution_space > type;
|
|
|
|
type data, data2;
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( int i , int& ) const {
|
|
T old = Kokkos::atomic_exchange( &data(), (T) i );
|
|
Kokkos::atomic_fetch_add( &data2(), old );
|
|
}
|
|
};
|
|
|
|
template< class T, class execution_space >
|
|
T ExchLoop( int loop ) {
|
|
struct ZeroFunctor< T, execution_space > f_zero;
|
|
typename ZeroFunctor< T, execution_space >::type data( "Data" );
|
|
typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" );
|
|
|
|
f_zero.data = data;
|
|
Kokkos::parallel_for( 1, f_zero );
|
|
execution_space().fence();
|
|
|
|
typename ZeroFunctor< T, execution_space >::type data2( "Data" );
|
|
typename ZeroFunctor< T, execution_space >::h_type h_data2( "HData" );
|
|
|
|
f_zero.data = data2;
|
|
Kokkos::parallel_for( 1, f_zero );
|
|
execution_space().fence();
|
|
|
|
struct ExchFunctor< T, execution_space > f_exch;
|
|
f_exch.data = data;
|
|
f_exch.data2 = data2;
|
|
Kokkos::parallel_for( loop, f_exch );
|
|
execution_space().fence();
|
|
|
|
Kokkos::deep_copy( h_data, data );
|
|
Kokkos::deep_copy( h_data2, data2 );
|
|
T val = h_data() + h_data2();
|
|
|
|
struct ExchFunctorReduce< T, execution_space > f_exch_red;
|
|
f_exch_red.data = data;
|
|
f_exch_red.data2 = data2;
|
|
int dummy_result;
|
|
Kokkos::parallel_reduce( loop, f_exch_red , dummy_result );
|
|
execution_space().fence();
|
|
|
|
return val;
|
|
}
|
|
|
|
template< class T >
|
|
T ExchLoopSerial( typename std::conditional< !std::is_same< T, Kokkos::complex<double> >::value, int, void >::type loop ) {
|
|
T* data = new T[1];
|
|
T* data2 = new T[1];
|
|
data[0] = 0;
|
|
data2[0] = 0;
|
|
|
|
for ( int i = 0; i < loop; i++ ) {
|
|
T old = *data;
|
|
*data = (T) i;
|
|
*data2 += old;
|
|
}
|
|
|
|
T val = *data2 + *data;
|
|
delete [] data;
|
|
delete [] data2;
|
|
|
|
return val;
|
|
}
|
|
|
|
template< class T >
|
|
T ExchLoopSerial( typename std::conditional< std::is_same< T, Kokkos::complex<double> >::value, int, void >::type loop ) {
|
|
T* data = new T[1];
|
|
T* data2 = new T[1];
|
|
data[0] = 0;
|
|
data2[0] = 0;
|
|
|
|
for ( int i = 0; i < loop; i++ ) {
|
|
T old = *data;
|
|
data->real() = ( static_cast<double>( i ) );
|
|
data->imag() = 0;
|
|
*data2 += old;
|
|
}
|
|
|
|
T val = *data2 + *data;
|
|
delete [] data;
|
|
delete [] data2;
|
|
|
|
return val;
|
|
}
|
|
|
|
template< class T, class DeviceType >
|
|
T LoopVariant( int loop, int test ) {
|
|
switch ( test ) {
|
|
case 1: return AddLoop< T, DeviceType >( loop );
|
|
case 2: return CASLoop< T, DeviceType >( loop );
|
|
case 3: return ExchLoop< T, DeviceType >( loop );
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
template< class T >
|
|
T LoopVariantSerial( int loop, int test ) {
|
|
switch ( test ) {
|
|
case 1: return AddLoopSerial< T >( loop );
|
|
case 2: return CASLoopSerial< T >( loop );
|
|
case 3: return ExchLoopSerial< T >( loop );
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
template< class T, class DeviceType >
|
|
bool Loop( int loop, int test )
|
|
{
|
|
T res = LoopVariant< T, DeviceType >( loop, test );
|
|
T resSerial = LoopVariantSerial< T >( loop, test );
|
|
|
|
bool passed = true;
|
|
|
|
if ( resSerial != res ) {
|
|
passed = false;
|
|
|
|
std::cout << "Loop<"
|
|
<< typeid( T ).name()
|
|
<< ">( test = "
|
|
<< test << " FAILED : "
|
|
<< resSerial << " != " << res
|
|
<< std::endl;
|
|
}
|
|
|
|
return passed;
|
|
}
|
|
|
|
} // namespace TestAtomic
|
|
|
|
namespace Test {
|
|
|
|
// Checks every TestAtomic::Loop variant (test ids 1, 2, 3 as dispatched by
// TestAtomic::LoopVariant) against its serial reference for each value type.
// Types are visited in a fixed order and, within a type, ids run 1 -> 2 -> 3;
// the first failing assertion ends the test.
TEST_F( TEST_CATEGORY, atomics )
{
  const int loop_count = 10000;
  const int first_test = 1;
  const int last_test  = 3;

  for ( int test = first_test; test <= last_test; ++test ) {
    ASSERT_TRUE( ( TestAtomic::Loop< int, TEST_EXECSPACE >( loop_count, test ) ) );
  }

  for ( int test = first_test; test <= last_test; ++test ) {
    ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, TEST_EXECSPACE >( loop_count, test ) ) );
  }

  for ( int test = first_test; test <= last_test; ++test ) {
    ASSERT_TRUE( ( TestAtomic::Loop< long int, TEST_EXECSPACE >( loop_count, test ) ) );
  }

  for ( int test = first_test; test <= last_test; ++test ) {
    ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, TEST_EXECSPACE >( loop_count, test ) ) );
  }

  for ( int test = first_test; test <= last_test; ++test ) {
    ASSERT_TRUE( ( TestAtomic::Loop< long long int, TEST_EXECSPACE >( loop_count, test ) ) );
  }

  for ( int test = first_test; test <= last_test; ++test ) {
    ASSERT_TRUE( ( TestAtomic::Loop< double, TEST_EXECSPACE >( loop_count, test ) ) );
  }

  // float runs with a fixed count of 100 instead of loop_count.
  for ( int test = first_test; test <= last_test; ++test ) {
    ASSERT_TRUE( ( TestAtomic::Loop< float, TEST_EXECSPACE >( 100, test ) ) );
  }

#ifndef KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_ROCM // ROCM doesn't yet support atomics for >64bit types
  // complex<double>: a single iteration first, then 100 iterations.
  for ( int test = first_test; test <= last_test; ++test ) {
    ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, TEST_EXECSPACE >( 1, test ) ) );
  }

  for ( int test = first_test; test <= last_test; ++test ) {
    ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, TEST_EXECSPACE >( 100, test ) ) );
  }

  for ( int test = first_test; test <= last_test; ++test ) {
    ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, TEST_EXECSPACE >( 100, test ) ) );
  }
#endif
#endif
}
|
|
|
|
|
|
} // namespace Test
|
|
|