Update Kokkos library in LAMMPS to v3.0
This commit is contained in:
@ -2,10 +2,11 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
// Kokkos v. 3.0
|
||||
// Copyright (2020) National Technology & Engineering
|
||||
// Solutions of Sandia, LLC (NTESS).
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
@ -23,10 +24,10 @@
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
@ -47,201 +48,194 @@ namespace TestAtomic {
|
||||
|
||||
// Struct for testing arbitrary size atomics.
//
// Holds N doubles and provides the construction, assignment, addition and
// comparison operations that the functors and serial reference loops in this
// file apply to their element type T. Volatile-qualified overloads are kept
// next to the plain ones so the type can also be copied from and assigned
// through volatile-qualified objects.
template <int N>
struct SuperScalar {
  double val[N];

  // Sets every component to zero.
  KOKKOS_INLINE_FUNCTION
  SuperScalar() {
    for (int i = 0; i < N; i++) {
      val[i] = 0.0;
    }
  }

  // Component-wise copy.
  KOKKOS_INLINE_FUNCTION
  SuperScalar(const SuperScalar& src) {
    for (int i = 0; i < N; i++) {
      val[i] = src.val[i];
    }
  }

  // Component-wise copy from a volatile source.
  KOKKOS_INLINE_FUNCTION
  SuperScalar(const volatile SuperScalar& src) {
    for (int i = 0; i < N; i++) {
      val[i] = src.val[i];
    }
  }

  // Component-wise assignment.
  KOKKOS_INLINE_FUNCTION
  SuperScalar& operator=(const SuperScalar& src) {
    for (int i = 0; i < N; i++) {
      val[i] = src.val[i];
    }
    return *this;
  }

  // Component-wise assignment from a volatile source.
  KOKKOS_INLINE_FUNCTION
  SuperScalar& operator=(const volatile SuperScalar& src) {
    for (int i = 0; i < N; i++) {
      val[i] = src.val[i];
    }
    return *this;
  }

  // Component-wise assignment into a volatile destination; returns nothing.
  KOKKOS_INLINE_FUNCTION
  void operator=(const SuperScalar& src) volatile {
    for (int i = 0; i < N; i++) {
      val[i] = src.val[i];
    }
  }

  // Returns the component-wise sum of *this and src. Neither operand is
  // modified, so the operator is const-qualified and usable on const objects.
  KOKKOS_INLINE_FUNCTION
  SuperScalar operator+(const SuperScalar& src) const {
    SuperScalar tmp = *this;
    for (int i = 0; i < N; i++) {
      tmp.val[i] += src.val[i];
    }
    return tmp;
  }

  // Adds (i + 1) * src to component i, mirroring the scaling used by the
  // constructor taking a double.
  KOKKOS_INLINE_FUNCTION
  SuperScalar& operator+=(const double& src) {
    for (int i = 0; i < N; i++) {
      val[i] += 1.0 * (i + 1) * src;
    }
    return *this;
  }

  // Component-wise in-place addition.
  KOKKOS_INLINE_FUNCTION
  SuperScalar& operator+=(const SuperScalar& src) {
    for (int i = 0; i < N; i++) {
      val[i] += src.val[i];
    }
    return *this;
  }

  // True when every component compares equal (exact floating-point ==).
  KOKKOS_INLINE_FUNCTION
  bool operator==(const SuperScalar& src) const {
    for (int i = 0; i < N; i++) {
      if (!(val[i] == src.val[i])) return false;
    }
    return true;
  }

  // Logical negation of operator==.
  KOKKOS_INLINE_FUNCTION
  bool operator!=(const SuperScalar& src) const { return !(*this == src); }

  // Implicit conversion from a scalar: component i becomes (i + 1) * src.
  // Left non-explicit on purpose; code in this file writes `data[0] = 0` and
  // `(T)1` with T = SuperScalar<N>.
  KOKKOS_INLINE_FUNCTION
  SuperScalar(const double& src) {
    for (int i = 0; i < N; i++) {
      val[i] = 1.0 * (i + 1) * src;
    }
  }
};
|
||||
|
||||
template< int N >
|
||||
std::ostream & operator<<( std::ostream & os, const SuperScalar< N > & dt )
|
||||
{
|
||||
template <int N>
|
||||
std::ostream& operator<<(std::ostream& os, const SuperScalar<N>& dt) {
|
||||
os << "{ ";
|
||||
for ( int i = 0; i < N - 1; i++ ) {
|
||||
os << dt.val[i] << ", ";
|
||||
for (int i = 0; i < N - 1; i++) {
|
||||
os << dt.val[i] << ", ";
|
||||
}
|
||||
os << dt.val[N-1] << "}";
|
||||
os << dt.val[N - 1] << "}";
|
||||
|
||||
return os;
|
||||
}
|
||||
|
||||
template< class T, class DEVICE_TYPE >
|
||||
template <class T, class DEVICE_TYPE>
|
||||
struct ZeroFunctor {
|
||||
typedef DEVICE_TYPE execution_space;
|
||||
typedef typename Kokkos::View< T, execution_space > type;
|
||||
typedef typename Kokkos::View< T, execution_space >::HostMirror h_type;
|
||||
typedef typename Kokkos::View<T, execution_space> type;
|
||||
typedef typename Kokkos::View<T, execution_space>::HostMirror h_type;
|
||||
|
||||
type data;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int ) const {
|
||||
data() = 0;
|
||||
}
|
||||
void operator()(int) const { data() = 0; }
|
||||
};
|
||||
|
||||
//---------------------------------------------------
|
||||
//--------------atomic_fetch_add---------------------
|
||||
//---------------------------------------------------
|
||||
|
||||
template< class T, class DEVICE_TYPE >
|
||||
template <class T, class DEVICE_TYPE>
|
||||
struct AddFunctor {
|
||||
typedef DEVICE_TYPE execution_space;
|
||||
typedef Kokkos::View< T, execution_space > type;
|
||||
typedef Kokkos::View<T, execution_space> type;
|
||||
|
||||
type data;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int ) const {
|
||||
Kokkos::atomic_fetch_add( &data(), (T) 1 );
|
||||
}
|
||||
void operator()(int) const { Kokkos::atomic_fetch_add(&data(), (T)1); }
|
||||
};
|
||||
|
||||
template< class T, class DEVICE_TYPE >
|
||||
template <class T, class DEVICE_TYPE>
|
||||
struct AddFunctorReduce {
|
||||
typedef DEVICE_TYPE execution_space;
|
||||
typedef Kokkos::View< T, execution_space > type;
|
||||
typedef Kokkos::View<T, execution_space> type;
|
||||
|
||||
type data;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int , int& ) const {
|
||||
Kokkos::atomic_fetch_add( &data(), (T) 1 );
|
||||
}
|
||||
void operator()(int, int&) const { Kokkos::atomic_fetch_add(&data(), (T)1); }
|
||||
};
|
||||
|
||||
template< class T, class execution_space >
|
||||
T AddLoop( int loop ) {
|
||||
struct ZeroFunctor< T, execution_space > f_zero;
|
||||
typename ZeroFunctor< T, execution_space >::type data( "Data" );
|
||||
typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" );
|
||||
template <class T, class execution_space>
|
||||
T AddLoop(int loop) {
|
||||
struct ZeroFunctor<T, execution_space> f_zero;
|
||||
typename ZeroFunctor<T, execution_space>::type data("Data");
|
||||
typename ZeroFunctor<T, execution_space>::h_type h_data("HData");
|
||||
|
||||
f_zero.data = data;
|
||||
|
||||
Kokkos::parallel_for( 1, f_zero );
|
||||
Kokkos::parallel_for(1, f_zero);
|
||||
execution_space().fence();
|
||||
|
||||
struct AddFunctor< T, execution_space > f_add;
|
||||
struct AddFunctor<T, execution_space> f_add;
|
||||
|
||||
f_add.data = data;
|
||||
Kokkos::parallel_for( loop, f_add );
|
||||
Kokkos::parallel_for(loop, f_add);
|
||||
execution_space().fence();
|
||||
|
||||
Kokkos::deep_copy( h_data, data );
|
||||
Kokkos::deep_copy(h_data, data);
|
||||
T val = h_data();
|
||||
|
||||
struct AddFunctorReduce< T, execution_space > f_add_red;
|
||||
struct AddFunctorReduce<T, execution_space> f_add_red;
|
||||
f_add_red.data = data;
|
||||
int dummy_result;
|
||||
Kokkos::parallel_reduce( loop, f_add_red , dummy_result );
|
||||
Kokkos::parallel_reduce(loop, f_add_red, dummy_result);
|
||||
execution_space().fence();
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
// Serial reference for AddLoop: starts from 0 and adds T(1) `loop` times.
//
// The accumulator is a local variable rather than a `new T[1]` allocation, so
// nothing has to be freed and nothing can leak if an operation on T throws.
// It is default-constructed and then assigned 0, the same sequence of
// operations on T that the heap-based version performed.
template <class T>
T AddLoopSerial(int loop) {
  T total;
  total = 0;

  for (int i = 0; i < loop; i++) {
    total += static_cast<T>(1);
  }

  return total;
}
|
||||
@ -250,93 +244,93 @@ T AddLoopSerial( int loop ) {
|
||||
//--------------atomic_compare_exchange-----------------
|
||||
//------------------------------------------------------
|
||||
|
||||
template< class T, class DEVICE_TYPE >
|
||||
template <class T, class DEVICE_TYPE>
|
||||
struct CASFunctor {
|
||||
typedef DEVICE_TYPE execution_space;
|
||||
typedef Kokkos::View< T, execution_space > type;
|
||||
typedef Kokkos::View<T, execution_space> type;
|
||||
|
||||
type data;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int ) const {
|
||||
void operator()(int) const {
|
||||
T old = data();
|
||||
T newval, assumed;
|
||||
|
||||
do {
|
||||
assumed = old;
|
||||
newval = assumed + (T) 1;
|
||||
old = Kokkos::atomic_compare_exchange( &data(), assumed, newval );
|
||||
} while( old != assumed );
|
||||
newval = assumed + (T)1;
|
||||
old = Kokkos::atomic_compare_exchange(&data(), assumed, newval);
|
||||
} while (old != assumed);
|
||||
}
|
||||
};
|
||||
|
||||
template< class T, class DEVICE_TYPE >
|
||||
template <class T, class DEVICE_TYPE>
|
||||
struct CASFunctorReduce {
|
||||
typedef DEVICE_TYPE execution_space;
|
||||
typedef Kokkos::View< T, execution_space > type;
|
||||
typedef Kokkos::View<T, execution_space> type;
|
||||
|
||||
type data;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int , int& ) const {
|
||||
void operator()(int, int&) const {
|
||||
T old = data();
|
||||
T newval, assumed;
|
||||
|
||||
do {
|
||||
assumed = old;
|
||||
newval = assumed + (T) 1;
|
||||
old = Kokkos::atomic_compare_exchange( &data(), assumed, newval );
|
||||
} while( old != assumed );
|
||||
newval = assumed + (T)1;
|
||||
old = Kokkos::atomic_compare_exchange(&data(), assumed, newval);
|
||||
} while (old != assumed);
|
||||
}
|
||||
};
|
||||
|
||||
template< class T, class execution_space >
|
||||
T CASLoop( int loop ) {
|
||||
struct ZeroFunctor< T, execution_space > f_zero;
|
||||
typename ZeroFunctor< T, execution_space >::type data( "Data" );
|
||||
typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" );
|
||||
template <class T, class execution_space>
|
||||
T CASLoop(int loop) {
|
||||
struct ZeroFunctor<T, execution_space> f_zero;
|
||||
typename ZeroFunctor<T, execution_space>::type data("Data");
|
||||
typename ZeroFunctor<T, execution_space>::h_type h_data("HData");
|
||||
|
||||
f_zero.data = data;
|
||||
Kokkos::parallel_for( 1, f_zero );
|
||||
Kokkos::parallel_for(1, f_zero);
|
||||
execution_space().fence();
|
||||
|
||||
struct CASFunctor< T, execution_space > f_cas;
|
||||
struct CASFunctor<T, execution_space> f_cas;
|
||||
f_cas.data = data;
|
||||
Kokkos::parallel_for( loop, f_cas );
|
||||
Kokkos::parallel_for(loop, f_cas);
|
||||
execution_space().fence();
|
||||
|
||||
Kokkos::deep_copy( h_data, data );
|
||||
Kokkos::deep_copy(h_data, data);
|
||||
T val = h_data();
|
||||
|
||||
struct CASFunctorReduce< T, execution_space > f_cas_red;
|
||||
struct CASFunctorReduce<T, execution_space> f_cas_red;
|
||||
f_cas_red.data = data;
|
||||
int dummy_result;
|
||||
Kokkos::parallel_reduce( loop, f_cas_red , dummy_result );
|
||||
Kokkos::parallel_reduce(loop, f_cas_red, dummy_result);
|
||||
execution_space().fence();
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
// Serial reference for CASLoop: starts from 0 and increments by T(1) `loop`
// times, written in the same read / compute / compare shape as the device
// functor.
//
// The value lives in a local variable rather than a `new T[1]` allocation, so
// nothing has to be freed and nothing can leak if an operation on T throws.
template <class T>
T CASLoopSerial(int loop) {
  T current;
  current = 0;

  for (int i = 0; i < loop; i++) {
    T assumed;
    T newval;
    T old;

    // With no concurrent writer `old` always equals `assumed`, so the body
    // executes exactly once per outer iteration.
    do {
      assumed = current;
      newval  = assumed + static_cast<T>(1);
      old     = current;
      current = newval;
    } while (!(assumed == old));
  }

  return current;
}
|
||||
@ -345,209 +339,228 @@ T CASLoopSerial( int loop ) {
|
||||
//--------------atomic_exchange-----------------
|
||||
//----------------------------------------------
|
||||
|
||||
template< class T, class DEVICE_TYPE >
|
||||
template <class T, class DEVICE_TYPE>
|
||||
struct ExchFunctor {
|
||||
typedef DEVICE_TYPE execution_space;
|
||||
typedef Kokkos::View< T, execution_space > type;
|
||||
typedef Kokkos::View<T, execution_space> type;
|
||||
|
||||
type data, data2;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int i ) const {
|
||||
T old = Kokkos::atomic_exchange( &data(), (T) i );
|
||||
Kokkos::atomic_fetch_add( &data2(), old );
|
||||
void operator()(int i) const {
|
||||
T old = Kokkos::atomic_exchange(&data(), (T)i);
|
||||
Kokkos::atomic_fetch_add(&data2(), old);
|
||||
}
|
||||
};
|
||||
|
||||
template< class T, class DEVICE_TYPE >
|
||||
template <class T, class DEVICE_TYPE>
|
||||
struct ExchFunctorReduce {
|
||||
typedef DEVICE_TYPE execution_space;
|
||||
typedef Kokkos::View< T, execution_space > type;
|
||||
typedef Kokkos::View<T, execution_space> type;
|
||||
|
||||
type data, data2;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int i , int& ) const {
|
||||
T old = Kokkos::atomic_exchange( &data(), (T) i );
|
||||
Kokkos::atomic_fetch_add( &data2(), old );
|
||||
void operator()(int i, int&) const {
|
||||
T old = Kokkos::atomic_exchange(&data(), (T)i);
|
||||
Kokkos::atomic_fetch_add(&data2(), old);
|
||||
}
|
||||
};
|
||||
|
||||
template< class T, class execution_space >
|
||||
T ExchLoop( int loop ) {
|
||||
struct ZeroFunctor< T, execution_space > f_zero;
|
||||
typename ZeroFunctor< T, execution_space >::type data( "Data" );
|
||||
typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" );
|
||||
template <class T, class execution_space>
|
||||
T ExchLoop(int loop) {
|
||||
struct ZeroFunctor<T, execution_space> f_zero;
|
||||
typename ZeroFunctor<T, execution_space>::type data("Data");
|
||||
typename ZeroFunctor<T, execution_space>::h_type h_data("HData");
|
||||
|
||||
f_zero.data = data;
|
||||
Kokkos::parallel_for( 1, f_zero );
|
||||
Kokkos::parallel_for(1, f_zero);
|
||||
execution_space().fence();
|
||||
|
||||
typename ZeroFunctor< T, execution_space >::type data2( "Data" );
|
||||
typename ZeroFunctor< T, execution_space >::h_type h_data2( "HData" );
|
||||
typename ZeroFunctor<T, execution_space>::type data2("Data");
|
||||
typename ZeroFunctor<T, execution_space>::h_type h_data2("HData");
|
||||
|
||||
f_zero.data = data2;
|
||||
Kokkos::parallel_for( 1, f_zero );
|
||||
Kokkos::parallel_for(1, f_zero);
|
||||
execution_space().fence();
|
||||
|
||||
struct ExchFunctor< T, execution_space > f_exch;
|
||||
f_exch.data = data;
|
||||
struct ExchFunctor<T, execution_space> f_exch;
|
||||
f_exch.data = data;
|
||||
f_exch.data2 = data2;
|
||||
Kokkos::parallel_for( loop, f_exch );
|
||||
Kokkos::parallel_for(loop, f_exch);
|
||||
execution_space().fence();
|
||||
|
||||
Kokkos::deep_copy( h_data, data );
|
||||
Kokkos::deep_copy( h_data2, data2 );
|
||||
Kokkos::deep_copy(h_data, data);
|
||||
Kokkos::deep_copy(h_data2, data2);
|
||||
T val = h_data() + h_data2();
|
||||
|
||||
struct ExchFunctorReduce< T, execution_space > f_exch_red;
|
||||
f_exch_red.data = data;
|
||||
struct ExchFunctorReduce<T, execution_space> f_exch_red;
|
||||
f_exch_red.data = data;
|
||||
f_exch_red.data2 = data2;
|
||||
int dummy_result;
|
||||
Kokkos::parallel_reduce( loop, f_exch_red , dummy_result );
|
||||
Kokkos::parallel_reduce(loop, f_exch_red, dummy_result);
|
||||
execution_space().fence();
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
template< class T >
|
||||
T ExchLoopSerial( typename std::conditional< !std::is_same< T, Kokkos::complex<double> >::value, int, void >::type loop ) {
|
||||
T* data = new T[1];
|
||||
template <class T>
|
||||
T ExchLoopSerial(
|
||||
typename std::conditional<!std::is_same<T, Kokkos::complex<double> >::value,
|
||||
int, void>::type loop) {
|
||||
T* data = new T[1];
|
||||
T* data2 = new T[1];
|
||||
data[0] = 0;
|
||||
data[0] = 0;
|
||||
data2[0] = 0;
|
||||
|
||||
for ( int i = 0; i < loop; i++ ) {
|
||||
for (int i = 0; i < loop; i++) {
|
||||
T old = *data;
|
||||
*data = (T) i;
|
||||
*data = (T)i;
|
||||
*data2 += old;
|
||||
}
|
||||
|
||||
T val = *data2 + *data;
|
||||
delete [] data;
|
||||
delete [] data2;
|
||||
delete[] data;
|
||||
delete[] data2;
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
template< class T >
|
||||
T ExchLoopSerial( typename std::conditional< std::is_same< T, Kokkos::complex<double> >::value, int, void >::type loop ) {
|
||||
T* data = new T[1];
|
||||
template <class T>
|
||||
T ExchLoopSerial(
|
||||
typename std::conditional<std::is_same<T, Kokkos::complex<double> >::value,
|
||||
int, void>::type loop) {
|
||||
T* data = new T[1];
|
||||
T* data2 = new T[1];
|
||||
data[0] = 0;
|
||||
data[0] = 0;
|
||||
data2[0] = 0;
|
||||
|
||||
for ( int i = 0; i < loop; i++ ) {
|
||||
T old = *data;
|
||||
data->real() = ( static_cast<double>( i ) );
|
||||
for (int i = 0; i < loop; i++) {
|
||||
T old = *data;
|
||||
data->real() = (static_cast<double>(i));
|
||||
data->imag() = 0;
|
||||
*data2 += old;
|
||||
}
|
||||
|
||||
T val = *data2 + *data;
|
||||
delete [] data;
|
||||
delete [] data2;
|
||||
delete[] data;
|
||||
delete[] data2;
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
template< class T, class DeviceType >
|
||||
T LoopVariant( int loop, int test ) {
|
||||
switch ( test ) {
|
||||
case 1: return AddLoop< T, DeviceType >( loop );
|
||||
case 2: return CASLoop< T, DeviceType >( loop );
|
||||
case 3: return ExchLoop< T, DeviceType >( loop );
|
||||
template <class T, class DeviceType>
|
||||
T LoopVariant(int loop, int test) {
|
||||
switch (test) {
|
||||
case 1: return AddLoop<T, DeviceType>(loop);
|
||||
case 2: return CASLoop<T, DeviceType>(loop);
|
||||
case 3: return ExchLoop<T, DeviceType>(loop);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template< class T >
|
||||
T LoopVariantSerial( int loop, int test ) {
|
||||
switch ( test ) {
|
||||
case 1: return AddLoopSerial< T >( loop );
|
||||
case 2: return CASLoopSerial< T >( loop );
|
||||
case 3: return ExchLoopSerial< T >( loop );
|
||||
template <class T>
|
||||
T LoopVariantSerial(int loop, int test) {
|
||||
switch (test) {
|
||||
case 1: return AddLoopSerial<T>(loop);
|
||||
case 2: return CASLoopSerial<T>(loop);
|
||||
case 3: return ExchLoopSerial<T>(loop);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template< class T, class DeviceType >
|
||||
bool Loop( int loop, int test )
|
||||
{
|
||||
T res = LoopVariant< T, DeviceType >( loop, test );
|
||||
T resSerial = LoopVariantSerial< T >( loop, test );
|
||||
template <class T, class DeviceType>
|
||||
bool Loop(int loop, int test) {
|
||||
T res = LoopVariant<T, DeviceType>(loop, test);
|
||||
T resSerial = LoopVariantSerial<T>(loop, test);
|
||||
|
||||
bool passed = true;
|
||||
|
||||
if ( resSerial != res ) {
|
||||
if (resSerial != res) {
|
||||
passed = false;
|
||||
|
||||
std::cout << "Loop<"
|
||||
<< typeid( T ).name()
|
||||
<< ">( test = "
|
||||
<< test << " FAILED : "
|
||||
<< resSerial << " != " << res
|
||||
<< std::endl;
|
||||
std::cout << "Loop<" << typeid(T).name() << ">( test = " << test
|
||||
<< " FAILED : " << resSerial << " != " << res << std::endl;
|
||||
}
|
||||
|
||||
return passed;
|
||||
}
|
||||
|
||||
} // namespace TestAtomic
|
||||
} // namespace TestAtomic
|
||||
|
||||
namespace Test {
|
||||
|
||||
// Exercises TestAtomic::Loop for a range of element types on TEST_EXECSPACE.
// The last argument selects the variant dispatched by LoopVariant:
// 1 = atomic_fetch_add, 2 = atomic_compare_exchange, 3 = atomic_exchange.
// ASSERT_TRUE stops the test at the first variant whose device result
// differs from the serial reference.
TEST(TEST_CATEGORY, atomics) {
  const int loop_count = 1e4;

  // Integer types, loop_count iterations each.
  ASSERT_TRUE((TestAtomic::Loop<int, TEST_EXECSPACE>(loop_count, 1)));
  ASSERT_TRUE((TestAtomic::Loop<int, TEST_EXECSPACE>(loop_count, 2)));
  ASSERT_TRUE((TestAtomic::Loop<int, TEST_EXECSPACE>(loop_count, 3)));

  ASSERT_TRUE((TestAtomic::Loop<unsigned int, TEST_EXECSPACE>(loop_count, 1)));
  ASSERT_TRUE((TestAtomic::Loop<unsigned int, TEST_EXECSPACE>(loop_count, 2)));
  ASSERT_TRUE((TestAtomic::Loop<unsigned int, TEST_EXECSPACE>(loop_count, 3)));

  ASSERT_TRUE((TestAtomic::Loop<long int, TEST_EXECSPACE>(loop_count, 1)));
  ASSERT_TRUE((TestAtomic::Loop<long int, TEST_EXECSPACE>(loop_count, 2)));
  ASSERT_TRUE((TestAtomic::Loop<long int, TEST_EXECSPACE>(loop_count, 3)));

  ASSERT_TRUE(
      (TestAtomic::Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 1)));
  ASSERT_TRUE(
      (TestAtomic::Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 2)));
  ASSERT_TRUE(
      (TestAtomic::Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 3)));

  ASSERT_TRUE((TestAtomic::Loop<long long int, TEST_EXECSPACE>(loop_count, 1)));
  ASSERT_TRUE((TestAtomic::Loop<long long int, TEST_EXECSPACE>(loop_count, 2)));
  ASSERT_TRUE((TestAtomic::Loop<long long int, TEST_EXECSPACE>(loop_count, 3)));

  // Floating-point types; float runs 100 iterations instead of loop_count.
  ASSERT_TRUE((TestAtomic::Loop<double, TEST_EXECSPACE>(loop_count, 1)));
  ASSERT_TRUE((TestAtomic::Loop<double, TEST_EXECSPACE>(loop_count, 2)));
  ASSERT_TRUE((TestAtomic::Loop<double, TEST_EXECSPACE>(loop_count, 3)));

  ASSERT_TRUE((TestAtomic::Loop<float, TEST_EXECSPACE>(100, 1)));
  ASSERT_TRUE((TestAtomic::Loop<float, TEST_EXECSPACE>(100, 2)));
  ASSERT_TRUE((TestAtomic::Loop<float, TEST_EXECSPACE>(100, 3)));

  // The complex and SuperScalar cases below are compiled out when either
  // KOKKOS_ENABLE_OPENMPTARGET or KOKKOS_ENABLE_ROCM is defined.
#ifndef KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_ROCM  // ROCM doesn't yet support atomics for >64bit types
  // Kokkos::complex<double>: a single iteration, then 100 iterations.
  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 1)));
  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 2)));
  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 3)));

  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 1)));
  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 2)));
  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 3)));

  // Kokkos::complex<float>: a single iteration, then 100 iterations.
  ASSERT_TRUE((TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 1)));
  ASSERT_TRUE((TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 2)));
  ASSERT_TRUE((TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 3)));

  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 1)));
  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 2)));
  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 3)));

  // SuperScalar<4> holds four doubles.
  ASSERT_TRUE(
      (TestAtomic::Loop<TestAtomic::SuperScalar<4>, TEST_EXECSPACE>(100, 1)));
  ASSERT_TRUE(
      (TestAtomic::Loop<TestAtomic::SuperScalar<4>, TEST_EXECSPACE>(100, 2)));
  ASSERT_TRUE(
      (TestAtomic::Loop<TestAtomic::SuperScalar<4>, TEST_EXECSPACE>(100, 3)));
#endif
#endif
}
|
||||
|
||||
|
||||
} // namespace Test
|
||||
|
||||
} // namespace Test
|
||||
|
||||
Reference in New Issue
Block a user