Update Kokkos library in LAMMPS to v3.0

This commit is contained in:
Stan Moore
2020-03-25 14:08:39 -06:00
parent 0252d8c210
commit 60864e38d1
2169 changed files with 121406 additions and 126492 deletions

View File

@ -2,10 +2,11 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
@ -23,10 +24,10 @@
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@ -47,201 +48,194 @@ namespace TestAtomic {
// Struct for testing arbitrary size atomics.
template< int N >
template <int N>
struct SuperScalar {
double val[N];
KOKKOS_INLINE_FUNCTION
SuperScalar() {
for ( int i = 0; i < N; i++ ) {
for (int i = 0; i < N; i++) {
val[i] = 0.0;
}
}
KOKKOS_INLINE_FUNCTION
SuperScalar( const SuperScalar & src ) {
for ( int i = 0; i < N; i++ ) {
SuperScalar(const SuperScalar& src) {
for (int i = 0; i < N; i++) {
val[i] = src.val[i];
}
}
KOKKOS_INLINE_FUNCTION
SuperScalar( const volatile SuperScalar & src ) {
for ( int i = 0; i < N; i++ ) {
SuperScalar(const volatile SuperScalar& src) {
for (int i = 0; i < N; i++) {
val[i] = src.val[i];
}
}
KOKKOS_INLINE_FUNCTION
SuperScalar& operator=( const SuperScalar & src ) {
for ( int i = 0; i < N; i++ ) {
SuperScalar& operator=(const SuperScalar& src) {
for (int i = 0; i < N; i++) {
val[i] = src.val[i];
}
return *this;
}
KOKKOS_INLINE_FUNCTION
SuperScalar& operator=( const volatile SuperScalar & src ) {
for ( int i = 0; i < N; i++ ) {
SuperScalar& operator=(const volatile SuperScalar& src) {
for (int i = 0; i < N; i++) {
val[i] = src.val[i];
}
return *this;
}
KOKKOS_INLINE_FUNCTION
void operator=( const SuperScalar & src ) volatile {
for ( int i = 0; i < N; i++ ) {
void operator=(const SuperScalar& src) volatile {
for (int i = 0; i < N; i++) {
val[i] = src.val[i];
}
}
KOKKOS_INLINE_FUNCTION
SuperScalar operator+( const SuperScalar & src ) {
SuperScalar operator+(const SuperScalar& src) {
SuperScalar tmp = *this;
for ( int i = 0; i < N; i++ ) {
for (int i = 0; i < N; i++) {
tmp.val[i] += src.val[i];
}
return tmp;
}
KOKKOS_INLINE_FUNCTION
SuperScalar& operator+=( const double & src ) {
for ( int i = 0; i < N; i++ ) {
val[i] += 1.0 * ( i + 1 ) * src;
SuperScalar& operator+=(const double& src) {
for (int i = 0; i < N; i++) {
val[i] += 1.0 * (i + 1) * src;
}
return *this;
}
KOKKOS_INLINE_FUNCTION
SuperScalar& operator+=( const SuperScalar & src ) {
for ( int i = 0; i < N; i++ ) {
SuperScalar& operator+=(const SuperScalar& src) {
for (int i = 0; i < N; i++) {
val[i] += src.val[i];
}
return *this;
}
KOKKOS_INLINE_FUNCTION
bool operator==( const SuperScalar & src ) {
bool operator==(const SuperScalar& src) {
bool compare = true;
for( int i = 0; i < N; i++ ) {
compare = compare && ( val[i] == src.val[i] );
for (int i = 0; i < N; i++) {
compare = compare && (val[i] == src.val[i]);
}
return compare;
}
KOKKOS_INLINE_FUNCTION
bool operator!=( const SuperScalar & src ) {
bool operator!=(const SuperScalar& src) {
bool compare = true;
for ( int i = 0; i < N; i++ ) {
compare = compare && ( val[i] == src.val[i] );
for (int i = 0; i < N; i++) {
compare = compare && (val[i] == src.val[i]);
}
return !compare;
}
KOKKOS_INLINE_FUNCTION
SuperScalar( const double & src ) {
for ( int i = 0; i < N; i++ ) {
val[i] = 1.0 * ( i + 1 ) * src;
SuperScalar(const double& src) {
for (int i = 0; i < N; i++) {
val[i] = 1.0 * (i + 1) * src;
}
}
};
template< int N >
std::ostream & operator<<( std::ostream & os, const SuperScalar< N > & dt )
{
template <int N>
std::ostream& operator<<(std::ostream& os, const SuperScalar<N>& dt) {
os << "{ ";
for ( int i = 0; i < N - 1; i++ ) {
os << dt.val[i] << ", ";
for (int i = 0; i < N - 1; i++) {
os << dt.val[i] << ", ";
}
os << dt.val[N-1] << "}";
os << dt.val[N - 1] << "}";
return os;
}
template< class T, class DEVICE_TYPE >
template <class T, class DEVICE_TYPE>
struct ZeroFunctor {
typedef DEVICE_TYPE execution_space;
typedef typename Kokkos::View< T, execution_space > type;
typedef typename Kokkos::View< T, execution_space >::HostMirror h_type;
typedef typename Kokkos::View<T, execution_space> type;
typedef typename Kokkos::View<T, execution_space>::HostMirror h_type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()( int ) const {
data() = 0;
}
void operator()(int) const { data() = 0; }
};
//---------------------------------------------------
//--------------atomic_fetch_add---------------------
//---------------------------------------------------
template< class T, class DEVICE_TYPE >
template <class T, class DEVICE_TYPE>
struct AddFunctor {
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View< T, execution_space > type;
typedef Kokkos::View<T, execution_space> type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()( int ) const {
Kokkos::atomic_fetch_add( &data(), (T) 1 );
}
void operator()(int) const { Kokkos::atomic_fetch_add(&data(), (T)1); }
};
template< class T, class DEVICE_TYPE >
template <class T, class DEVICE_TYPE>
struct AddFunctorReduce {
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View< T, execution_space > type;
typedef Kokkos::View<T, execution_space> type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()( int , int& ) const {
Kokkos::atomic_fetch_add( &data(), (T) 1 );
}
void operator()(int, int&) const { Kokkos::atomic_fetch_add(&data(), (T)1); }
};
template< class T, class execution_space >
T AddLoop( int loop ) {
struct ZeroFunctor< T, execution_space > f_zero;
typename ZeroFunctor< T, execution_space >::type data( "Data" );
typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" );
template <class T, class execution_space>
T AddLoop(int loop) {
struct ZeroFunctor<T, execution_space> f_zero;
typename ZeroFunctor<T, execution_space>::type data("Data");
typename ZeroFunctor<T, execution_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for( 1, f_zero );
Kokkos::parallel_for(1, f_zero);
execution_space().fence();
struct AddFunctor< T, execution_space > f_add;
struct AddFunctor<T, execution_space> f_add;
f_add.data = data;
Kokkos::parallel_for( loop, f_add );
Kokkos::parallel_for(loop, f_add);
execution_space().fence();
Kokkos::deep_copy( h_data, data );
Kokkos::deep_copy(h_data, data);
T val = h_data();
struct AddFunctorReduce< T, execution_space > f_add_red;
struct AddFunctorReduce<T, execution_space> f_add_red;
f_add_red.data = data;
int dummy_result;
Kokkos::parallel_reduce( loop, f_add_red , dummy_result );
Kokkos::parallel_reduce(loop, f_add_red, dummy_result);
execution_space().fence();
return val;
}
// Serial reference for AddLoop: starts from 0 and adds T(1) `loop` times.
// A non-positive `loop` leaves the result at 0.
template <class T>
T AddLoopSerial(int loop) {
  // Default-construct then assign, which are the same operations on T that the
  // earlier heap-allocated version performed; a plain local needs no new/delete.
  T total;
  total = 0;
  for (int i = 0; i < loop; i++) {
    total += static_cast<T>(1);
  }
  return total;
}
@ -250,93 +244,93 @@ T AddLoopSerial( int loop ) {
//--------------atomic_compare_exchange-----------------
//------------------------------------------------------
template< class T, class DEVICE_TYPE >
template <class T, class DEVICE_TYPE>
struct CASFunctor {
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View< T, execution_space > type;
typedef Kokkos::View<T, execution_space> type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()( int ) const {
void operator()(int) const {
T old = data();
T newval, assumed;
do {
assumed = old;
newval = assumed + (T) 1;
old = Kokkos::atomic_compare_exchange( &data(), assumed, newval );
} while( old != assumed );
newval = assumed + (T)1;
old = Kokkos::atomic_compare_exchange(&data(), assumed, newval);
} while (old != assumed);
}
};
template< class T, class DEVICE_TYPE >
template <class T, class DEVICE_TYPE>
struct CASFunctorReduce {
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View< T, execution_space > type;
typedef Kokkos::View<T, execution_space> type;
type data;
KOKKOS_INLINE_FUNCTION
void operator()( int , int& ) const {
void operator()(int, int&) const {
T old = data();
T newval, assumed;
do {
assumed = old;
newval = assumed + (T) 1;
old = Kokkos::atomic_compare_exchange( &data(), assumed, newval );
} while( old != assumed );
newval = assumed + (T)1;
old = Kokkos::atomic_compare_exchange(&data(), assumed, newval);
} while (old != assumed);
}
};
template< class T, class execution_space >
T CASLoop( int loop ) {
struct ZeroFunctor< T, execution_space > f_zero;
typename ZeroFunctor< T, execution_space >::type data( "Data" );
typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" );
template <class T, class execution_space>
T CASLoop(int loop) {
struct ZeroFunctor<T, execution_space> f_zero;
typename ZeroFunctor<T, execution_space>::type data("Data");
typename ZeroFunctor<T, execution_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for( 1, f_zero );
Kokkos::parallel_for(1, f_zero);
execution_space().fence();
struct CASFunctor< T, execution_space > f_cas;
struct CASFunctor<T, execution_space> f_cas;
f_cas.data = data;
Kokkos::parallel_for( loop, f_cas );
Kokkos::parallel_for(loop, f_cas);
execution_space().fence();
Kokkos::deep_copy( h_data, data );
Kokkos::deep_copy(h_data, data);
T val = h_data();
struct CASFunctorReduce< T, execution_space > f_cas_red;
struct CASFunctorReduce<T, execution_space> f_cas_red;
f_cas_red.data = data;
int dummy_result;
Kokkos::parallel_reduce( loop, f_cas_red , dummy_result );
Kokkos::parallel_reduce(loop, f_cas_red, dummy_result);
execution_space().fence();
return val;
}
// Serial reference for CASLoop: starts from 0 and performs `loop` increments,
// each written in the same read / compute / swap shape as the device functor.
template <class T>
T CASLoopSerial(int loop) {
  // Default-construct then assign, which are the same operations on T that the
  // earlier heap-allocated version performed; a plain local needs no new/delete.
  T current;
  current = 0;
  for (int i = 0; i < loop; i++) {
    T assumed;
    T newval;
    T old;
    // `assumed` and `old` are both read from `current` with no write in
    // between, so this retry loop executes exactly once per iteration.
    do {
      assumed = current;
      newval  = assumed + static_cast<T>(1);
      old     = current;
      current = newval;
    } while (!(assumed == old));
  }
  return current;
}
@ -345,209 +339,228 @@ T CASLoopSerial( int loop ) {
//--------------atomic_exchange-----------------
//----------------------------------------------
template< class T, class DEVICE_TYPE >
template <class T, class DEVICE_TYPE>
struct ExchFunctor {
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View< T, execution_space > type;
typedef Kokkos::View<T, execution_space> type;
type data, data2;
KOKKOS_INLINE_FUNCTION
void operator()( int i ) const {
T old = Kokkos::atomic_exchange( &data(), (T) i );
Kokkos::atomic_fetch_add( &data2(), old );
void operator()(int i) const {
T old = Kokkos::atomic_exchange(&data(), (T)i);
Kokkos::atomic_fetch_add(&data2(), old);
}
};
template< class T, class DEVICE_TYPE >
template <class T, class DEVICE_TYPE>
struct ExchFunctorReduce {
typedef DEVICE_TYPE execution_space;
typedef Kokkos::View< T, execution_space > type;
typedef Kokkos::View<T, execution_space> type;
type data, data2;
KOKKOS_INLINE_FUNCTION
void operator()( int i , int& ) const {
T old = Kokkos::atomic_exchange( &data(), (T) i );
Kokkos::atomic_fetch_add( &data2(), old );
void operator()(int i, int&) const {
T old = Kokkos::atomic_exchange(&data(), (T)i);
Kokkos::atomic_fetch_add(&data2(), old);
}
};
template< class T, class execution_space >
T ExchLoop( int loop ) {
struct ZeroFunctor< T, execution_space > f_zero;
typename ZeroFunctor< T, execution_space >::type data( "Data" );
typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" );
template <class T, class execution_space>
T ExchLoop(int loop) {
struct ZeroFunctor<T, execution_space> f_zero;
typename ZeroFunctor<T, execution_space>::type data("Data");
typename ZeroFunctor<T, execution_space>::h_type h_data("HData");
f_zero.data = data;
Kokkos::parallel_for( 1, f_zero );
Kokkos::parallel_for(1, f_zero);
execution_space().fence();
typename ZeroFunctor< T, execution_space >::type data2( "Data" );
typename ZeroFunctor< T, execution_space >::h_type h_data2( "HData" );
typename ZeroFunctor<T, execution_space>::type data2("Data");
typename ZeroFunctor<T, execution_space>::h_type h_data2("HData");
f_zero.data = data2;
Kokkos::parallel_for( 1, f_zero );
Kokkos::parallel_for(1, f_zero);
execution_space().fence();
struct ExchFunctor< T, execution_space > f_exch;
f_exch.data = data;
struct ExchFunctor<T, execution_space> f_exch;
f_exch.data = data;
f_exch.data2 = data2;
Kokkos::parallel_for( loop, f_exch );
Kokkos::parallel_for(loop, f_exch);
execution_space().fence();
Kokkos::deep_copy( h_data, data );
Kokkos::deep_copy( h_data2, data2 );
Kokkos::deep_copy(h_data, data);
Kokkos::deep_copy(h_data2, data2);
T val = h_data() + h_data2();
struct ExchFunctorReduce< T, execution_space > f_exch_red;
f_exch_red.data = data;
struct ExchFunctorReduce<T, execution_space> f_exch_red;
f_exch_red.data = data;
f_exch_red.data2 = data2;
int dummy_result;
Kokkos::parallel_reduce( loop, f_exch_red , dummy_result );
Kokkos::parallel_reduce(loop, f_exch_red, dummy_result);
execution_space().fence();
return val;
}
template< class T >
T ExchLoopSerial( typename std::conditional< !std::is_same< T, Kokkos::complex<double> >::value, int, void >::type loop ) {
T* data = new T[1];
template <class T>
T ExchLoopSerial(
typename std::conditional<!std::is_same<T, Kokkos::complex<double> >::value,
int, void>::type loop) {
T* data = new T[1];
T* data2 = new T[1];
data[0] = 0;
data[0] = 0;
data2[0] = 0;
for ( int i = 0; i < loop; i++ ) {
for (int i = 0; i < loop; i++) {
T old = *data;
*data = (T) i;
*data = (T)i;
*data2 += old;
}
T val = *data2 + *data;
delete [] data;
delete [] data2;
delete[] data;
delete[] data2;
return val;
}
template< class T >
T ExchLoopSerial( typename std::conditional< std::is_same< T, Kokkos::complex<double> >::value, int, void >::type loop ) {
T* data = new T[1];
template <class T>
T ExchLoopSerial(
typename std::conditional<std::is_same<T, Kokkos::complex<double> >::value,
int, void>::type loop) {
T* data = new T[1];
T* data2 = new T[1];
data[0] = 0;
data[0] = 0;
data2[0] = 0;
for ( int i = 0; i < loop; i++ ) {
T old = *data;
data->real() = ( static_cast<double>( i ) );
for (int i = 0; i < loop; i++) {
T old = *data;
data->real() = (static_cast<double>(i));
data->imag() = 0;
*data2 += old;
}
T val = *data2 + *data;
delete [] data;
delete [] data2;
delete[] data;
delete[] data2;
return val;
}
// Dispatches to the device-side loop chosen by `test`:
//   1 -> AddLoop, 2 -> CASLoop, 3 -> ExchLoop.
// Any other selector returns a T converted from 0.
template <class T, class DeviceType>
T LoopVariant(int loop, int test) {
  switch (test) {
    case 1: return AddLoop<T, DeviceType>(loop);
    case 2: return CASLoop<T, DeviceType>(loop);
    case 3: return ExchLoop<T, DeviceType>(loop);
  }

  return 0;
}
// Dispatches to the serial reference loop chosen by `test`:
//   1 -> AddLoopSerial, 2 -> CASLoopSerial, 3 -> ExchLoopSerial.
// Any other selector returns a T converted from 0.
template <class T>
T LoopVariantSerial(int loop, int test) {
  switch (test) {
    case 1: return AddLoopSerial<T>(loop);
    case 2: return CASLoopSerial<T>(loop);
    case 3: return ExchLoopSerial<T>(loop);
  }

  return 0;
}
// Runs variant `test` on the device and as a serial reference with the same
// iteration count, and reports whether the two results agree. On a mismatch a
// diagnostic with both values is written to std::cout.
template <class T, class DeviceType>
bool Loop(int loop, int test) {
  // Left non-const: T's comparison operators are not required to be const
  // member functions.
  T res       = LoopVariant<T, DeviceType>(loop, test);
  T resSerial = LoopVariantSerial<T>(loop, test);

  if (resSerial != res) {
    std::cout << "Loop<" << typeid(T).name() << ">( test = " << test
              << " FAILED : " << resSerial << " != " << res << std::endl;
    return false;
  }

  return true;
}
} // namespace TestAtomic
} // namespace TestAtomic
namespace Test {

// Checks device atomics against the serial reference for each scalar type and
// each variant selector (1 = add, 2 = compare-exchange, 3 = exchange).
TEST(TEST_CATEGORY, atomics) {
  // Integer literal rather than 1e4, so no double-to-int conversion occurs.
  const int loop_count = 10000;

  ASSERT_TRUE((TestAtomic::Loop<int, TEST_EXECSPACE>(loop_count, 1)));
  ASSERT_TRUE((TestAtomic::Loop<int, TEST_EXECSPACE>(loop_count, 2)));
  ASSERT_TRUE((TestAtomic::Loop<int, TEST_EXECSPACE>(loop_count, 3)));

  ASSERT_TRUE((TestAtomic::Loop<unsigned int, TEST_EXECSPACE>(loop_count, 1)));
  ASSERT_TRUE((TestAtomic::Loop<unsigned int, TEST_EXECSPACE>(loop_count, 2)));
  ASSERT_TRUE((TestAtomic::Loop<unsigned int, TEST_EXECSPACE>(loop_count, 3)));

  ASSERT_TRUE((TestAtomic::Loop<long int, TEST_EXECSPACE>(loop_count, 1)));
  ASSERT_TRUE((TestAtomic::Loop<long int, TEST_EXECSPACE>(loop_count, 2)));
  ASSERT_TRUE((TestAtomic::Loop<long int, TEST_EXECSPACE>(loop_count, 3)));

  ASSERT_TRUE(
      (TestAtomic::Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 1)));
  ASSERT_TRUE(
      (TestAtomic::Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 2)));
  ASSERT_TRUE(
      (TestAtomic::Loop<unsigned long int, TEST_EXECSPACE>(loop_count, 3)));

  ASSERT_TRUE((TestAtomic::Loop<long long int, TEST_EXECSPACE>(loop_count, 1)));
  ASSERT_TRUE((TestAtomic::Loop<long long int, TEST_EXECSPACE>(loop_count, 2)));
  ASSERT_TRUE((TestAtomic::Loop<long long int, TEST_EXECSPACE>(loop_count, 3)));

  ASSERT_TRUE((TestAtomic::Loop<double, TEST_EXECSPACE>(loop_count, 1)));
  ASSERT_TRUE((TestAtomic::Loop<double, TEST_EXECSPACE>(loop_count, 2)));
  ASSERT_TRUE((TestAtomic::Loop<double, TEST_EXECSPACE>(loop_count, 3)));

  ASSERT_TRUE((TestAtomic::Loop<float, TEST_EXECSPACE>(100, 1)));
  ASSERT_TRUE((TestAtomic::Loop<float, TEST_EXECSPACE>(100, 2)));
  ASSERT_TRUE((TestAtomic::Loop<float, TEST_EXECSPACE>(100, 3)));

  // The complex and SuperScalar cases are compiled out for the OpenMPTarget
  // and ROCm backends.
#ifndef KOKKOS_ENABLE_OPENMPTARGET
#ifndef KOKKOS_ENABLE_ROCM  // ROCM doesn't yet support atomics for >64bit types
  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 1)));
  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 2)));
  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(1, 3)));

  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 1)));
  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 2)));
  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<double>, TEST_EXECSPACE>(100, 3)));

  ASSERT_TRUE((TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 1)));
  ASSERT_TRUE((TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 2)));
  ASSERT_TRUE((TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(1, 3)));

  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 1)));
  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 2)));
  ASSERT_TRUE(
      (TestAtomic::Loop<Kokkos::complex<float>, TEST_EXECSPACE>(100, 3)));

  ASSERT_TRUE(
      (TestAtomic::Loop<TestAtomic::SuperScalar<4>, TEST_EXECSPACE>(100, 1)));
  ASSERT_TRUE(
      (TestAtomic::Loop<TestAtomic::SuperScalar<4>, TEST_EXECSPACE>(100, 2)));
  ASSERT_TRUE(
      (TestAtomic::Loop<TestAtomic::SuperScalar<4>, TEST_EXECSPACE>(100, 3)));
#endif
#endif
}

}  // namespace Test