Update Kokkos library in LAMMPS to v3.0

This commit is contained in:
Stan Moore
2020-03-25 14:08:39 -06:00
parent 0252d8c210
commit 60864e38d1
2169 changed files with 121406 additions and 126492 deletions

View File

@ -1,13 +1,14 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
//
// Kokkos v. 3.0
// Copyright (2020) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -23,10 +24,10 @@
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@ -36,7 +37,7 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
@ -55,131 +56,105 @@ namespace Test {
// Reduction : result = dot( Q(:,j) , Q(:,j) );
// PostProcess : R(j,j) = result ; inv = 1 / result ;
template< class VectorView , class ValueView >
struct InvNorm2 : public Kokkos::DotSingle< VectorView > {
template <class VectorView, class ValueView>
struct InvNorm2 : public Kokkos::DotSingle<VectorView> {
typedef typename Kokkos::DotSingle<VectorView>::value_type value_type;
typedef typename Kokkos::DotSingle< VectorView >::value_type value_type ;
ValueView Rjj;
ValueView inv;
ValueView Rjj ;
ValueView inv ;
InvNorm2( const VectorView & argX ,
const ValueView & argR ,
const ValueView & argInv )
: Kokkos::DotSingle< VectorView >( argX )
, Rjj( argR )
, inv( argInv )
{}
InvNorm2(const VectorView& argX, const ValueView& argR,
const ValueView& argInv)
: Kokkos::DotSingle<VectorView>(argX), Rjj(argR), inv(argInv) {}
KOKKOS_INLINE_FUNCTION
void final( value_type & result ) const
{
result = std::sqrt( result );
Rjj() = result ;
inv() = ( 0 < result ) ? 1.0 / result : 0 ;
void final(value_type& result) const {
result = std::sqrt(result);
Rjj() = result;
inv() = (0 < result) ? 1.0 / result : 0;
}
};
template< class VectorView , class ValueView >
inline
void invnorm2( const VectorView & x ,
const ValueView & r ,
const ValueView & r_inv )
{
Kokkos::parallel_reduce( x.extent(0) , InvNorm2< VectorView , ValueView >( x , r , r_inv ) );
template <class VectorView, class ValueView>
inline void invnorm2(const VectorView& x, const ValueView& r,
const ValueView& r_inv) {
Kokkos::parallel_reduce(x.extent(0),
InvNorm2<VectorView, ValueView>(x, r, r_inv));
}
// PostProcess : tmp = - ( R(j,k) = result );
template< class VectorView , class ValueView >
struct DotM : public Kokkos::Dot< VectorView > {
template <class VectorView, class ValueView>
struct DotM : public Kokkos::Dot<VectorView> {
typedef typename Kokkos::Dot<VectorView>::value_type value_type;
typedef typename Kokkos::Dot< VectorView >::value_type value_type ;
ValueView Rjk;
ValueView tmp;
ValueView Rjk ;
ValueView tmp ;
DotM( const VectorView & argX ,
const VectorView & argY ,
const ValueView & argR ,
const ValueView & argTmp )
: Kokkos::Dot< VectorView >( argX , argY )
, Rjk( argR )
, tmp( argTmp )
{}
DotM(const VectorView& argX, const VectorView& argY, const ValueView& argR,
const ValueView& argTmp)
: Kokkos::Dot<VectorView>(argX, argY), Rjk(argR), tmp(argTmp) {}
KOKKOS_INLINE_FUNCTION
void final( value_type & result ) const
{
Rjk() = result ;
tmp() = - result ;
void final(value_type& result) const {
Rjk() = result;
tmp() = -result;
}
};
template< class VectorView , class ValueView >
inline
void dot_neg( const VectorView & x ,
const VectorView & y ,
const ValueView & r ,
const ValueView & r_neg )
{
Kokkos::parallel_reduce( x.extent(0) , DotM< VectorView , ValueView >( x , y , r , r_neg ) );
template <class VectorView, class ValueView>
inline void dot_neg(const VectorView& x, const VectorView& y,
const ValueView& r, const ValueView& r_neg) {
Kokkos::parallel_reduce(x.extent(0),
DotM<VectorView, ValueView>(x, y, r, r_neg));
}
template <typename Scalar, class DeviceType>
struct ModifiedGramSchmidt {
typedef DeviceType execution_space;
typedef typename execution_space::size_type size_type;
template< typename Scalar , class DeviceType >
struct ModifiedGramSchmidt
{
typedef DeviceType execution_space ;
typedef typename execution_space::size_type size_type ;
typedef Kokkos::View<Scalar**, Kokkos::LayoutLeft, execution_space>
multivector_type;
typedef Kokkos::View< Scalar** ,
Kokkos::LayoutLeft ,
execution_space > multivector_type ;
typedef Kokkos::View<Scalar*, Kokkos::LayoutLeft, execution_space>
vector_type;
typedef Kokkos::View< Scalar* ,
Kokkos::LayoutLeft ,
execution_space > vector_type ;
typedef Kokkos::View<Scalar, Kokkos::LayoutLeft, execution_space> value_view;
typedef Kokkos::View< Scalar ,
Kokkos::LayoutLeft ,
execution_space > value_view ;
multivector_type Q;
multivector_type R;
multivector_type Q ;
multivector_type R ;
static double factorization( const multivector_type Q_ ,
const multivector_type R_ )
{
const size_type count = Q_.extent(1);
static double factorization(const multivector_type Q_,
const multivector_type R_) {
const size_type count = Q_.extent(1);
value_view tmp("tmp");
value_view one("one");
Kokkos::deep_copy( one , (Scalar) 1 );
Kokkos::deep_copy(one, (Scalar)1);
Kokkos::Timer timer ;
Kokkos::Timer timer;
for ( size_type j = 0 ; j < count ; ++j ) {
for (size_type j = 0; j < count; ++j) {
// Reduction : tmp = dot( Q(:,j) , Q(:,j) );
// PostProcess : tmp = std::sqrt( tmp ); R(j,j) = tmp ; tmp = 1 / tmp ;
const vector_type Qj = Kokkos::subview( Q_ , Kokkos::ALL() , j );
const value_view Rjj = Kokkos::subview( R_ , j , j );
const vector_type Qj = Kokkos::subview(Q_, Kokkos::ALL(), j);
const value_view Rjj = Kokkos::subview(R_, j, j);
invnorm2( Qj , Rjj , tmp );
invnorm2(Qj, Rjj, tmp);
// Q(:,j) *= ( 1 / R(j,j) ); => Q(:,j) *= tmp ;
Kokkos::scale( tmp , Qj );
Kokkos::scale(tmp, Qj);
for ( size_t k = j + 1 ; k < count ; ++k ) {
const vector_type Qk = Kokkos::subview( Q_ , Kokkos::ALL() , k );
const value_view Rjk = Kokkos::subview( R_ , j , k );
for (size_t k = j + 1; k < count; ++k) {
const vector_type Qk = Kokkos::subview(Q_, Kokkos::ALL(), k);
const value_view Rjk = Kokkos::subview(R_, j, k);
// Reduction : R(j,k) = dot( Q(:,j) , Q(:,k) );
// PostProcess : tmp = - R(j,k);
dot_neg( Qj , Qk , Rjk , tmp );
dot_neg(Qj, Qk, Rjk, tmp);
// Q(:,k) -= R(j,k) * Q(:,j); => Q(:,k) += tmp * Q(:,j)
Kokkos::axpby( tmp , Qj , one , Qk );
Kokkos::axpby(tmp, Qj, one, Qk);
}
}
@ -190,94 +165,87 @@ struct ModifiedGramSchmidt
//--------------------------------------------------------------------------
static double test( const size_t length ,
const size_t count ,
const size_t iter = 1 )
{
multivector_type Q_( "Q" , length , count );
multivector_type R_( "R" , count , count );
static double test(const size_t length, const size_t count,
const size_t iter = 1) {
multivector_type Q_("Q", length, count);
multivector_type R_("R", count, count);
typename multivector_type::HostMirror A =
Kokkos::create_mirror( Q_ );
typename multivector_type::HostMirror A = Kokkos::create_mirror(Q_);
// Create and fill A on the host
for ( size_type j = 0 ; j < count ; ++j ) {
for ( size_type i = 0 ; i < length ; ++i ) {
A(i,j) = ( i + 1 ) * ( j + 1 );
for (size_type j = 0; j < count; ++j) {
for (size_type i = 0; i < length; ++i) {
A(i, j) = (i + 1) * (j + 1);
}
}
double dt_min = 0 ;
double dt_min = 0;
for ( size_t i = 0 ; i < iter ; ++i ) {
Kokkos::deep_copy( Q_ , A );
for (size_t i = 0; i < iter; ++i) {
Kokkos::deep_copy(Q_, A);
// A = Q * R
const double dt = factorization( Q_ , R_ );
const double dt = factorization(Q_, R_);
if ( 0 == i ) dt_min = dt ;
else dt_min = dt < dt_min ? dt : dt_min ;
if (0 == i)
dt_min = dt;
else
dt_min = dt < dt_min ? dt : dt_min;
}
return dt_min ;
return dt_min;
}
};
template< class DeviceType >
void run_test_gramschmidt( int exp_beg , int exp_end, int num_trials, const char deviceTypeName[] )
{
std::string label_gramschmidt ;
label_gramschmidt.append( "\"GramSchmidt< double , " );
label_gramschmidt.append( deviceTypeName );
label_gramschmidt.append( " >\"" );
template <class DeviceType>
void run_test_gramschmidt(int exp_beg, int exp_end, int num_trials,
const char deviceTypeName[]) {
std::string label_gramschmidt;
label_gramschmidt.append("\"GramSchmidt< double , ");
label_gramschmidt.append(deviceTypeName);
label_gramschmidt.append(" >\"");
for (int i = exp_beg ; i < exp_end ; ++i) {
double min_seconds = 0.0 ;
double max_seconds = 0.0 ;
double avg_seconds = 0.0 ;
for (int i = exp_beg; i < exp_end; ++i) {
double min_seconds = 0.0;
double max_seconds = 0.0;
double avg_seconds = 0.0;
const int parallel_work_length = 1<<i;
const int parallel_work_length = 1 << i;
for ( int j = 0 ; j < num_trials ; ++j ) {
const double seconds = ModifiedGramSchmidt< double , DeviceType >::test(parallel_work_length, 32 ) ;
for (int j = 0; j < num_trials; ++j) {
const double seconds = ModifiedGramSchmidt<double, DeviceType>::test(
parallel_work_length, 32);
if ( 0 == j ) {
min_seconds = seconds ;
max_seconds = seconds ;
if (0 == j) {
min_seconds = seconds;
max_seconds = seconds;
} else {
if (seconds < min_seconds) min_seconds = seconds;
if (seconds > max_seconds) max_seconds = seconds;
}
else {
if ( seconds < min_seconds ) min_seconds = seconds ;
if ( seconds > max_seconds ) max_seconds = seconds ;
}
avg_seconds += seconds ;
avg_seconds += seconds;
}
avg_seconds /= num_trials ;
avg_seconds /= num_trials;
std::cout << label_gramschmidt
<< " , " << parallel_work_length
<< " , " << min_seconds
<< " , " << ( min_seconds / parallel_work_length )
<< std::endl ;
std::cout << label_gramschmidt << " , " << parallel_work_length << " , "
<< min_seconds << " , " << (min_seconds / parallel_work_length)
<< std::endl;
}
}
TEST_F( default_exec, gramschmidt ) {
int exp_beg = 10;
int exp_end = 20;
TEST(default_exec, gramschmidt) {
int exp_beg = 10;
int exp_end = 20;
int num_trials = 5;
if(command_line_num_args()>1)
exp_beg = atoi(command_line_arg(1));
if(command_line_num_args()>2)
exp_end = atoi(command_line_arg(2));
if(command_line_num_args()>3)
num_trials = atoi(command_line_arg(3));
EXPECT_NO_THROW(run_test_gramschmidt< Kokkos::DefaultExecutionSpace>( exp_beg, exp_end, num_trials, Kokkos::DefaultExecutionSpace::name() ));
}
if (command_line_num_args() > 1) exp_beg = atoi(command_line_arg(1));
if (command_line_num_args() > 2) exp_end = atoi(command_line_arg(2));
if (command_line_num_args() > 3) num_trials = atoi(command_line_arg(3));
EXPECT_NO_THROW(run_test_gramschmidt<Kokkos::DefaultExecutionSpace>(
exp_beg, exp_end, num_trials, Kokkos::DefaultExecutionSpace::name()));
}
} // namespace Test