git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14370 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2015-12-15 22:26:37 +00:00
parent 31f22919ab
commit 06a217aa08
327 changed files with 0 additions and 95949 deletions

View File

@ -1,296 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_CG_SOLVE
#define KOKKOS_EXAMPLE_CG_SOLVE
#include <cmath>
#include <limits>
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <WrapMPI.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
// Sparse matrix in compressed-row form: a static CRS graph holding the
// sparsity pattern plus one coefficient for every entry of that graph.
//   ValueType : type of the stored coefficients
//   Space     : Kokkos space used for both the graph and the coefficient view
template< typename ValueType , class Space >
struct CrsMatrix {
// Sparsity pattern type (unsigned entries, unsigned row-map offsets).
typedef Kokkos::StaticCrsGraph< unsigned , Space , void , unsigned > StaticCrsGraphType ;
// One-dimensional view of coefficients, indexed by graph entry.
typedef View< ValueType * , Space > coeff_type ;
StaticCrsGraphType graph ;
coeff_type coeff ;
// Empty matrix: default-constructed graph and coefficient view.
CrsMatrix() : graph(), coeff() {}
// Adopt an existing graph and allocate a coefficient view with one slot
// per graph entry (its length is arg_graph.entries.dimension_0()).
CrsMatrix( const StaticCrsGraphType & arg_graph )
: graph( arg_graph )
, coeff( "crs_matrix_coeff" , arg_graph.entries.dimension_0() )
{}
};
// Functor computing one row of the sparse matrix-vector product y = A * x.
// Invoked once per row index; each call writes only m_y(iRow).
template< typename MScalar
        , typename VScalar
        , class Space >
struct Multiply {

  const Example::CrsMatrix< MScalar , Space >   m_A ;
  const Kokkos::View< const VScalar * , Space > m_x ;
  const Kokkos::View< VScalar * , Space >       m_y ;

  // Capture the output vector y, the matrix A and the input vector x.
  Multiply( const View< VScalar * , Space > & y
          , const CrsMatrix< MScalar , Space > & A
          , const View< const VScalar * , Space > & x
          )
    : m_A( A ), m_x( x ), m_y( y )
    {}

  // y(iRow) = sum over the row's graph entries of coeff(entry) * x(column(entry)).
  // The partial sum is accumulated in double before being stored.
  KOKKOS_INLINE_FUNCTION
  void operator()( const int iRow ) const
    {
      const int rowEnd = m_A.graph.row_map[iRow+1];
      int       cursor = m_A.graph.row_map[iRow];

      double acc = 0 ;

      while ( cursor < rowEnd ) {
        acc += m_A.coeff(cursor) * m_x( m_A.graph.entries(cursor) );
        ++cursor ;
      }

      m_y(iRow) = acc ;
    }
};
// Launch the sparse matrix-vector product y = A * x over rows [0,nrow)
// as a Kokkos parallel_for in the given Space.
template< typename MScalar
        , typename VScalar
        , class Space >
inline
void multiply( const int nrow
             , const Kokkos::View< VScalar * , Space > & y
             , const Example::CrsMatrix< MScalar , Space > & A
             , const Kokkos::View< VScalar * , Space > & x
             )
{
  typedef Kokkos::RangePolicy< Space >          policy_type ;
  typedef Multiply< MScalar , VScalar , Space > functor_type ;

  Kokkos::parallel_for( policy_type( 0 , nrow ) , functor_type( y , A , x ) );
}
// Functor for the element-wise vector update w = alpha * x + beta * y.
// Each invocation reads x(i) and y(i) and writes w(i) only.
template< typename ValueType , class Space >
struct WAXPBY {

  const Kokkos::View< const ValueType * , Space > m_x ;
  const Kokkos::View< const ValueType * , Space > m_y ;
  const Kokkos::View< ValueType * , Space >       m_w ;
  const double m_alpha ;
  const double m_beta ;

  // Capture the output vector, the two scale factors and the two inputs.
  WAXPBY( const View< ValueType * , Space > & arg_w
        , const double arg_alpha
        , const View< ValueType * , Space > & arg_x
        , const double arg_beta
        , const View< ValueType * , Space > & arg_y
        )
    : m_x( arg_x )
    , m_y( arg_y )
    , m_w( arg_w )
    , m_alpha( arg_alpha )
    , m_beta( arg_beta )
    {}

  // w(i) = alpha * x(i) + beta * y(i)
  KOKKOS_INLINE_FUNCTION
  void operator()( const int i ) const
    {
      m_w(i) = m_alpha * m_x(i) + m_beta * m_y(i);
    }
};
// Launch w = alpha * x + beta * y over indices [0,n) as a Kokkos
// parallel_for in the given Space.
template< typename VScalar , class Space >
void waxpby( const int n
           , const Kokkos::View< VScalar * , Space > & arg_w
           , const double arg_alpha
           , const Kokkos::View< VScalar * , Space > & arg_x
           , const double arg_beta
           , const Kokkos::View< VScalar * , Space > & arg_y
           )
{
  typedef Kokkos::RangePolicy< Space > policy_type ;
  typedef WAXPBY< VScalar , Space >    functor_type ;

  Kokkos::parallel_for( policy_type( 0 , n )
                      , functor_type( arg_w , arg_alpha , arg_x , arg_beta , arg_y ) );
}
// Reduction functor for the dot product of two vectors.
// The reduction value is a double regardless of VScalar.
template< typename VScalar , class Space >
struct Dot {

  typedef double value_type ;

  const Kokkos::View< const VScalar * , Space > m_x ;
  const Kokkos::View< const VScalar * , Space > m_y ;

  // Capture the two operand vectors.
  Dot( const Kokkos::View< VScalar * , Space > & arg_x
     , const Kokkos::View< VScalar * , Space > & arg_y
     )
    : m_x(arg_x), m_y(arg_y)
    {}

  // Add the contribution of element i to the running reduction value.
  KOKKOS_INLINE_FUNCTION
  void operator()( const int i , value_type & update ) const
    {
      update += m_x(i) * m_y(i);
    }
};
// Dot product of the first n elements of arg_x and arg_y, computed with a
// Kokkos parallel_reduce in the given Space and returned as a double.
template< typename VScalar , class Space >
double dot( const int n
          , const Kokkos::View< VScalar * , Space > & arg_x
          , const Kokkos::View< VScalar * , Space > & arg_y
          )
{
  typedef Kokkos::RangePolicy< Space > policy_type ;
  typedef Dot< VScalar , Space >       functor_type ;

  double total = 0 ;

  Kokkos::parallel_reduce( policy_type( 0 , n ) , functor_type( arg_x , arg_y ) , total );

  return total ;
}
} // namespace Example
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
// Summary of one cgsolve() call; cgsolve() fills every field when the
// caller supplies a non-null result pointer.
struct CGSolveResult {
// Number of CG iterations that were executed.
size_t iteration ;
// Wall-clock seconds reported by the timer created just before the
// iteration loop, read after the final fence.
double iter_time ;
// Accumulated seconds spent in the per-iteration import + matrix-vector
// multiply (including the fence that follows it).
double matvec_time ;
// Square root of the globally reduced r.r at exit.
double norm_res ;
};
// Unpreconditioned conjugate-gradient iteration for A * x = b.
//
//   import            : callable that fills the received (off-process) tail
//                       of a vector of length count_owned + count_receive;
//                       also supplies count_owned, count_receive and comm.
//   A                 : sparse matrix with one row per owned entry.
//   b                 : right-hand side (owned entries).
//   x                 : on entry the initial guess, on exit the iterate
//                       reached when the loop stopped (updated in place).
//   maximum_iteration : upper bound on the number of CG iterations.
//   tolerance         : the loop stops once sqrt(r.r) <= tolerance, where r.r
//                       is combined across 'import.comm' by all_reduce
//                       (presumably a sum over ranks -- defined elsewhere).
//   result            : optional; when non-null receives iteration count,
//                       timings and the final residual norm.
//
// The timers are created after the initial residual has been computed, so
// the reported times cover the iteration loop only (assuming the timer
// starts at construction -- its definition is not visible here).
template< class ImportType
        , typename MScalar
        , typename VScalar
        , class Space
        >
inline
void cgsolve( const ImportType & import
            , const CrsMatrix< MScalar , Space > & A
            , const Kokkos::View< VScalar * , Space > & b
            , const Kokkos::View< VScalar * , Space > & x
            , const size_t maximum_iteration = 200
            , const double tolerance = std::numeric_limits<double>::epsilon()
            , CGSolveResult * result = 0
            )
{
  typedef View< VScalar * , Space > VectorType ;

  const size_t count_owned = import.count_owned ;
  const size_t count_total = import.count_owned + import.count_receive;

  size_t iteration = 0 ;
  double iter_time = 0 ;
  double matvec_time = 0 ;
  double norm_res = 0 ;

  // Need input vector to matvec to be owned + received
  VectorType pAll ( "cg::p" , count_total );

  // 'p' aliases the owned leading part of 'pAll', so writing p also
  // updates the vector handed to import() and multiply().
  VectorType p = Kokkos::subview( pAll , std::pair<size_t,size_t>(0,count_owned) );
  VectorType r ( "cg::r" , count_owned );
  VectorType Ap( "cg::Ap", count_owned );

  /* r = b - A * x ; */

  /* p  = x       */ Kokkos::deep_copy( p , x );
  /* import p     */ import( pAll );
  /* Ap = A * p   */ multiply( count_owned , Ap , A , pAll );
  /* r  = b - Ap  */ waxpby( count_owned , r , 1.0 , b , -1.0 , Ap );
  /* p  = r       */ Kokkos::deep_copy( p , r );

  double old_rdot = Kokkos::Example::all_reduce( dot( count_owned , r , r ) , import.comm );

  // <cmath> only guarantees the std:: overloads, so qualify the call.
  norm_res = std::sqrt( old_rdot );

  Kokkos::Impl::Timer wall_clock ;
  Kokkos::Impl::Timer timer;

  while ( tolerance < norm_res && iteration < maximum_iteration ) {

    /* pAp_dot = dot( p , Ap = A * p ) */

    timer.reset();
    /* import p    */ import( pAll );
    /* Ap = A * p  */ multiply( count_owned , Ap , A , pAll );
    Space::fence();
    matvec_time += timer.seconds();

    const double pAp_dot = Kokkos::Example::all_reduce( dot( count_owned , p , Ap ) , import.comm );
    const double alpha   = old_rdot / pAp_dot ;

    /* x +=  alpha * p ;  */ waxpby( count_owned , x ,  alpha, p  , 1.0 , x );
    /* r += -alpha * Ap ; */ waxpby( count_owned , r , -alpha, Ap , 1.0 , r );

    const double r_dot = Kokkos::Example::all_reduce( dot( count_owned , r , r ) , import.comm );
    const double beta  = r_dot / old_rdot ;

    /* p = r + beta * p ; */ waxpby( count_owned , p , 1.0 , r , beta , p );

    // Carry the new r.r into the next iteration and refresh the norm.
    old_rdot = r_dot ;
    norm_res = std::sqrt( old_rdot );

    ++iteration ;
  }

  Space::fence();
  iter_time = wall_clock.seconds();

  if ( 0 != result ) {
    result->iteration   = iteration ;
    result->iter_time   = iter_time ;
    result->matvec_time = matvec_time ;
    result->norm_res    = norm_res ;
  }
}
} // namespace Example
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_EXAMPLE_CG_SOLVE */

View File

@ -1,50 +0,0 @@
# Build file for the Kokkos FENL example executable.
# Root of the Kokkos tree, relative to this example directory.
KOKKOS_PATH = ../..
# Search these directories for the .cpp sources named in OBJ_EXAMPLE_FENL.
vpath %.cpp ${KOKKOS_PATH}/example/fixture ${KOKKOS_PATH}/example/fenl
# Every example header is a prerequisite of every object (see the pattern rule below).
EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/common/*.hpp ${KOKKOS_PATH}/example/fixture/*.hpp ${KOKKOS_PATH}/example/fenl/*.hpp)
# Default goal: build everything, then print a completion message.
default: build_all
echo "End Build"
# Presumably supplies the KOKKOS_* variables used below (KOKKOS_INTERNAL_USE_CUDA,
# KOKKOS_CPPFLAGS, KOKKOS_LDFLAGS, KOKKOS_LIBS, ...) -- confirm in Makefile.kokkos.
include $(KOKKOS_PATH)/Makefile.kokkos
# With CUDA the compiler and linker are forced to nvcc_wrapper; otherwise
# they default to g++ but can be overridden from the environment or command line.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
# Include paths for the example's own headers.
KOKKOS_CXXFLAGS += \
-I${KOKKOS_PATH}/example/common \
-I${KOKKOS_PATH}/example/fixture \
-I${KOKKOS_PATH}/example/fenl
# Name of the executable and the objects it is linked from.
EXE_EXAMPLE_FENL = KokkosExample_Fenl
OBJ_EXAMPLE_FENL = BoxElemPart.o main.o fenl.o
TARGETS = $(EXE_EXAMPLE_FENL)
#TEST_TARGETS =
# Link step for the example executable.
$(EXE_EXAMPLE_FENL) : $(OBJ_EXAMPLE_FENL) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_EXAMPLE_FENL) $(KOKKOS_LIBS) $(LIB) -o $(EXE_EXAMPLE_FENL)
# Aggregate targets; 'test' only builds, it has no recipe of its own.
build_all : $(TARGETS)
test : build_all
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,117 +0,0 @@
/*
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
*/
#include <HexElement.hpp>
#include <fenl_impl.hpp>
// Explicit instantiations of the fenl<> driver template.
// For each execution space that is enabled through its KOKKOS_HAVE_*
// macro, the driver is instantiated for both the linear and the quadratic
// element order, so that code including only the declaration can link
// against this translation unit.
namespace Kokkos {
namespace Example {
namespace FENL {
// Kokkos::Threads execution space.
#if defined( KOKKOS_HAVE_PTHREAD )
template
Perf fenl< Kokkos::Threads , Kokkos::Example::BoxElemPart::ElemLinear >(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int global_elems[] );
template
Perf fenl< Kokkos::Threads , Kokkos::Example::BoxElemPart::ElemQuadratic >(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int global_elems[] );
#endif
// Kokkos::OpenMP execution space.
#if defined (KOKKOS_HAVE_OPENMP)
template
Perf fenl< Kokkos::OpenMP , Kokkos::Example::BoxElemPart::ElemLinear >(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int global_elems[] );
template
Perf fenl< Kokkos::OpenMP , Kokkos::Example::BoxElemPart::ElemQuadratic >(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int global_elems[] );
#endif
// Kokkos::Cuda execution space.
#if defined( KOKKOS_HAVE_CUDA )
template
Perf fenl< Kokkos::Cuda , Kokkos::Example::BoxElemPart::ElemLinear >(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int global_elems[] );
template
Perf fenl< Kokkos::Cuda , Kokkos::Example::BoxElemPart::ElemQuadratic >(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int global_elems[] );
#endif
} /* namespace FENL */
} /* namespace Example */
} /* namespace Kokkos */

View File

@ -1,89 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_FENL_HPP
#define KOKKOS_EXAMPLE_FENL_HPP
#include <stdlib.h>
#include <BoxElemPart.hpp>
#include <WrapMPI.hpp>
namespace Kokkos {
namespace Example {
namespace FENL {
// Counters and measurements returned by fenl().
// Field descriptions reflect how the fenl() driver fills them; values
// described as "maximum over ranks" are combined across the communicator
// passed to fenl().
struct Perf {
// Global element / node counts reported by the mesh fixture.
size_t global_elem_count ;
size_t global_node_count ;
// Value of the Newton loop counter when the loop was left.
size_t newton_iter_count ;
// CG iterations summed over all Newton steps of a trial.
size_t cg_iter_count ;
// Maximum over ranks of the node-node graph construction measurements
// (presumably a map fill ratio followed by per-phase times -- their
// definitions live in the graph functor, not shown here).
double map_ratio ;
double fill_node_set ;
double scan_node_count ;
double fill_graph_entries ;
double sort_graph_entries ;
double fill_element_graph ;
// Maximum over ranks of the seconds taken to construct the sparse matrix
// from the graph.
double create_sparse_matrix ;
// Maximum over ranks of the seconds spent in element fill and in boundary
// condition application; overwritten on every Newton iteration.
double fill_time ;
double bc_time ;
// Sums over Newton steps of the matvec time and iteration time reported
// by the CG solver.
double matvec_time ;
double cg_time ;
// Residual norm evaluated at the most recent Newton iteration.
double newton_residual ;
// Error measure against the manufactured solution, evaluated on the
// first trial only.
double error_max ;
};
// Run the nonlinear finite-element example and return its measurements.
//   Device       : Kokkos space the computation runs in
//   ElemOrder    : element order of the box mesh fixture
//   comm         : MPI communicator spanning the participating ranks
//   use_print    : non-zero enables diagnostic printing to std::cout
//   use_trials   : number of times the whole solve is repeated
//   use_atomic   : non-zero selects the element computation that is built
//                  with the graph, matrix and residual; zero adds a separate
//                  gather-fill step after the element computation
//   global_elems : element counts; indices 0, 1 and 2 are read and passed
//                  to the mesh fixture
// The template is only declared here; the definition is in fenl_impl.hpp.
template < class Device , BoxElemPart::ElemOrder ElemOrder >
Perf fenl(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int global_elems[] );
} /* namespace FENL */
} /* namespace Example */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_EXAMPLE_FENL_HPP */

File diff suppressed because it is too large Load Diff

View File

@ -1,598 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_FENL_IMPL_HPP
#define KOKKOS_EXAMPLE_FENL_IMPL_HPP
#include <math.h>
// Kokkos libraries' headers:
#include <Kokkos_UnorderedMap.hpp>
#include <Kokkos_StaticCrsGraph.hpp>
#include <impl/Kokkos_Timer.hpp>
// Examples headers:
#include <BoxElemFixture.hpp>
#include <VectorImport.hpp>
#include <CGSolve.hpp>
#include <fenl.hpp>
#include <fenl_functors.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
namespace FENL {
// Return the largest 'local' value found on any rank of 'comm'.
// When Kokkos is built without MPI the local value is returned unchanged.
inline
double maximum( MPI_Comm comm , double local )
{
#if defined( KOKKOS_HAVE_MPI )
  // Seed the result with the local value, then let MPI overwrite it.
  double reduced = local ;
  MPI_Allreduce( & local , & reduced , 1 , MPI_DOUBLE , MPI_MAX , comm );
  return reduced ;
#else
  return local ;
#endif
}
} /* namespace FENL */
} /* namespace Example */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
namespace FENL {
class ManufacturedSolution {
public:

  // Closed-form solution of the one-dimensional nonlinear problem
  //
  //   -K T_zz + T^2 = 0 ,  T(zmin) = T_zmin ,  T(zmax) = T_zmax
  //
  // which is satisfied by
  //
  //   T(z) = ( a ( z - zmin ) + b )^(-2)   with   K = 1 / ( 6 a^2 )
  //
  // The boundary values determine 'a' and 'b', and from 'a' the
  // coefficient K.  Two such solutions exist:
  //
  //   with a singularity inside the interval:
  //     a = ( 1/sqrt(T_zmax) + 1/sqrt(T_zmin) ) / ( zmax - zmin )
  //     b = -1/sqrt(T_zmin)
  //
  //   without a singularity (the one implemented here):
  //     a = ( 1/sqrt(T_zmax) - 1/sqrt(T_zmin) ) / ( zmax - zmin )
  //     b = 1/sqrt(T_zmin)

  const double zmin ;
  const double zmax ;
  const double T_zmin ;
  const double T_zmax ;
  const double a ;
  const double b ;
  const double K ;

  // Store the interval and boundary values and derive a, b and K.
  ManufacturedSolution( const double arg_zmin ,
                        const double arg_zmax ,
                        const double arg_T_zmin ,
                        const double arg_T_zmax )
    : zmin( arg_zmin )
    , zmax( arg_zmax )
    , T_zmin( arg_T_zmin )
    , T_zmax( arg_T_zmax )
    , a( ( 1.0 / sqrt(arg_T_zmax) - 1.0 / sqrt(arg_T_zmin) ) / ( arg_zmax - arg_zmin ) )
    , b( 1.0 / sqrt(arg_T_zmin) )
    , K( 1.0 / ( 6.0 * a * a ) )
    {}

  // Evaluate T at coordinate z.
  double operator()( const double z ) const
    {
      const double base = a * ( z - zmin ) + b ;
      const double squared = base * base ;
      return 1.0 / squared ;
    }
};
} /* namespace FENL */
} /* namespace Example */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
namespace FENL {
template < class Space , BoxElemPart::ElemOrder ElemOrder >
Perf fenl(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int use_elems[] )
{
typedef Kokkos::Example::BoxElemFixture< Space , ElemOrder > FixtureType ;
typedef Kokkos::Example::CrsMatrix< double , Space >
SparseMatrixType ;
typedef typename SparseMatrixType::StaticCrsGraphType
SparseGraphType ;
typedef Kokkos::Example::FENL::NodeNodeGraph< typename FixtureType::elem_node_type , SparseGraphType , FixtureType::ElemNode >
NodeNodeGraphType ;
typedef Kokkos::Example::FENL::ElementComputation< FixtureType , SparseMatrixType >
ElementComputationType ;
typedef Kokkos::Example::FENL::DirichletComputation< FixtureType , SparseMatrixType >
DirichletComputationType ;
typedef NodeElemGatherFill< ElementComputationType >
NodeElemGatherFillType ;
typedef typename ElementComputationType::vector_type VectorType ;
typedef Kokkos::Example::VectorImport<
typename FixtureType::comm_list_type ,
typename FixtureType::send_nodeid_type ,
VectorType > ImportType ;
//------------------------------------
const unsigned newton_iteration_limit = 10 ;
const double newton_iteration_tolerance = 1e-7 ;
const unsigned cg_iteration_limit = 200 ;
const double cg_iteration_tolerance = 1e-7 ;
//------------------------------------
const int print_flag = use_print && Kokkos::Impl::is_same< Kokkos::HostSpace , typename Space::memory_space >::value ;
int comm_rank ;
int comm_size ;
MPI_Comm_rank( comm , & comm_rank );
MPI_Comm_size( comm , & comm_size );
// Decompose by node to avoid mpi-communication for assembly
const float bubble_x = 1.0 ;
const float bubble_y = 1.0 ;
const float bubble_z = 1.0 ;
const FixtureType fixture( BoxElemPart::DecomposeNode , comm_size , comm_rank ,
use_elems[0] , use_elems[1] , use_elems[2] ,
bubble_x , bubble_y , bubble_z );
{
int global_error = ! fixture.ok();
#if defined( KOKKOS_HAVE_MPI )
int local_error = global_error ;
global_error = 0 ;
MPI_Allreduce( & local_error , & global_error , 1 , MPI_INT , MPI_SUM , comm );
#endif
if ( global_error ) {
throw std::runtime_error(std::string("Error generating finite element fixture"));
}
}
//------------------------------------
const ImportType comm_nodal_import(
comm ,
fixture.recv_node() ,
fixture.send_node() ,
fixture.send_nodeid() ,
fixture.node_count_owned() ,
fixture.node_count() - fixture.node_count_owned() );
//------------------------------------
const double bc_lower_value = 1 ;
const double bc_upper_value = 2 ;
const Kokkos::Example::FENL::ManufacturedSolution
manufactured_solution( 0 , 1 , bc_lower_value , bc_upper_value );
//------------------------------------
for ( int k = 0 ; k < comm_size && use_print ; ++k ) {
if ( k == comm_rank ) {
typename FixtureType::node_grid_type::HostMirror
h_node_grid = Kokkos::create_mirror_view( fixture.node_grid() );
typename FixtureType::node_coord_type::HostMirror
h_node_coord = Kokkos::create_mirror_view( fixture.node_coord() );
typename FixtureType::elem_node_type::HostMirror
h_elem_node = Kokkos::create_mirror_view( fixture.elem_node() );
Kokkos::deep_copy( h_node_grid , fixture.node_grid() );
Kokkos::deep_copy( h_node_coord , fixture.node_coord() );
Kokkos::deep_copy( h_elem_node , fixture.elem_node() );
std::cout << "MPI[" << comm_rank << "]" << std::endl ;
std::cout << "Node grid {" ;
for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) {
std::cout << " (" << h_node_grid(inode,0)
<< "," << h_node_grid(inode,1)
<< "," << h_node_grid(inode,2)
<< ")" ;
}
std::cout << " }" << std::endl ;
std::cout << "Node coord {" ;
for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) {
std::cout << " (" << h_node_coord(inode,0)
<< "," << h_node_coord(inode,1)
<< "," << h_node_coord(inode,2)
<< ")" ;
}
std::cout << " }" << std::endl ;
std::cout << "Manufactured solution"
<< " a[" << manufactured_solution.a << "]"
<< " b[" << manufactured_solution.b << "]"
<< " K[" << manufactured_solution.K << "]"
<< " {" ;
for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) {
std::cout << " " << manufactured_solution( h_node_coord( inode , 2 ) );
}
std::cout << " }" << std::endl ;
std::cout << "ElemNode {" << std::endl ;
for ( unsigned ielem = 0 ; ielem < fixture.elem_count() ; ++ielem ) {
std::cout << " elem[" << ielem << "]{" ;
for ( unsigned inode = 0 ; inode < FixtureType::ElemNode ; ++inode ) {
std::cout << " " << h_elem_node(ielem,inode);
}
std::cout << " }{" ;
for ( unsigned inode = 0 ; inode < FixtureType::ElemNode ; ++inode ) {
std::cout << " (" << h_node_grid(h_elem_node(ielem,inode),0)
<< "," << h_node_grid(h_elem_node(ielem,inode),1)
<< "," << h_node_grid(h_elem_node(ielem,inode),2)
<< ")" ;
}
std::cout << " }" << std::endl ;
}
std::cout << "}" << std::endl ;
}
std::cout.flush();
MPI_Barrier( comm );
}
//------------------------------------
Kokkos::Impl::Timer wall_clock ;
Perf perf_stats = Perf() ;
for ( int itrial = 0 ; itrial < use_trials ; ++itrial ) {
Perf perf = Perf() ;
perf.global_elem_count = fixture.elem_count_global();
perf.global_node_count = fixture.node_count_global();
//----------------------------------
// Create the sparse matrix graph and element-to-graph map
// from the element->to->node identifier array.
// The graph only has rows for the owned nodes.
typename NodeNodeGraphType::Times graph_times;
const NodeNodeGraphType
mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(), graph_times );
perf.map_ratio = maximum(comm, graph_times.ratio);
perf.fill_node_set = maximum(comm, graph_times.fill_node_set);
perf.scan_node_count = maximum(comm, graph_times.scan_node_count);
perf.fill_graph_entries = maximum(comm, graph_times.fill_graph_entries);
perf.sort_graph_entries = maximum(comm, graph_times.sort_graph_entries);
perf.fill_element_graph = maximum(comm, graph_times.fill_element_graph);
wall_clock.reset();
// Create the sparse matrix from the graph:
SparseMatrixType jacobian( mesh_to_graph.graph );
Space::fence();
perf.create_sparse_matrix = maximum( comm , wall_clock.seconds() );
//----------------------------------
for ( int k = 0 ; k < comm_size && print_flag ; ++k ) {
if ( k == comm_rank ) {
const unsigned nrow = jacobian.graph.numRows();
std::cout << "MPI[" << comm_rank << "]" << std::endl ;
std::cout << "JacobianGraph {" << std::endl ;
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
std::cout << " row[" << irow << "]{" ;
const unsigned entry_end = jacobian.graph.row_map(irow+1);
for ( unsigned entry = jacobian.graph.row_map(irow) ; entry < entry_end ; ++entry ) {
std::cout << " " << jacobian.graph.entries(entry);
}
std::cout << " }" << std::endl ;
}
std::cout << "}" << std::endl ;
std::cout << "ElemGraph {" << std::endl ;
for ( unsigned ielem = 0 ; ielem < mesh_to_graph.elem_graph.dimension_0() ; ++ielem ) {
std::cout << " elem[" << ielem << "]{" ;
for ( unsigned irow = 0 ; irow < mesh_to_graph.elem_graph.dimension_1() ; ++irow ) {
std::cout << " {" ;
for ( unsigned icol = 0 ; icol < mesh_to_graph.elem_graph.dimension_2() ; ++icol ) {
std::cout << " " << mesh_to_graph.elem_graph(ielem,irow,icol);
}
std::cout << " }" ;
}
std::cout << " }" << std::endl ;
}
std::cout << "}" << std::endl ;
}
std::cout.flush();
MPI_Barrier( comm );
}
//----------------------------------
// Allocate solution vector for each node in the mesh and residual vector for each owned node
const VectorType nodal_solution( "nodal_solution" , fixture.node_count() );
const VectorType nodal_residual( "nodal_residual" , fixture.node_count_owned() );
const VectorType nodal_delta( "nodal_delta" , fixture.node_count_owned() );
// Create element computation functor
const ElementComputationType elemcomp(
use_atomic ? ElementComputationType( fixture , manufactured_solution.K , nodal_solution ,
mesh_to_graph.elem_graph , jacobian , nodal_residual )
: ElementComputationType( fixture , manufactured_solution.K , nodal_solution ) );
const NodeElemGatherFillType gatherfill(
use_atomic ? NodeElemGatherFillType()
: NodeElemGatherFillType( fixture.elem_node() ,
mesh_to_graph.elem_graph ,
nodal_residual ,
jacobian ,
elemcomp.elem_residuals ,
elemcomp.elem_jacobians ) );
// Create boundary condition functor
const DirichletComputationType dirichlet(
fixture , nodal_solution , jacobian , nodal_residual ,
2 /* apply at 'z' ends */ ,
manufactured_solution.T_zmin ,
manufactured_solution.T_zmax );
//----------------------------------
// Nonlinear Newton iteration:
double residual_norm_init = 0 ;
for ( perf.newton_iter_count = 0 ;
perf.newton_iter_count < newton_iteration_limit ;
++perf.newton_iter_count ) {
//--------------------------------
comm_nodal_import( nodal_solution );
//--------------------------------
// Element contributions to residual and jacobian
wall_clock.reset();
Kokkos::deep_copy( nodal_residual , double(0) );
Kokkos::deep_copy( jacobian.coeff , double(0) );
elemcomp.apply();
if ( ! use_atomic ) {
gatherfill.apply();
}
Space::fence();
perf.fill_time = maximum( comm , wall_clock.seconds() );
//--------------------------------
// Apply boundary conditions
wall_clock.reset();
dirichlet.apply();
Space::fence();
perf.bc_time = maximum( comm , wall_clock.seconds() );
//--------------------------------
// Evaluate convergence
const double residual_norm =
std::sqrt(
Kokkos::Example::all_reduce(
Kokkos::Example::dot( fixture.node_count_owned() , nodal_residual, nodal_residual ) , comm ) );
perf.newton_residual = residual_norm ;
if ( 0 == perf.newton_iter_count ) { residual_norm_init = residual_norm ; }
if ( residual_norm < residual_norm_init * newton_iteration_tolerance ) { break ; }
//--------------------------------
// Solve for nonlinear update
CGSolveResult cg_result ;
Kokkos::Example::cgsolve( comm_nodal_import
, jacobian
, nodal_residual
, nodal_delta
, cg_iteration_limit
, cg_iteration_tolerance
, & cg_result
);
// Update solution vector
Kokkos::Example::waxpby( fixture.node_count_owned() , nodal_solution , -1.0 , nodal_delta , 1.0 , nodal_solution );
perf.cg_iter_count += cg_result.iteration ;
perf.matvec_time += cg_result.matvec_time ;
perf.cg_time += cg_result.iter_time ;
//--------------------------------
if ( print_flag ) {
const double delta_norm =
std::sqrt(
Kokkos::Example::all_reduce(
Kokkos::Example::dot( fixture.node_count_owned() , nodal_delta, nodal_delta ) , comm ) );
if ( 0 == comm_rank ) {
std::cout << "Newton iteration[" << perf.newton_iter_count << "]"
<< " residual[" << perf.newton_residual << "]"
<< " update[" << delta_norm << "]"
<< " cg_iteration[" << cg_result.iteration << "]"
<< " cg_residual[" << cg_result.norm_res << "]"
<< std::endl ;
}
for ( int k = 0 ; k < comm_size ; ++k ) {
if ( k == comm_rank ) {
const unsigned nrow = jacobian.graph.numRows();
std::cout << "MPI[" << comm_rank << "]" << std::endl ;
std::cout << "Residual {" ;
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
std::cout << " " << nodal_residual(irow);
}
std::cout << " }" << std::endl ;
std::cout << "Delta {" ;
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
std::cout << " " << nodal_delta(irow);
}
std::cout << " }" << std::endl ;
std::cout << "Solution {" ;
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
std::cout << " " << nodal_solution(irow);
}
std::cout << " }" << std::endl ;
std::cout << "Jacobian[ "
<< jacobian.graph.numRows() << " x " << Kokkos::maximum_entry( jacobian.graph )
<< " ] {" << std::endl ;
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
std::cout << " {" ;
const unsigned entry_end = jacobian.graph.row_map(irow+1);
for ( unsigned entry = jacobian.graph.row_map(irow) ; entry < entry_end ; ++entry ) {
std::cout << " (" << jacobian.graph.entries(entry)
<< "," << jacobian.coeff(entry)
<< ")" ;
}
std::cout << " }" << std::endl ;
}
std::cout << "}" << std::endl ;
}
std::cout.flush();
MPI_Barrier( comm );
}
}
//--------------------------------
}
// Evaluate solution error
if ( 0 == itrial ) {
const typename FixtureType::node_coord_type::HostMirror
h_node_coord = Kokkos::create_mirror_view( fixture.node_coord() );
const typename VectorType::HostMirror
h_nodal_solution = Kokkos::create_mirror_view( nodal_solution );
Kokkos::deep_copy( h_node_coord , fixture.node_coord() );
Kokkos::deep_copy( h_nodal_solution , nodal_solution );
double error_max = 0 ;
for ( unsigned inode = 0 ; inode < fixture.node_count_owned() ; ++inode ) {
const double answer = manufactured_solution( h_node_coord( inode , 2 ) );
const double error = ( h_nodal_solution(inode) - answer ) / answer ;
if ( error_max < fabs( error ) ) { error_max = fabs( error ); }
}
perf.error_max = std::sqrt( Kokkos::Example::all_reduce_max( error_max , comm ) );
perf_stats = perf ;
}
else {
perf_stats.fill_node_set = std::min( perf_stats.fill_node_set , perf.fill_node_set );
perf_stats.scan_node_count = std::min( perf_stats.scan_node_count , perf.scan_node_count );
perf_stats.fill_graph_entries = std::min( perf_stats.fill_graph_entries , perf.fill_graph_entries );
perf_stats.sort_graph_entries = std::min( perf_stats.sort_graph_entries , perf.sort_graph_entries );
perf_stats.fill_element_graph = std::min( perf_stats.fill_element_graph , perf.fill_element_graph );
perf_stats.create_sparse_matrix = std::min( perf_stats.create_sparse_matrix , perf.create_sparse_matrix );
perf_stats.fill_time = std::min( perf_stats.fill_time , perf.fill_time );
perf_stats.bc_time = std::min( perf_stats.bc_time , perf.bc_time );
perf_stats.cg_time = std::min( perf_stats.cg_time , perf.cg_time );
}
}
return perf_stats ;
}
} /* namespace FENL */
} /* namespace Example */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_EXAMPLE_FENL_IMPL_HPP */

View File

@ -1,422 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <utility>
#include <string>
#include <vector>
#include <sstream>
#include <iostream>
#include <iomanip>
#include <Kokkos_Core.hpp>
#include <WrapMPI.hpp>
#include <fenl.hpp>
// For vtune
#include <sys/types.h>
#include <unistd.h>
//----------------------------------------------------------------------------
// Slots of the integer command-line option array filled in by main().
// Every slot starts at zero; CMD_COUNT is the number of slots.
enum {
  CMD_USE_THREADS = 0 ,       // value given after "threads"
  CMD_USE_NUMA ,              // first number of the "cores" AxB argument
  CMD_USE_CORE_PER_NUMA ,     // second number of the "cores" AxB argument
  CMD_USE_CUDA ,              // set by "cuda" or "cuda-dev"
  CMD_USE_OPENMP ,            // value given after "openmp"
  CMD_USE_CUDA_DEV ,          // value given after "cuda-dev"
  CMD_USE_FIXTURE_X ,         // first number of the "fixture" AxBxC argument
  CMD_USE_FIXTURE_Y ,         // second number of the "fixture" AxBxC argument
  CMD_USE_FIXTURE_Z ,         // third number of the "fixture" AxBxC argument
  CMD_USE_FIXTURE_BEGIN ,     // first number of the "fixture-range" A..B argument
  CMD_USE_FIXTURE_END ,       // second number of the "fixture-range" A..B argument
  CMD_USE_FIXTURE_QUADRATIC , // set by "fixture-quadratic"
  CMD_USE_ATOMIC ,            // set by "atomic"
  CMD_USE_TRIALS ,            // value given after "trials"
  CMD_VTUNE ,                 // set by "vtune"
  CMD_PRINT ,                 // set by "print"
  CMD_ECHO ,                  // set by "echo"
  CMD_ERROR ,                 // set when an argument is not recognized
  CMD_COUNT                   // number of option slots (not an option itself)
};
// Writes a one-line summary of the parsed options in cmd to s, ending with
// std::endl. Each group is emitted only when its controlling slot is nonzero;
// the CMD_ECHO and CMD_ERROR slots are never printed.
//
//   s   : destination stream
//   cmd : option array indexed by the CMD_* enumerators
void print_cmdline( std::ostream & s , const int cmd[] )
{
  const int thread_count  = cmd[ CMD_USE_THREADS ];
  const int openmp_count  = cmd[ CMD_USE_OPENMP ];
  const int fixture_x     = cmd[ CMD_USE_FIXTURE_X ];
  const int fixture_begin = cmd[ CMD_USE_FIXTURE_BEGIN ];

  // Both host back ends report the same NUMA / core-per-NUMA pair.
  if ( 0 != thread_count ) {
    s << " Threads(" << thread_count ;
    s << ") NUMA(" << cmd[ CMD_USE_NUMA ] ;
    s << ") CORE_PER_NUMA(" << cmd[ CMD_USE_CORE_PER_NUMA ] ;
    s << ")" ;
  }

  if ( 0 != openmp_count ) {
    s << " OpenMP(" << openmp_count ;
    s << ") NUMA(" << cmd[ CMD_USE_NUMA ] ;
    s << ") CORE_PER_NUMA(" << cmd[ CMD_USE_CORE_PER_NUMA ] ;
    s << ")" ;
  }

  // Explicit X x Y x Z fixture.
  if ( 0 != fixture_x ) {
    s << " Fixture(" << fixture_x ;
    s << "x" << cmd[ CMD_USE_FIXTURE_Y ] ;
    s << "x" << cmd[ CMD_USE_FIXTURE_Z ] ;
    s << ")" ;
  }

  // Fixture size range BEGIN .. END.
  if ( 0 != fixture_begin ) {
    s << " Fixture( " << fixture_begin ;
    s << " .. " << cmd[ CMD_USE_FIXTURE_END ] ;
    s << " )" ;
  }

  if ( 0 != cmd[ CMD_USE_FIXTURE_QUADRATIC ] ) { s << " Quadratic-Element" ; }

  if ( 0 != cmd[ CMD_USE_CUDA ] ) { s << " CUDA(" << cmd[ CMD_USE_CUDA_DEV ] << ")" ; }

  if ( 0 != cmd[ CMD_USE_ATOMIC ] ) { s << " ATOMIC" ; }

  if ( 0 != cmd[ CMD_USE_TRIALS ] ) { s << " TRIALS(" << cmd[ CMD_USE_TRIALS ] << ")" ; }

  if ( 0 != cmd[ CMD_VTUNE ] ) { s << " VTUNE" ; }

  if ( 0 != cmd[ CMD_PRINT ] ) { s << " PRINT" ; }

  s << std::endl ;
}
// Writes one comma-separated row of performance results to s, ending with
// std::endl. Column k is right-aligned to widths[k]; sixteen columns are
// written, so widths must hold at least sixteen entries.
//
//   s      : destination stream
//   widths : per-column field widths
//   perf   : counters and timings to print; the timing columns are multiplied
//            by 1000 and divided by the global node count (and, for the
//            mat-vec and CG columns, by the CG iteration count as well)
void print_perf_value( std::ostream & s , const std::vector<size_t> & widths, const Kokkos::Example::FENL::Perf & perf )
{
  const char * const sep = " ," ;
  size_t col = 0 ;

  // Problem size and iteration counts
  s << std::setw(widths[col]) << perf.global_elem_count << sep ; ++col ;
  s << std::setw(widths[col]) << perf.global_node_count << sep ; ++col ;
  s << std::setw(widths[col]) << perf.newton_iter_count << sep ; ++col ;
  s << std::setw(widths[col]) << perf.cg_iter_count << sep ; ++col ;
  s << std::setw(widths[col]) << perf.map_ratio << sep ; ++col ;

  // Graph and matrix construction timings, per node
  s << std::setw(widths[col]) << ( perf.fill_node_set * 1000.0 ) / perf.global_node_count << sep ; ++col ;
  s << std::setw(widths[col]) << ( perf.scan_node_count * 1000.0 ) / perf.global_node_count << sep ; ++col ;
  s << std::setw(widths[col]) << ( perf.fill_graph_entries * 1000.0 ) / perf.global_node_count << sep ; ++col ;
  s << std::setw(widths[col]) << ( perf.sort_graph_entries * 1000.0 ) / perf.global_node_count << sep ; ++col ;
  s << std::setw(widths[col]) << ( perf.fill_element_graph * 1000.0 ) / perf.global_node_count << sep ; ++col ;
  s << std::setw(widths[col]) << ( perf.create_sparse_matrix * 1000.0 ) / perf.global_node_count << sep ; ++col ;
  s << std::setw(widths[col]) << ( perf.fill_time * 1000.0 ) / perf.global_node_count << sep ; ++col ;
  s << std::setw(widths[col]) << ( perf.bc_time * 1000.0 ) / perf.global_node_count << sep ; ++col ;

  // Solver timings, per CG iteration and per node
  s << std::setw(widths[col]) << ( ( perf.matvec_time * 1000.0 ) / perf.cg_iter_count ) / perf.global_node_count << sep ; ++col ;
  s << std::setw(widths[col]) << ( ( perf.cg_time * 1000.0 ) / perf.cg_iter_count ) / perf.global_node_count << sep ; ++col ;

  // Last column carries no trailing separator
  s << std::setw(widths[col]) << perf.error_max ;
  s << std::endl ;
}
template< class Device , Kokkos::Example::BoxElemPart::ElemOrder ElemOrder >
void run( MPI_Comm comm , const int cmd[] )
{
int comm_rank = 0 ;
#if defined( KOKKOS_HAVE_MPI )
MPI_Comm_rank( comm , & comm_rank );
#else
comm = 0 ;
#endif
if ( 0 == comm_rank ) {
if ( cmd[ CMD_USE_THREADS ] ) { std::cout << "THREADS , " << cmd[ CMD_USE_THREADS ] ; }
else if ( cmd[ CMD_USE_OPENMP ] ) { std::cout << "OPENMP , " << cmd[ CMD_USE_OPENMP ] ; }
else if ( cmd[ CMD_USE_CUDA ] ) { std::cout << "CUDA" ; }
if ( cmd[ CMD_USE_FIXTURE_QUADRATIC ] ) { std::cout << " , QUADRATIC-ELEMENT" ; }
else { std::cout << " , LINEAR-ELEMENT" ; }
if ( cmd[ CMD_USE_ATOMIC ] ) { std::cout << " , USING ATOMICS" ; }
}
std::vector< std::pair<std::string,std::string> > headers;
headers.push_back(std::make_pair("ELEMS","count"));
headers.push_back(std::make_pair("NODES","count"));
headers.push_back(std::make_pair("NEWTON","iter"));
headers.push_back(std::make_pair("CG","iter"));
headers.push_back(std::make_pair("MAP_RATIO","ratio"));
headers.push_back(std::make_pair("SET_FILL/NODE","millisec"));
headers.push_back(std::make_pair("SCAN/NODE","millisec"));
headers.push_back(std::make_pair("GRAPH_FILL/NODE","millisec"));
headers.push_back(std::make_pair("SORT/NODE","millisec"));
headers.push_back(std::make_pair("ELEM_GRAPH_FILL/NODE","millisec"));
headers.push_back(std::make_pair("MATRIX_CREATE/NODE","millisec"));
headers.push_back(std::make_pair("MATRIX_FILL/NODE","millisec"));
headers.push_back(std::make_pair("BOUNDARY/NODE","millisec"));
headers.push_back(std::make_pair("MAT_VEC/ITER/ROW","millisec"));
headers.push_back(std::make_pair("CG/ITER/ROW","millisec"));
headers.push_back(std::make_pair("ERROR","ratio"));
// find print widths
size_t min_width = 10;
std::vector< size_t > widths(headers.size());
for (size_t i=0, ie=headers.size(); i<ie; ++i)
widths[i] = std::max(min_width, headers[i].first.size()+1);
// print column headers
if ( 0 == comm_rank ) {
std::cout << std::endl ;
for (size_t i=0; i<headers.size(); ++i)
std::cout << std::setw(widths[i]) << headers[i].first << " ,";
std::cout << "\b\b " << std::endl;
for (size_t i=0; i<headers.size(); ++i)
std::cout << std::setw(widths[i]) << headers[i].second << " ,";
std::cout << "\b\b " << std::endl;
std::cout << std::scientific;
std::cout.precision(3);
}
if ( cmd[ CMD_USE_FIXTURE_BEGIN ] ) {
for ( int i = cmd[CMD_USE_FIXTURE_BEGIN] ; i < cmd[CMD_USE_FIXTURE_END] * 2 ; i *= 2 ) {
int nelem[3] ;
nelem[0] = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) );
nelem[1] = 1 + nelem[0] ;
nelem[2] = 2 * nelem[0] ;
const Kokkos::Example::FENL::Perf perf =
cmd[ CMD_USE_FIXTURE_QUADRATIC ]
? Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemQuadratic >
( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem )
: Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemLinear >
( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem )
;
if ( 0 == comm_rank ) print_perf_value( std::cout , widths, perf );
}
}
else {
int nelem[3] = { cmd[ CMD_USE_FIXTURE_X ] ,
cmd[ CMD_USE_FIXTURE_Y ] ,
cmd[ CMD_USE_FIXTURE_Z ] };
const Kokkos::Example::FENL::Perf perf =
cmd[ CMD_USE_FIXTURE_QUADRATIC ]
? Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemQuadratic >
( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem )
: Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemLinear >
( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem )
;
if ( 0 == comm_rank ) print_perf_value( std::cout , widths, perf );
}
}
//----------------------------------------------------------------------------
// Returns the value that follows the option argv[i] and advances i onto it.
// If the option is the last argument there is no value: the problem is
// reported on std::cerr, error_flag is set to 1, i is left unchanged and
// NULL is returned.
static const char * option_value( const int argc , char ** argv , int & i , int & error_flag )
{
  if ( i + 1 < argc ) { return argv[ ++i ]; }

  error_flag = 1 ;
  std::cerr << "Missing value for command line argument #" << i << ": " << argv[i] << std::endl ;
  return NULL ;
}

// Driver for the FENL example.
//
// Rank 0 parses the command line into the cmdline[] option array (indexed by
// the CMD_* enumerators); with MPI the array is then broadcast to all ranks.
// Recognized arguments (case-insensitive):
//   threads N | openmp N | cores AxB | cuda | cuda-dev N | fixture AxBxC |
//   fixture-range A..B | fixture-quadratic | atomic | trials N | vtune |
//   print | echo
// "echo" prints the parsed options and skips the run. An unrecognized
// argument or a missing option value sets CMD_ERROR and also skips the run.
//
// Returns -1 if CMD_ERROR was set, otherwise 0.
int main( int argc , char ** argv )
{
  int comm_rank = 0 ;

#if defined( KOKKOS_HAVE_MPI )
  MPI_Init( & argc , & argv );
  MPI_Comm comm = MPI_COMM_WORLD ;
  MPI_Comm_rank( comm , & comm_rank );
#else
  MPI_Comm comm = 0 ;
  (void) comm ; // suppress warning
#endif

  int cmdline[ CMD_COUNT ] ;

  for ( int i = 0 ; i < CMD_COUNT ; ++i ) cmdline[i] = 0 ;

  if ( 0 == comm_rank ) {
    for ( int i = 1 ; i < argc ; ++i ) {
      if ( 0 == strcasecmp( argv[i] , "threads" ) ) {
        const char * const value = option_value( argc , argv , i , cmdline[ CMD_ERROR ] );
        if ( value ) { cmdline[ CMD_USE_THREADS ] = atoi( value ); }
      }
      else if ( 0 == strcasecmp( argv[i] , "openmp" ) ) {
        const char * const value = option_value( argc , argv , i , cmdline[ CMD_ERROR ] );
        if ( value ) { cmdline[ CMD_USE_OPENMP ] = atoi( value ); }
      }
      else if ( 0 == strcasecmp( argv[i] , "cores" ) ) {
        const char * const value = option_value( argc , argv , i , cmdline[ CMD_ERROR ] );
        if ( value ) {
          sscanf( value , "%dx%d" ,
                  cmdline + CMD_USE_NUMA ,
                  cmdline + CMD_USE_CORE_PER_NUMA );
        }
      }
      else if ( 0 == strcasecmp( argv[i] , "cuda" ) ) {
        cmdline[ CMD_USE_CUDA ] = 1 ;
      }
      else if ( 0 == strcasecmp( argv[i] , "cuda-dev" ) ) {
        cmdline[ CMD_USE_CUDA ] = 1 ;
        const char * const value = option_value( argc , argv , i , cmdline[ CMD_ERROR ] );
        if ( value ) { cmdline[ CMD_USE_CUDA_DEV ] = atoi( value ) ; }
      }
      else if ( 0 == strcasecmp( argv[i] , "fixture" ) ) {
        const char * const value = option_value( argc , argv , i , cmdline[ CMD_ERROR ] );
        if ( value ) {
          sscanf( value , "%dx%dx%d" ,
                  cmdline + CMD_USE_FIXTURE_X ,
                  cmdline + CMD_USE_FIXTURE_Y ,
                  cmdline + CMD_USE_FIXTURE_Z );
        }
      }
      else if ( 0 == strcasecmp( argv[i] , "fixture-range" ) ) {
        const char * const value = option_value( argc , argv , i , cmdline[ CMD_ERROR ] );
        if ( value ) {
          sscanf( value , "%d..%d" ,
                  cmdline + CMD_USE_FIXTURE_BEGIN ,
                  cmdline + CMD_USE_FIXTURE_END );
        }
      }
      else if ( 0 == strcasecmp( argv[i] , "fixture-quadratic" ) ) {
        cmdline[ CMD_USE_FIXTURE_QUADRATIC ] = 1 ;
      }
      else if ( 0 == strcasecmp( argv[i] , "atomic" ) ) {
        cmdline[ CMD_USE_ATOMIC ] = 1 ;
      }
      else if ( 0 == strcasecmp( argv[i] , "trials" ) ) {
        const char * const value = option_value( argc , argv , i , cmdline[ CMD_ERROR ] );
        if ( value ) { cmdline[ CMD_USE_TRIALS ] = atoi( value ) ; }
      }
      else if ( 0 == strcasecmp( argv[i] , "vtune" ) ) {
        cmdline[ CMD_VTUNE ] = 1 ;
      }
      else if ( 0 == strcasecmp( argv[i] , "print" ) ) {
        cmdline[ CMD_PRINT ] = 1 ;
      }
      else if ( 0 == strcasecmp( argv[i] , "echo" ) ) {
        cmdline[ CMD_ECHO ] = 1 ;
      }
      else {
        cmdline[ CMD_ERROR ] = 1 ;

        std::cerr << "Unrecognized command line argument #" << i << ": " << argv[i] << std::endl ;
      }
    }

    // Already inside the rank-0 branch, so no further rank test is needed.
    if ( cmdline[ CMD_ECHO ] ) { print_cmdline( std::cout , cmdline ); }
  }

  // Only rank 0 parsed the arguments; share the result with every rank.
#if defined( KOKKOS_HAVE_MPI )
  MPI_Bcast( cmdline , CMD_COUNT , MPI_INT , 0 , comm );
#endif

  // Optionally attach a VTune collector to this process; the command is
  // launched in the background, followed by a 10 second sleep.
  if ( cmdline[ CMD_VTUNE ] ) {
    std::stringstream cmd;
    pid_t my_os_pid=getpid();
    const std::string vtune_loc =
      "/usr/local/intel/vtune_amplifier_xe_2013/bin64/amplxe-cl";
    const std::string output_dir = "./vtune/vtune.";
    const int p_rank = comm_rank;
    cmd << vtune_loc
        << " -collect hotspots -result-dir " << output_dir << p_rank
        << " -target-pid " << my_os_pid << " &";
    if (p_rank == 0)
      std::cout << cmd.str() << std::endl;
    system(cmd.str().c_str());
    system("sleep 10");
  }

  if ( ! cmdline[ CMD_ERROR ] && ! cmdline[ CMD_ECHO ] ) {

    // Defaults: one trial, and a 2x2x2 fixture when no fixture was requested.
    if ( ! cmdline[ CMD_USE_TRIALS ] ) { cmdline[ CMD_USE_TRIALS ] = 1 ; }

    if ( ! cmdline[ CMD_USE_FIXTURE_X ] && ! cmdline[ CMD_USE_FIXTURE_BEGIN ] ) {
      cmdline[ CMD_USE_FIXTURE_X ] = 2 ;
      cmdline[ CMD_USE_FIXTURE_Y ] = 2 ;
      cmdline[ CMD_USE_FIXTURE_Z ] = 2 ;
    }

#if defined( KOKKOS_HAVE_PTHREAD )

    if ( cmdline[ CMD_USE_THREADS ] ) {

      // The NUMA topology is passed on only when both "cores" values are nonzero.
      if ( cmdline[ CMD_USE_NUMA ] && cmdline[ CMD_USE_CORE_PER_NUMA ] ) {
        Kokkos::Threads::initialize( cmdline[ CMD_USE_THREADS ] ,
                                     cmdline[ CMD_USE_NUMA ] ,
                                     cmdline[ CMD_USE_CORE_PER_NUMA ] );
      }
      else {
        Kokkos::Threads::initialize( cmdline[ CMD_USE_THREADS ] );
      }

      run< Kokkos::Threads , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline );

      Kokkos::Threads::finalize();
    }

#endif

#if defined( KOKKOS_HAVE_OPENMP )

    if ( cmdline[ CMD_USE_OPENMP ] ) {

      if ( cmdline[ CMD_USE_NUMA ] && cmdline[ CMD_USE_CORE_PER_NUMA ] ) {
        Kokkos::OpenMP::initialize( cmdline[ CMD_USE_OPENMP ] ,
                                    cmdline[ CMD_USE_NUMA ] ,
                                    cmdline[ CMD_USE_CORE_PER_NUMA ] );
      }
      else {
        Kokkos::OpenMP::initialize( cmdline[ CMD_USE_OPENMP ] );
      }

      run< Kokkos::OpenMP , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline );

      Kokkos::OpenMP::finalize();
    }

#endif

#if defined( KOKKOS_HAVE_CUDA )

    if ( cmdline[ CMD_USE_CUDA ] ) {
      // Select the device index given with "cuda-dev" (0 when not given).
      // The host execution space is initialized first and finalized last.

      Kokkos::HostSpace::execution_space::initialize();
      Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( cmdline[ CMD_USE_CUDA_DEV ] ) );

      run< Kokkos::Cuda , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline );

      Kokkos::Cuda::finalize();
      Kokkos::HostSpace::execution_space::finalize();
    }

#endif

  }

#if defined( KOKKOS_HAVE_MPI )
  MPI_Finalize();
#endif

  return cmdline[ CMD_ERROR ] ? -1 : 0 ;
}