git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14370 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -1,296 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXAMPLE_CG_SOLVE
|
||||
#define KOKKOS_EXAMPLE_CG_SOLVE
|
||||
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
#include <WrapMPI.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
template< typename ValueType , class Space >
|
||||
struct CrsMatrix {
|
||||
typedef Kokkos::StaticCrsGraph< unsigned , Space , void , unsigned > StaticCrsGraphType ;
|
||||
typedef View< ValueType * , Space > coeff_type ;
|
||||
|
||||
StaticCrsGraphType graph ;
|
||||
coeff_type coeff ;
|
||||
|
||||
CrsMatrix() : graph(), coeff() {}
|
||||
|
||||
CrsMatrix( const StaticCrsGraphType & arg_graph )
|
||||
: graph( arg_graph )
|
||||
, coeff( "crs_matrix_coeff" , arg_graph.entries.dimension_0() )
|
||||
{}
|
||||
};
|
||||
|
||||
template< typename MScalar
|
||||
, typename VScalar
|
||||
, class Space >
|
||||
struct Multiply {
|
||||
|
||||
const Example::CrsMatrix< MScalar , Space > m_A ;
|
||||
const Kokkos::View< const VScalar * , Space > m_x ;
|
||||
const Kokkos::View< VScalar * , Space > m_y ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const int iRow ) const
|
||||
{
|
||||
const int iEntryBegin = m_A.graph.row_map[iRow];
|
||||
const int iEntryEnd = m_A.graph.row_map[iRow+1];
|
||||
|
||||
double sum = 0 ;
|
||||
|
||||
for ( int iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
|
||||
sum += m_A.coeff(iEntry) * m_x( m_A.graph.entries(iEntry) );
|
||||
}
|
||||
|
||||
m_y(iRow) = sum ;
|
||||
}
|
||||
|
||||
Multiply( const View< VScalar * , Space > & y
|
||||
, const CrsMatrix< MScalar , Space > & A
|
||||
, const View< const VScalar * , Space > & x
|
||||
)
|
||||
: m_A( A ), m_x( x ), m_y( y )
|
||||
{}
|
||||
};
|
||||
|
||||
template< typename MScalar
|
||||
, typename VScalar
|
||||
, class Space >
|
||||
inline
|
||||
void multiply( const int nrow
|
||||
, const Kokkos::View< VScalar * , Space > & y
|
||||
, const Example::CrsMatrix< MScalar , Space > & A
|
||||
, const Kokkos::View< VScalar * , Space > & x
|
||||
)
|
||||
{
|
||||
Kokkos::parallel_for( Kokkos::RangePolicy<Space>(0,nrow), Multiply<MScalar,VScalar,Space>( y , A , x ) );
|
||||
}
|
||||
|
||||
template< typename ValueType , class Space >
|
||||
struct WAXPBY {
|
||||
const Kokkos::View< const ValueType * , Space > m_x ;
|
||||
const Kokkos::View< const ValueType * , Space > m_y ;
|
||||
const Kokkos::View< ValueType * , Space > m_w ;
|
||||
const double m_alpha ;
|
||||
const double m_beta ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const int i ) const
|
||||
{ m_w(i) = m_alpha * m_x(i) + m_beta * m_y(i); }
|
||||
|
||||
WAXPBY( const View< ValueType * , Space > & arg_w
|
||||
, const double arg_alpha
|
||||
, const View< ValueType * , Space > & arg_x
|
||||
, const double arg_beta
|
||||
, const View< ValueType * , Space > & arg_y
|
||||
)
|
||||
: m_x( arg_x )
|
||||
, m_y( arg_y )
|
||||
, m_w( arg_w )
|
||||
, m_alpha( arg_alpha )
|
||||
, m_beta( arg_beta )
|
||||
{}
|
||||
};
|
||||
|
||||
template< typename VScalar , class Space >
|
||||
void waxpby( const int n
|
||||
, const Kokkos::View< VScalar * , Space > & arg_w
|
||||
, const double arg_alpha
|
||||
, const Kokkos::View< VScalar * , Space > & arg_x
|
||||
, const double arg_beta
|
||||
, const Kokkos::View< VScalar * , Space > & arg_y
|
||||
)
|
||||
{
|
||||
Kokkos::parallel_for( Kokkos::RangePolicy<Space>(0,n), WAXPBY<VScalar,Space>(arg_w,arg_alpha,arg_x,arg_beta,arg_y) );
|
||||
}
|
||||
|
||||
template< typename VScalar , class Space >
|
||||
struct Dot {
|
||||
typedef double value_type ;
|
||||
|
||||
const Kokkos::View< const VScalar * , Space > m_x ;
|
||||
const Kokkos::View< const VScalar * , Space > m_y ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const int i , value_type & update ) const
|
||||
{ update += m_x(i) * m_y(i); }
|
||||
|
||||
Dot( const Kokkos::View< VScalar * , Space > & arg_x
|
||||
, const Kokkos::View< VScalar * , Space > & arg_y
|
||||
)
|
||||
: m_x(arg_x), m_y(arg_y) {}
|
||||
};
|
||||
|
||||
template< typename VScalar , class Space >
|
||||
double dot( const int n
|
||||
, const Kokkos::View< VScalar * , Space > & arg_x
|
||||
, const Kokkos::View< VScalar * , Space > & arg_y
|
||||
)
|
||||
{
|
||||
double result = 0 ;
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy<Space>(0,n) , Dot<VScalar,Space>( arg_x , arg_y ) , result );
|
||||
return result ;
|
||||
}
|
||||
|
||||
} // namespace Example
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
/** Statistics optionally reported by cgsolve(). */
struct CGSolveResult {

  // Number of CG iterations that were executed.
  size_t  iteration ;

  // Wall-clock seconds spent in the iteration loop.
  double  iter_time ;

  // Seconds accumulated in the import + matrix-vector product steps.
  double  matvec_time ;

  // Square root of the globally reduced r.r at exit.
  double  norm_res ;
};
|
||||
|
||||
template< class ImportType
|
||||
, typename MScalar
|
||||
, typename VScalar
|
||||
, class Space
|
||||
>
|
||||
inline
|
||||
void cgsolve( const ImportType & import
|
||||
, const CrsMatrix< MScalar , Space > & A
|
||||
, const Kokkos::View< VScalar * , Space > & b
|
||||
, const Kokkos::View< VScalar * , Space > & x
|
||||
, const size_t maximum_iteration = 200
|
||||
, const double tolerance = std::numeric_limits<double>::epsilon()
|
||||
, CGSolveResult * result = 0
|
||||
)
|
||||
{
|
||||
typedef View< VScalar * , Space > VectorType ;
|
||||
|
||||
const size_t count_owned = import.count_owned ;
|
||||
const size_t count_total = import.count_owned + import.count_receive;
|
||||
|
||||
size_t iteration = 0 ;
|
||||
double iter_time = 0 ;
|
||||
double matvec_time = 0 ;
|
||||
double norm_res = 0 ;
|
||||
|
||||
// Need input vector to matvec to be owned + received
|
||||
VectorType pAll ( "cg::p" , count_total );
|
||||
|
||||
VectorType p = Kokkos::subview( pAll , std::pair<size_t,size_t>(0,count_owned) );
|
||||
VectorType r ( "cg::r" , count_owned );
|
||||
VectorType Ap( "cg::Ap", count_owned );
|
||||
|
||||
/* r = b - A * x ; */
|
||||
|
||||
/* p = x */ Kokkos::deep_copy( p , x );
|
||||
/* import p */ import( pAll );
|
||||
/* Ap = A * p */ multiply( count_owned , Ap , A , pAll );
|
||||
/* r = b - Ap */ waxpby( count_owned , r , 1.0 , b , -1.0 , Ap );
|
||||
/* p = r */ Kokkos::deep_copy( p , r );
|
||||
|
||||
double old_rdot = Kokkos::Example::all_reduce( dot( count_owned , r , r ) , import.comm );
|
||||
|
||||
norm_res = sqrt( old_rdot );
|
||||
iteration = 0 ;
|
||||
|
||||
Kokkos::Impl::Timer wall_clock ;
|
||||
Kokkos::Impl::Timer timer;
|
||||
|
||||
while ( tolerance < norm_res && iteration < maximum_iteration ) {
|
||||
|
||||
/* pAp_dot = dot( p , Ap = A * p ) */
|
||||
|
||||
timer.reset();
|
||||
/* import p */ import( pAll );
|
||||
/* Ap = A * p */ multiply( count_owned , Ap , A , pAll );
|
||||
Space::fence();
|
||||
matvec_time += timer.seconds();
|
||||
|
||||
const double pAp_dot = Kokkos::Example::all_reduce( dot( count_owned , p , Ap ) , import.comm );
|
||||
const double alpha = old_rdot / pAp_dot ;
|
||||
|
||||
/* x += alpha * p ; */ waxpby( count_owned , x , alpha, p , 1.0 , x );
|
||||
/* r += -alpha * Ap ; */ waxpby( count_owned , r , -alpha, Ap , 1.0 , r );
|
||||
|
||||
const double r_dot = Kokkos::Example::all_reduce( dot( count_owned , r , r ) , import.comm );
|
||||
const double beta = r_dot / old_rdot ;
|
||||
|
||||
/* p = r + beta * p ; */ waxpby( count_owned , p , 1.0 , r , beta , p );
|
||||
|
||||
norm_res = sqrt( old_rdot = r_dot );
|
||||
|
||||
++iteration ;
|
||||
}
|
||||
|
||||
Space::fence();
|
||||
iter_time = wall_clock.seconds();
|
||||
|
||||
if ( 0 != result ) {
|
||||
result->iteration = iteration ;
|
||||
result->iter_time = iter_time ;
|
||||
result->matvec_time = matvec_time ;
|
||||
result->norm_res = norm_res ;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Example
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_EXAMPLE_CG_SOLVE */
|
||||
|
||||
|
||||
@ -1,50 +0,0 @@
|
||||
# Builds the KokkosExample_Fenl executable from sources located in the
# example/fixture and example/fenl directories of the Kokkos tree.

KOKKOS_PATH = ../..

# Let make locate the .cpp files of both example directories by bare name.
vpath %.cpp ${KOKKOS_PATH}/example/fixture ${KOKKOS_PATH}/example/fenl

# Every example header is a prerequisite of every object file (see the
# compilation rule at the bottom), so touching a header rebuilds everything.
EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/common/*.hpp ${KOKKOS_PATH}/example/fixture/*.hpp ${KOKKOS_PATH}/example/fenl/*.hpp)

# Must stay ahead of the include below so that it remains the default goal.
default: build_all
	echo "End Build"

# These goals name actions, not files; without this declaration a stray file
# called 'default', 'build_all' or 'test' would make the goal look up to date.
.PHONY: default build_all test

include $(KOKKOS_PATH)/Makefile.kokkos

# Compiler / linker selection; CUDA builds go through nvcc_wrapper.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS +=	\
	-I${KOKKOS_PATH}/example/common	\
	-I${KOKKOS_PATH}/example/fixture	\
	-I${KOKKOS_PATH}/example/fenl

EXE_EXAMPLE_FENL = KokkosExample_Fenl
OBJ_EXAMPLE_FENL = BoxElemPart.o main.o fenl.o

TARGETS = $(EXE_EXAMPLE_FENL)

#TEST_TARGETS =

$(EXE_EXAMPLE_FENL) : $(OBJ_EXAMPLE_FENL) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_EXAMPLE_FENL) $(KOKKOS_LIBS) $(LIB) -o $(EXE_EXAMPLE_FENL)

build_all : $(TARGETS)

# No test recipe: 'test' only ensures that everything is built.
test : build_all

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
@ -1,117 +0,0 @@
|
||||
/*
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
*/
|
||||
|
||||
#include <HexElement.hpp>
|
||||
#include <fenl_impl.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
namespace FENL {
|
||||
|
||||
// Explicit instantiations of fenl<>: one linear-element and one
// quadratic-element version for each execution space enabled at
// configure time.  The parameter list is, in order:
//   ( comm , use_print , use_trials , use_atomic , global_elems[] ).

#if defined( KOKKOS_HAVE_PTHREAD )

template Perf
fenl< Kokkos::Threads , Kokkos::Example::BoxElemPart::ElemLinear >(
  MPI_Comm , const int , const int , const int , const int [] );

template Perf
fenl< Kokkos::Threads , Kokkos::Example::BoxElemPart::ElemQuadratic >(
  MPI_Comm , const int , const int , const int , const int [] );

#endif /* KOKKOS_HAVE_PTHREAD */

#if defined (KOKKOS_HAVE_OPENMP)

template Perf
fenl< Kokkos::OpenMP , Kokkos::Example::BoxElemPart::ElemLinear >(
  MPI_Comm , const int , const int , const int , const int [] );

template Perf
fenl< Kokkos::OpenMP , Kokkos::Example::BoxElemPart::ElemQuadratic >(
  MPI_Comm , const int , const int , const int , const int [] );

#endif /* KOKKOS_HAVE_OPENMP */

#if defined( KOKKOS_HAVE_CUDA )

template Perf
fenl< Kokkos::Cuda , Kokkos::Example::BoxElemPart::ElemLinear >(
  MPI_Comm , const int , const int , const int , const int [] );

template Perf
fenl< Kokkos::Cuda , Kokkos::Example::BoxElemPart::ElemQuadratic >(
  MPI_Comm , const int , const int , const int , const int [] );

#endif /* KOKKOS_HAVE_CUDA */
|
||||
|
||||
|
||||
} /* namespace FENL */
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
@ -1,89 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXAMPLE_FENL_HPP
|
||||
#define KOKKOS_EXAMPLE_FENL_HPP
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <BoxElemPart.hpp>
|
||||
#include <WrapMPI.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
namespace FENL {
|
||||
|
||||
/** Counters and timings returned by fenl().
 *
 *  The timing fields filled in the visible part of fenl() are maxima
 *  over all MPI ranks.
 */
struct Perf {

  // Problem size and iteration counts.
  size_t global_elem_count ;     // fixture.elem_count_global()
  size_t global_node_count ;     // fixture.node_count_global()
  size_t newton_iter_count ;
  size_t cg_iter_count ;

  // Node-to-node graph construction ( from NodeNodeGraph::Times ).
  double map_ratio ;
  double fill_node_set ;
  double scan_node_count ;
  double fill_graph_entries ;
  double sort_graph_entries ;
  double fill_element_graph ;

  // Time to construct the sparse matrix from the graph.
  double create_sparse_matrix ;

  // Remaining timings and results.
  // NOTE(review): set outside the visible excerpt; meanings inferred from
  // the names only.
  double fill_time ;
  double bc_time ;
  double matvec_time ;
  double cg_time ;
  double newton_residual ;
  double error_max ;
};
|
||||
|
||||
template < class Device , BoxElemPart::ElemOrder ElemOrder >
|
||||
Perf fenl(
|
||||
MPI_Comm comm ,
|
||||
const int use_print ,
|
||||
const int use_trials ,
|
||||
const int use_atomic ,
|
||||
const int global_elems[] );
|
||||
|
||||
} /* namespace FENL */
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #ifndef KOKKOS_EXAMPLE_FENL_HPP */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,598 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXAMPLE_FENL_IMPL_HPP
|
||||
#define KOKKOS_EXAMPLE_FENL_IMPL_HPP
|
||||
|
||||
#include <math.h>
|
||||
|
||||
// Kokkos libraries' headers:
|
||||
|
||||
#include <Kokkos_UnorderedMap.hpp>
|
||||
#include <Kokkos_StaticCrsGraph.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
// Examples headers:
|
||||
|
||||
#include <BoxElemFixture.hpp>
|
||||
#include <VectorImport.hpp>
|
||||
#include <CGSolve.hpp>
|
||||
|
||||
#include <fenl.hpp>
|
||||
#include <fenl_functors.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
namespace FENL {
|
||||
|
||||
inline
|
||||
double maximum( MPI_Comm comm , double local )
|
||||
{
|
||||
double global = local ;
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
MPI_Allreduce( & local , & global , 1 , MPI_DOUBLE , MPI_MAX , comm );
|
||||
#endif
|
||||
return global ;
|
||||
}
|
||||
|
||||
} /* namespace FENL */
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
namespace FENL {
|
||||
|
||||
class ManufacturedSolution {
public:

  // Manufactured solution for the one dimensional nonlinear PDE
  //
  //   -K T_zz + T^2 = 0 ; T(zmin) = T_zmin ; T(zmax) = T_zmax
  //
  // which admits analytic solutions of the form
  //
  //   T(z) = ( a ( z - zmin ) + b )^(-2)  where  K = 1 / ( 6 a^2 )
  //
  // Given the boundary values T_zmin and T_zmax, the coefficients a and b
  // and the matching K are computed for this analytic solution.
  //
  // Two choices of ( a , b ) satisfy the boundary conditions:
  //
  //   With a singularity inside the interval:
  //     a = ( 1.0 / sqrt(T_zmax) + 1.0 / sqrt(T_zmin) ) / ( zmax - zmin )
  //     b = -1.0 / sqrt(T_zmin)
  //
  //   Without a singularity ( the one implemented below ):
  //     a = ( 1.0 / sqrt(T_zmax) - 1.0 / sqrt(T_zmin) ) / ( zmax - zmin )
  //     b = 1.0 / sqrt(T_zmin)

  const double zmin ;
  const double zmax ;
  const double T_zmin ;
  const double T_zmax ;
  const double a ;
  const double b ;
  const double K ;

  // Store the interval and boundary values and derive a, b and K.
  ManufacturedSolution( const double z_lo ,
                        const double z_hi ,
                        const double T_lo ,
                        const double T_hi )
    : zmin( z_lo )
    , zmax( z_hi )
    , T_zmin( T_lo )
    , T_zmax( T_hi )
    , a( ( 1.0 / sqrt(T_hi) - 1.0 / sqrt(T_lo) ) / ( z_hi - z_lo ) )
    , b( 1.0 / sqrt(T_lo) )
    , K( 1.0 / ( 6.0 * a * a ) )
    {}

  // Evaluate T(z) = ( a ( z - zmin ) + b )^(-2).
  double operator()( const double z ) const
    {
      const double u = a * ( z - zmin ) + b ;
      return 1.0 / ( u * u );
    }
};
|
||||
|
||||
} /* namespace FENL */
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
namespace FENL {
|
||||
|
||||
template < class Space , BoxElemPart::ElemOrder ElemOrder >
|
||||
Perf fenl(
|
||||
MPI_Comm comm ,
|
||||
const int use_print ,
|
||||
const int use_trials ,
|
||||
const int use_atomic ,
|
||||
const int use_elems[] )
|
||||
{
|
||||
typedef Kokkos::Example::BoxElemFixture< Space , ElemOrder > FixtureType ;
|
||||
|
||||
typedef Kokkos::Example::CrsMatrix< double , Space >
|
||||
SparseMatrixType ;
|
||||
|
||||
typedef typename SparseMatrixType::StaticCrsGraphType
|
||||
SparseGraphType ;
|
||||
|
||||
typedef Kokkos::Example::FENL::NodeNodeGraph< typename FixtureType::elem_node_type , SparseGraphType , FixtureType::ElemNode >
|
||||
NodeNodeGraphType ;
|
||||
|
||||
typedef Kokkos::Example::FENL::ElementComputation< FixtureType , SparseMatrixType >
|
||||
ElementComputationType ;
|
||||
|
||||
typedef Kokkos::Example::FENL::DirichletComputation< FixtureType , SparseMatrixType >
|
||||
DirichletComputationType ;
|
||||
|
||||
typedef NodeElemGatherFill< ElementComputationType >
|
||||
NodeElemGatherFillType ;
|
||||
|
||||
typedef typename ElementComputationType::vector_type VectorType ;
|
||||
|
||||
typedef Kokkos::Example::VectorImport<
|
||||
typename FixtureType::comm_list_type ,
|
||||
typename FixtureType::send_nodeid_type ,
|
||||
VectorType > ImportType ;
|
||||
|
||||
//------------------------------------
|
||||
|
||||
const unsigned newton_iteration_limit = 10 ;
|
||||
const double newton_iteration_tolerance = 1e-7 ;
|
||||
const unsigned cg_iteration_limit = 200 ;
|
||||
const double cg_iteration_tolerance = 1e-7 ;
|
||||
|
||||
//------------------------------------
|
||||
|
||||
const int print_flag = use_print && Kokkos::Impl::is_same< Kokkos::HostSpace , typename Space::memory_space >::value ;
|
||||
|
||||
int comm_rank ;
|
||||
int comm_size ;
|
||||
|
||||
MPI_Comm_rank( comm , & comm_rank );
|
||||
MPI_Comm_size( comm , & comm_size );
|
||||
|
||||
// Decompose by node to avoid mpi-communication for assembly
|
||||
|
||||
const float bubble_x = 1.0 ;
|
||||
const float bubble_y = 1.0 ;
|
||||
const float bubble_z = 1.0 ;
|
||||
|
||||
const FixtureType fixture( BoxElemPart::DecomposeNode , comm_size , comm_rank ,
|
||||
use_elems[0] , use_elems[1] , use_elems[2] ,
|
||||
bubble_x , bubble_y , bubble_z );
|
||||
|
||||
|
||||
{
|
||||
int global_error = ! fixture.ok();
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
int local_error = global_error ;
|
||||
global_error = 0 ;
|
||||
MPI_Allreduce( & local_error , & global_error , 1 , MPI_INT , MPI_SUM , comm );
|
||||
#endif
|
||||
|
||||
if ( global_error ) {
|
||||
throw std::runtime_error(std::string("Error generating finite element fixture"));
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
const ImportType comm_nodal_import(
|
||||
comm ,
|
||||
fixture.recv_node() ,
|
||||
fixture.send_node() ,
|
||||
fixture.send_nodeid() ,
|
||||
fixture.node_count_owned() ,
|
||||
fixture.node_count() - fixture.node_count_owned() );
|
||||
|
||||
//------------------------------------
|
||||
|
||||
const double bc_lower_value = 1 ;
|
||||
const double bc_upper_value = 2 ;
|
||||
|
||||
const Kokkos::Example::FENL::ManufacturedSolution
|
||||
manufactured_solution( 0 , 1 , bc_lower_value , bc_upper_value );
|
||||
|
||||
//------------------------------------
|
||||
|
||||
for ( int k = 0 ; k < comm_size && use_print ; ++k ) {
|
||||
if ( k == comm_rank ) {
|
||||
typename FixtureType::node_grid_type::HostMirror
|
||||
h_node_grid = Kokkos::create_mirror_view( fixture.node_grid() );
|
||||
|
||||
typename FixtureType::node_coord_type::HostMirror
|
||||
h_node_coord = Kokkos::create_mirror_view( fixture.node_coord() );
|
||||
|
||||
typename FixtureType::elem_node_type::HostMirror
|
||||
h_elem_node = Kokkos::create_mirror_view( fixture.elem_node() );
|
||||
|
||||
Kokkos::deep_copy( h_node_grid , fixture.node_grid() );
|
||||
Kokkos::deep_copy( h_node_coord , fixture.node_coord() );
|
||||
Kokkos::deep_copy( h_elem_node , fixture.elem_node() );
|
||||
|
||||
std::cout << "MPI[" << comm_rank << "]" << std::endl ;
|
||||
std::cout << "Node grid {" ;
|
||||
for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) {
|
||||
std::cout << " (" << h_node_grid(inode,0)
|
||||
<< "," << h_node_grid(inode,1)
|
||||
<< "," << h_node_grid(inode,2)
|
||||
<< ")" ;
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
|
||||
std::cout << "Node coord {" ;
|
||||
for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) {
|
||||
std::cout << " (" << h_node_coord(inode,0)
|
||||
<< "," << h_node_coord(inode,1)
|
||||
<< "," << h_node_coord(inode,2)
|
||||
<< ")" ;
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
|
||||
std::cout << "Manufactured solution"
|
||||
<< " a[" << manufactured_solution.a << "]"
|
||||
<< " b[" << manufactured_solution.b << "]"
|
||||
<< " K[" << manufactured_solution.K << "]"
|
||||
<< " {" ;
|
||||
for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) {
|
||||
std::cout << " " << manufactured_solution( h_node_coord( inode , 2 ) );
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
|
||||
std::cout << "ElemNode {" << std::endl ;
|
||||
for ( unsigned ielem = 0 ; ielem < fixture.elem_count() ; ++ielem ) {
|
||||
std::cout << " elem[" << ielem << "]{" ;
|
||||
for ( unsigned inode = 0 ; inode < FixtureType::ElemNode ; ++inode ) {
|
||||
std::cout << " " << h_elem_node(ielem,inode);
|
||||
}
|
||||
std::cout << " }{" ;
|
||||
for ( unsigned inode = 0 ; inode < FixtureType::ElemNode ; ++inode ) {
|
||||
std::cout << " (" << h_node_grid(h_elem_node(ielem,inode),0)
|
||||
<< "," << h_node_grid(h_elem_node(ielem,inode),1)
|
||||
<< "," << h_node_grid(h_elem_node(ielem,inode),2)
|
||||
<< ")" ;
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
}
|
||||
std::cout << "}" << std::endl ;
|
||||
}
|
||||
std::cout.flush();
|
||||
MPI_Barrier( comm );
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
Kokkos::Impl::Timer wall_clock ;
|
||||
|
||||
Perf perf_stats = Perf() ;
|
||||
|
||||
for ( int itrial = 0 ; itrial < use_trials ; ++itrial ) {
|
||||
|
||||
Perf perf = Perf() ;
|
||||
|
||||
perf.global_elem_count = fixture.elem_count_global();
|
||||
perf.global_node_count = fixture.node_count_global();
|
||||
|
||||
//----------------------------------
|
||||
// Create the sparse matrix graph and element-to-graph map
|
||||
// from the element->to->node identifier array.
|
||||
// The graph only has rows for the owned nodes.
|
||||
|
||||
typename NodeNodeGraphType::Times graph_times;
|
||||
|
||||
const NodeNodeGraphType
|
||||
mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(), graph_times );
|
||||
|
||||
perf.map_ratio = maximum(comm, graph_times.ratio);
|
||||
perf.fill_node_set = maximum(comm, graph_times.fill_node_set);
|
||||
perf.scan_node_count = maximum(comm, graph_times.scan_node_count);
|
||||
perf.fill_graph_entries = maximum(comm, graph_times.fill_graph_entries);
|
||||
perf.sort_graph_entries = maximum(comm, graph_times.sort_graph_entries);
|
||||
perf.fill_element_graph = maximum(comm, graph_times.fill_element_graph);
|
||||
|
||||
wall_clock.reset();
|
||||
// Create the sparse matrix from the graph:
|
||||
|
||||
SparseMatrixType jacobian( mesh_to_graph.graph );
|
||||
|
||||
Space::fence();
|
||||
|
||||
perf.create_sparse_matrix = maximum( comm , wall_clock.seconds() );
|
||||
|
||||
//----------------------------------
|
||||
|
||||
for ( int k = 0 ; k < comm_size && print_flag ; ++k ) {
|
||||
if ( k == comm_rank ) {
|
||||
const unsigned nrow = jacobian.graph.numRows();
|
||||
std::cout << "MPI[" << comm_rank << "]" << std::endl ;
|
||||
std::cout << "JacobianGraph {" << std::endl ;
|
||||
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
|
||||
std::cout << " row[" << irow << "]{" ;
|
||||
const unsigned entry_end = jacobian.graph.row_map(irow+1);
|
||||
for ( unsigned entry = jacobian.graph.row_map(irow) ; entry < entry_end ; ++entry ) {
|
||||
std::cout << " " << jacobian.graph.entries(entry);
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
}
|
||||
std::cout << "}" << std::endl ;
|
||||
|
||||
std::cout << "ElemGraph {" << std::endl ;
|
||||
for ( unsigned ielem = 0 ; ielem < mesh_to_graph.elem_graph.dimension_0() ; ++ielem ) {
|
||||
std::cout << " elem[" << ielem << "]{" ;
|
||||
for ( unsigned irow = 0 ; irow < mesh_to_graph.elem_graph.dimension_1() ; ++irow ) {
|
||||
std::cout << " {" ;
|
||||
for ( unsigned icol = 0 ; icol < mesh_to_graph.elem_graph.dimension_2() ; ++icol ) {
|
||||
std::cout << " " << mesh_to_graph.elem_graph(ielem,irow,icol);
|
||||
}
|
||||
std::cout << " }" ;
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
}
|
||||
std::cout << "}" << std::endl ;
|
||||
}
|
||||
std::cout.flush();
|
||||
MPI_Barrier( comm );
|
||||
}
|
||||
|
||||
//----------------------------------
|
||||
|
||||
// Allocate solution vector for each node in the mesh and residual vector for each owned node
|
||||
const VectorType nodal_solution( "nodal_solution" , fixture.node_count() );
|
||||
const VectorType nodal_residual( "nodal_residual" , fixture.node_count_owned() );
|
||||
const VectorType nodal_delta( "nodal_delta" , fixture.node_count_owned() );
|
||||
|
||||
// Create element computation functor
|
||||
const ElementComputationType elemcomp(
|
||||
use_atomic ? ElementComputationType( fixture , manufactured_solution.K , nodal_solution ,
|
||||
mesh_to_graph.elem_graph , jacobian , nodal_residual )
|
||||
: ElementComputationType( fixture , manufactured_solution.K , nodal_solution ) );
|
||||
|
||||
const NodeElemGatherFillType gatherfill(
|
||||
use_atomic ? NodeElemGatherFillType()
|
||||
: NodeElemGatherFillType( fixture.elem_node() ,
|
||||
mesh_to_graph.elem_graph ,
|
||||
nodal_residual ,
|
||||
jacobian ,
|
||||
elemcomp.elem_residuals ,
|
||||
elemcomp.elem_jacobians ) );
|
||||
|
||||
// Create boundary condition functor
|
||||
const DirichletComputationType dirichlet(
|
||||
fixture , nodal_solution , jacobian , nodal_residual ,
|
||||
2 /* apply at 'z' ends */ ,
|
||||
manufactured_solution.T_zmin ,
|
||||
manufactured_solution.T_zmax );
|
||||
|
||||
//----------------------------------
|
||||
// Nonlinear Newton iteration:
|
||||
|
||||
double residual_norm_init = 0 ;
|
||||
|
||||
for ( perf.newton_iter_count = 0 ;
|
||||
perf.newton_iter_count < newton_iteration_limit ;
|
||||
++perf.newton_iter_count ) {
|
||||
|
||||
//--------------------------------
|
||||
|
||||
comm_nodal_import( nodal_solution );
|
||||
|
||||
//--------------------------------
|
||||
// Element contributions to residual and jacobian
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
Kokkos::deep_copy( nodal_residual , double(0) );
|
||||
Kokkos::deep_copy( jacobian.coeff , double(0) );
|
||||
|
||||
elemcomp.apply();
|
||||
|
||||
if ( ! use_atomic ) {
|
||||
gatherfill.apply();
|
||||
}
|
||||
|
||||
Space::fence();
|
||||
perf.fill_time = maximum( comm , wall_clock.seconds() );
|
||||
|
||||
//--------------------------------
|
||||
// Apply boundary conditions
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
dirichlet.apply();
|
||||
|
||||
Space::fence();
|
||||
perf.bc_time = maximum( comm , wall_clock.seconds() );
|
||||
|
||||
//--------------------------------
|
||||
// Evaluate convergence
|
||||
|
||||
const double residual_norm =
|
||||
std::sqrt(
|
||||
Kokkos::Example::all_reduce(
|
||||
Kokkos::Example::dot( fixture.node_count_owned() , nodal_residual, nodal_residual ) , comm ) );
|
||||
|
||||
perf.newton_residual = residual_norm ;
|
||||
|
||||
if ( 0 == perf.newton_iter_count ) { residual_norm_init = residual_norm ; }
|
||||
|
||||
if ( residual_norm < residual_norm_init * newton_iteration_tolerance ) { break ; }
|
||||
|
||||
//--------------------------------
|
||||
// Solve for nonlinear update
|
||||
|
||||
CGSolveResult cg_result ;
|
||||
|
||||
Kokkos::Example::cgsolve( comm_nodal_import
|
||||
, jacobian
|
||||
, nodal_residual
|
||||
, nodal_delta
|
||||
, cg_iteration_limit
|
||||
, cg_iteration_tolerance
|
||||
, & cg_result
|
||||
);
|
||||
|
||||
// Update solution vector
|
||||
|
||||
Kokkos::Example::waxpby( fixture.node_count_owned() , nodal_solution , -1.0 , nodal_delta , 1.0 , nodal_solution );
|
||||
|
||||
perf.cg_iter_count += cg_result.iteration ;
|
||||
perf.matvec_time += cg_result.matvec_time ;
|
||||
perf.cg_time += cg_result.iter_time ;
|
||||
|
||||
//--------------------------------
|
||||
|
||||
if ( print_flag ) {
|
||||
const double delta_norm =
|
||||
std::sqrt(
|
||||
Kokkos::Example::all_reduce(
|
||||
Kokkos::Example::dot( fixture.node_count_owned() , nodal_delta, nodal_delta ) , comm ) );
|
||||
|
||||
if ( 0 == comm_rank ) {
|
||||
std::cout << "Newton iteration[" << perf.newton_iter_count << "]"
|
||||
<< " residual[" << perf.newton_residual << "]"
|
||||
<< " update[" << delta_norm << "]"
|
||||
<< " cg_iteration[" << cg_result.iteration << "]"
|
||||
<< " cg_residual[" << cg_result.norm_res << "]"
|
||||
<< std::endl ;
|
||||
}
|
||||
|
||||
for ( int k = 0 ; k < comm_size ; ++k ) {
|
||||
if ( k == comm_rank ) {
|
||||
const unsigned nrow = jacobian.graph.numRows();
|
||||
|
||||
std::cout << "MPI[" << comm_rank << "]" << std::endl ;
|
||||
std::cout << "Residual {" ;
|
||||
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
|
||||
std::cout << " " << nodal_residual(irow);
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
|
||||
std::cout << "Delta {" ;
|
||||
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
|
||||
std::cout << " " << nodal_delta(irow);
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
|
||||
std::cout << "Solution {" ;
|
||||
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
|
||||
std::cout << " " << nodal_solution(irow);
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
|
||||
std::cout << "Jacobian[ "
|
||||
<< jacobian.graph.numRows() << " x " << Kokkos::maximum_entry( jacobian.graph )
|
||||
<< " ] {" << std::endl ;
|
||||
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
|
||||
std::cout << " {" ;
|
||||
const unsigned entry_end = jacobian.graph.row_map(irow+1);
|
||||
for ( unsigned entry = jacobian.graph.row_map(irow) ; entry < entry_end ; ++entry ) {
|
||||
std::cout << " (" << jacobian.graph.entries(entry)
|
||||
<< "," << jacobian.coeff(entry)
|
||||
<< ")" ;
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
}
|
||||
std::cout << "}" << std::endl ;
|
||||
}
|
||||
std::cout.flush();
|
||||
MPI_Barrier( comm );
|
||||
}
|
||||
}
|
||||
//--------------------------------
|
||||
}
|
||||
|
||||
// Evaluate solution error
|
||||
|
||||
if ( 0 == itrial ) {
|
||||
const typename FixtureType::node_coord_type::HostMirror
|
||||
h_node_coord = Kokkos::create_mirror_view( fixture.node_coord() );
|
||||
|
||||
const typename VectorType::HostMirror
|
||||
h_nodal_solution = Kokkos::create_mirror_view( nodal_solution );
|
||||
|
||||
Kokkos::deep_copy( h_node_coord , fixture.node_coord() );
|
||||
Kokkos::deep_copy( h_nodal_solution , nodal_solution );
|
||||
|
||||
double error_max = 0 ;
|
||||
for ( unsigned inode = 0 ; inode < fixture.node_count_owned() ; ++inode ) {
|
||||
const double answer = manufactured_solution( h_node_coord( inode , 2 ) );
|
||||
const double error = ( h_nodal_solution(inode) - answer ) / answer ;
|
||||
if ( error_max < fabs( error ) ) { error_max = fabs( error ); }
|
||||
}
|
||||
|
||||
perf.error_max = std::sqrt( Kokkos::Example::all_reduce_max( error_max , comm ) );
|
||||
|
||||
perf_stats = perf ;
|
||||
}
|
||||
else {
|
||||
perf_stats.fill_node_set = std::min( perf_stats.fill_node_set , perf.fill_node_set );
|
||||
perf_stats.scan_node_count = std::min( perf_stats.scan_node_count , perf.scan_node_count );
|
||||
perf_stats.fill_graph_entries = std::min( perf_stats.fill_graph_entries , perf.fill_graph_entries );
|
||||
perf_stats.sort_graph_entries = std::min( perf_stats.sort_graph_entries , perf.sort_graph_entries );
|
||||
perf_stats.fill_element_graph = std::min( perf_stats.fill_element_graph , perf.fill_element_graph );
|
||||
perf_stats.create_sparse_matrix = std::min( perf_stats.create_sparse_matrix , perf.create_sparse_matrix );
|
||||
perf_stats.fill_time = std::min( perf_stats.fill_time , perf.fill_time );
|
||||
perf_stats.bc_time = std::min( perf_stats.bc_time , perf.bc_time );
|
||||
perf_stats.cg_time = std::min( perf_stats.cg_time , perf.cg_time );
|
||||
}
|
||||
}
|
||||
|
||||
return perf_stats ;
|
||||
}
|
||||
|
||||
} /* namespace FENL */
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #ifndef KOKKOS_EXAMPLE_FENL_IMPL_HPP */
|
||||
|
||||
@ -1,422 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <strings.h>
|
||||
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <WrapMPI.hpp>
|
||||
#include <fenl.hpp>
|
||||
|
||||
// For vtune
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Slots of the integer option table that main() fills from the command
// line on rank 0 and then broadcasts; CMD_COUNT is the table length.
enum {
  CMD_USE_THREADS           =  0 , // value following "threads"
  CMD_USE_NUMA              =  1 , // first number of the "cores" AxB value
  CMD_USE_CORE_PER_NUMA     =  2 , // second number of the "cores" AxB value
  CMD_USE_CUDA              =  3 , // set by "cuda" or "cuda-dev"
  CMD_USE_OPENMP            =  4 , // value following "openmp"
  CMD_USE_CUDA_DEV          =  5 , // value following "cuda-dev"
  CMD_USE_FIXTURE_X         =  6 , // first number of the "fixture" XxYxZ value
  CMD_USE_FIXTURE_Y         =  7 , // second number of the "fixture" XxYxZ value
  CMD_USE_FIXTURE_Z         =  8 , // third number of the "fixture" XxYxZ value
  CMD_USE_FIXTURE_BEGIN     =  9 , // first number of the "fixture-range" A..B value
  CMD_USE_FIXTURE_END       = 10 , // second number of the "fixture-range" A..B value
  CMD_USE_FIXTURE_QUADRATIC = 11 , // set by "fixture-quadratic"
  CMD_USE_ATOMIC            = 12 , // set by "atomic"
  CMD_USE_TRIALS            = 13 , // value following "trials"
  CMD_VTUNE                 = 14 , // set by "vtune"
  CMD_PRINT                 = 15 , // set by "print"
  CMD_ECHO                  = 16 , // set by "echo"
  CMD_ERROR                 = 17 , // set when an argument is not recognized
  CMD_COUNT                 = 18   // number of slots in the table
};
|
||||
|
||||
void print_cmdline( std::ostream & s , const int cmd[] )
|
||||
{
|
||||
if ( cmd[ CMD_USE_THREADS ] ) {
|
||||
s << " Threads(" << cmd[ CMD_USE_THREADS ]
|
||||
<< ") NUMA(" << cmd[ CMD_USE_NUMA ]
|
||||
<< ") CORE_PER_NUMA(" << cmd[ CMD_USE_CORE_PER_NUMA ]
|
||||
<< ")" ;
|
||||
}
|
||||
if ( cmd[ CMD_USE_OPENMP ] ) {
|
||||
s << " OpenMP(" << cmd[ CMD_USE_OPENMP ]
|
||||
<< ") NUMA(" << cmd[ CMD_USE_NUMA ]
|
||||
<< ") CORE_PER_NUMA(" << cmd[ CMD_USE_CORE_PER_NUMA ]
|
||||
<< ")" ;
|
||||
}
|
||||
if ( cmd[ CMD_USE_FIXTURE_X ] ) {
|
||||
s << " Fixture(" << cmd[ CMD_USE_FIXTURE_X ]
|
||||
<< "x" << cmd[ CMD_USE_FIXTURE_Y ]
|
||||
<< "x" << cmd[ CMD_USE_FIXTURE_Z ]
|
||||
<< ")" ;
|
||||
}
|
||||
if ( cmd[ CMD_USE_FIXTURE_BEGIN ] ) {
|
||||
s << " Fixture( " << cmd[ CMD_USE_FIXTURE_BEGIN ]
|
||||
<< " .. " << cmd[ CMD_USE_FIXTURE_END ]
|
||||
<< " )" ;
|
||||
}
|
||||
if ( cmd[ CMD_USE_FIXTURE_QUADRATIC ] ) {
|
||||
s << " Quadratic-Element" ;
|
||||
}
|
||||
if ( cmd[ CMD_USE_CUDA ] ) {
|
||||
s << " CUDA(" << cmd[ CMD_USE_CUDA_DEV ] << ")" ;
|
||||
}
|
||||
if ( cmd[ CMD_USE_ATOMIC ] ) {
|
||||
s << " ATOMIC" ;
|
||||
}
|
||||
if ( cmd[ CMD_USE_TRIALS ] ) {
|
||||
s << " TRIALS(" << cmd[ CMD_USE_TRIALS ] << ")" ;
|
||||
}
|
||||
if ( cmd[ CMD_VTUNE ] ) {
|
||||
s << " VTUNE" ;
|
||||
}
|
||||
if ( cmd[ CMD_PRINT ] ) {
|
||||
s << " PRINT" ;
|
||||
}
|
||||
s << std::endl ;
|
||||
}
|
||||
|
||||
// Writes one row of performance results to 's', one field per entry of
// 'widths' (16 fields, each right-aligned to its width and separated by
// " ,").  Timing fields are multiplied by 1000 and divided by the global
// node count; the mat-vec and CG fields are additionally divided by the
// accumulated CG iteration count.
//
//   s       output stream
//   widths  column widths; must hold at least 16 entries
//   perf    statistics to print
void print_perf_value( std::ostream & s , const std::vector<size_t> & widths, const Kokkos::Example::FENL::Perf & perf )
{
  std::vector<size_t>::const_iterator width = widths.begin();

  // Problem size and iteration counts.
  s << std::setw( *width++ ) << perf.global_elem_count << " ,";
  s << std::setw( *width++ ) << perf.global_node_count << " ,";
  s << std::setw( *width++ ) << perf.newton_iter_count << " ,";
  s << std::setw( *width++ ) << perf.cg_iter_count     << " ,";
  s << std::setw( *width++ ) << perf.map_ratio         << " ,";

  // Graph construction phases, per node.
  s << std::setw( *width++ )
    << ( perf.fill_node_set * 1000.0 ) / perf.global_node_count << " ,";
  s << std::setw( *width++ )
    << ( perf.scan_node_count * 1000.0 ) / perf.global_node_count << " ,";
  s << std::setw( *width++ )
    << ( perf.fill_graph_entries * 1000.0 ) / perf.global_node_count << " ,";
  s << std::setw( *width++ )
    << ( perf.sort_graph_entries * 1000.0 ) / perf.global_node_count << " ,";
  s << std::setw( *width++ )
    << ( perf.fill_element_graph * 1000.0 ) / perf.global_node_count << " ,";

  // Matrix creation, fill and boundary conditions, per node.
  s << std::setw( *width++ )
    << ( perf.create_sparse_matrix * 1000.0 ) / perf.global_node_count << " ,";
  s << std::setw( *width++ )
    << ( perf.fill_time * 1000.0 ) / perf.global_node_count << " ,";
  s << std::setw( *width++ )
    << ( perf.bc_time * 1000.0 ) / perf.global_node_count << " ,";

  // Linear solver, per CG iteration and per node.
  s << std::setw( *width++ )
    << ( ( perf.matvec_time * 1000.0 ) / perf.cg_iter_count ) / perf.global_node_count << " ,";
  s << std::setw( *width++ )
    << ( ( perf.cg_time * 1000.0 ) / perf.cg_iter_count ) / perf.global_node_count << " ,";

  // Last column: no trailing separator.
  s << std::setw( *width ) << perf.error_max;

  s << std::endl ;
}
|
||||
|
||||
template< class Device , Kokkos::Example::BoxElemPart::ElemOrder ElemOrder >
|
||||
void run( MPI_Comm comm , const int cmd[] )
|
||||
{
|
||||
int comm_rank = 0 ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
MPI_Comm_rank( comm , & comm_rank );
|
||||
#else
|
||||
comm = 0 ;
|
||||
#endif
|
||||
|
||||
|
||||
if ( 0 == comm_rank ) {
|
||||
if ( cmd[ CMD_USE_THREADS ] ) { std::cout << "THREADS , " << cmd[ CMD_USE_THREADS ] ; }
|
||||
else if ( cmd[ CMD_USE_OPENMP ] ) { std::cout << "OPENMP , " << cmd[ CMD_USE_OPENMP ] ; }
|
||||
else if ( cmd[ CMD_USE_CUDA ] ) { std::cout << "CUDA" ; }
|
||||
|
||||
if ( cmd[ CMD_USE_FIXTURE_QUADRATIC ] ) { std::cout << " , QUADRATIC-ELEMENT" ; }
|
||||
else { std::cout << " , LINEAR-ELEMENT" ; }
|
||||
|
||||
if ( cmd[ CMD_USE_ATOMIC ] ) { std::cout << " , USING ATOMICS" ; }
|
||||
}
|
||||
|
||||
std::vector< std::pair<std::string,std::string> > headers;
|
||||
|
||||
|
||||
headers.push_back(std::make_pair("ELEMS","count"));
|
||||
headers.push_back(std::make_pair("NODES","count"));
|
||||
headers.push_back(std::make_pair("NEWTON","iter"));
|
||||
headers.push_back(std::make_pair("CG","iter"));
|
||||
headers.push_back(std::make_pair("MAP_RATIO","ratio"));
|
||||
headers.push_back(std::make_pair("SET_FILL/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("SCAN/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("GRAPH_FILL/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("SORT/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("ELEM_GRAPH_FILL/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("MATRIX_CREATE/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("MATRIX_FILL/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("BOUNDARY/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("MAT_VEC/ITER/ROW","millisec"));
|
||||
headers.push_back(std::make_pair("CG/ITER/ROW","millisec"));
|
||||
headers.push_back(std::make_pair("ERROR","ratio"));
|
||||
|
||||
// find print widths
|
||||
size_t min_width = 10;
|
||||
std::vector< size_t > widths(headers.size());
|
||||
for (size_t i=0, ie=headers.size(); i<ie; ++i)
|
||||
widths[i] = std::max(min_width, headers[i].first.size()+1);
|
||||
|
||||
// print column headers
|
||||
if ( 0 == comm_rank ) {
|
||||
std::cout << std::endl ;
|
||||
for (size_t i=0; i<headers.size(); ++i)
|
||||
std::cout << std::setw(widths[i]) << headers[i].first << " ,";
|
||||
std::cout << "\b\b " << std::endl;
|
||||
for (size_t i=0; i<headers.size(); ++i)
|
||||
std::cout << std::setw(widths[i]) << headers[i].second << " ,";
|
||||
std::cout << "\b\b " << std::endl;
|
||||
|
||||
std::cout << std::scientific;
|
||||
std::cout.precision(3);
|
||||
}
|
||||
|
||||
if ( cmd[ CMD_USE_FIXTURE_BEGIN ] ) {
|
||||
for ( int i = cmd[CMD_USE_FIXTURE_BEGIN] ; i < cmd[CMD_USE_FIXTURE_END] * 2 ; i *= 2 ) {
|
||||
int nelem[3] ;
|
||||
nelem[0] = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) );
|
||||
nelem[1] = 1 + nelem[0] ;
|
||||
nelem[2] = 2 * nelem[0] ;
|
||||
|
||||
const Kokkos::Example::FENL::Perf perf =
|
||||
cmd[ CMD_USE_FIXTURE_QUADRATIC ]
|
||||
? Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemQuadratic >
|
||||
( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem )
|
||||
: Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemLinear >
|
||||
( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem )
|
||||
;
|
||||
|
||||
if ( 0 == comm_rank ) print_perf_value( std::cout , widths, perf );
|
||||
}
|
||||
}
|
||||
else {
|
||||
int nelem[3] = { cmd[ CMD_USE_FIXTURE_X ] ,
|
||||
cmd[ CMD_USE_FIXTURE_Y ] ,
|
||||
cmd[ CMD_USE_FIXTURE_Z ] };
|
||||
|
||||
const Kokkos::Example::FENL::Perf perf =
|
||||
cmd[ CMD_USE_FIXTURE_QUADRATIC ]
|
||||
? Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemQuadratic >
|
||||
( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem )
|
||||
: Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemLinear >
|
||||
( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem )
|
||||
;
|
||||
|
||||
if ( 0 == comm_rank ) print_perf_value( std::cout , widths, perf );
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
int main( int argc , char ** argv )
|
||||
{
|
||||
int comm_rank = 0 ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
MPI_Init( & argc , & argv );
|
||||
MPI_Comm comm = MPI_COMM_WORLD ;
|
||||
MPI_Comm_rank( comm , & comm_rank );
|
||||
#else
|
||||
MPI_Comm comm = 0 ;
|
||||
(void) comm ; // suppress warning
|
||||
#endif
|
||||
|
||||
int cmdline[ CMD_COUNT ] ;
|
||||
|
||||
for ( int i = 0 ; i < CMD_COUNT ; ++i ) cmdline[i] = 0 ;
|
||||
|
||||
if ( 0 == comm_rank ) {
|
||||
for ( int i = 1 ; i < argc ; ++i ) {
|
||||
if ( 0 == strcasecmp( argv[i] , "threads" ) ) {
|
||||
cmdline[ CMD_USE_THREADS ] = atoi( argv[++i] );
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "openmp" ) ) {
|
||||
cmdline[ CMD_USE_OPENMP ] = atoi( argv[++i] );
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "cores" ) ) {
|
||||
sscanf( argv[++i] , "%dx%d" ,
|
||||
cmdline + CMD_USE_NUMA ,
|
||||
cmdline + CMD_USE_CORE_PER_NUMA );
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "cuda" ) ) {
|
||||
cmdline[ CMD_USE_CUDA ] = 1 ;
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "cuda-dev" ) ) {
|
||||
cmdline[ CMD_USE_CUDA ] = 1 ;
|
||||
cmdline[ CMD_USE_CUDA_DEV ] = atoi( argv[++i] ) ;
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "fixture" ) ) {
|
||||
sscanf( argv[++i] , "%dx%dx%d" ,
|
||||
cmdline + CMD_USE_FIXTURE_X ,
|
||||
cmdline + CMD_USE_FIXTURE_Y ,
|
||||
cmdline + CMD_USE_FIXTURE_Z );
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "fixture-range" ) ) {
|
||||
sscanf( argv[++i] , "%d..%d" ,
|
||||
cmdline + CMD_USE_FIXTURE_BEGIN ,
|
||||
cmdline + CMD_USE_FIXTURE_END );
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "fixture-quadratic" ) ) {
|
||||
cmdline[ CMD_USE_FIXTURE_QUADRATIC ] = 1 ;
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "atomic" ) ) {
|
||||
cmdline[ CMD_USE_ATOMIC ] = 1 ;
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "trials" ) ) {
|
||||
cmdline[ CMD_USE_TRIALS ] = atoi( argv[++i] ) ;
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "vtune" ) ) {
|
||||
cmdline[ CMD_VTUNE ] = 1 ;
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "print" ) ) {
|
||||
cmdline[ CMD_PRINT ] = 1 ;
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "echo" ) ) {
|
||||
cmdline[ CMD_ECHO ] = 1 ;
|
||||
}
|
||||
else {
|
||||
cmdline[ CMD_ERROR ] = 1 ;
|
||||
|
||||
std::cerr << "Unrecognized command line argument #" << i << ": " << argv[i] << std::endl ;
|
||||
}
|
||||
}
|
||||
|
||||
if ( cmdline[ CMD_ECHO ] && 0 == comm_rank ) { print_cmdline( std::cout , cmdline ); }
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
MPI_Bcast( cmdline , CMD_COUNT , MPI_INT , 0 , comm );
|
||||
#endif
|
||||
|
||||
if ( cmdline[ CMD_VTUNE ] ) {
|
||||
std::stringstream cmd;
|
||||
pid_t my_os_pid=getpid();
|
||||
const std::string vtune_loc =
|
||||
"/usr/local/intel/vtune_amplifier_xe_2013/bin64/amplxe-cl";
|
||||
const std::string output_dir = "./vtune/vtune.";
|
||||
const int p_rank = comm_rank;
|
||||
cmd << vtune_loc
|
||||
<< " -collect hotspots -result-dir " << output_dir << p_rank
|
||||
<< " -target-pid " << my_os_pid << " &";
|
||||
if (p_rank == 0)
|
||||
std::cout << cmd.str() << std::endl;
|
||||
system(cmd.str().c_str());
|
||||
system("sleep 10");
|
||||
}
|
||||
|
||||
if ( ! cmdline[ CMD_ERROR ] && ! cmdline[ CMD_ECHO ] ) {
|
||||
|
||||
if ( ! cmdline[ CMD_USE_TRIALS ] ) { cmdline[ CMD_USE_TRIALS ] = 1 ; }
|
||||
|
||||
if ( ! cmdline[ CMD_USE_FIXTURE_X ] && ! cmdline[ CMD_USE_FIXTURE_BEGIN ] ) {
|
||||
cmdline[ CMD_USE_FIXTURE_X ] = 2 ;
|
||||
cmdline[ CMD_USE_FIXTURE_Y ] = 2 ;
|
||||
cmdline[ CMD_USE_FIXTURE_Z ] = 2 ;
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
|
||||
if ( cmdline[ CMD_USE_THREADS ] ) {
|
||||
|
||||
if ( cmdline[ CMD_USE_NUMA ] && cmdline[ CMD_USE_CORE_PER_NUMA ] ) {
|
||||
Kokkos::Threads::initialize( cmdline[ CMD_USE_THREADS ] ,
|
||||
cmdline[ CMD_USE_NUMA ] ,
|
||||
cmdline[ CMD_USE_CORE_PER_NUMA ] );
|
||||
}
|
||||
else {
|
||||
Kokkos::Threads::initialize( cmdline[ CMD_USE_THREADS ] );
|
||||
}
|
||||
|
||||
run< Kokkos::Threads , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline );
|
||||
|
||||
Kokkos::Threads::finalize();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
|
||||
if ( cmdline[ CMD_USE_OPENMP ] ) {
|
||||
|
||||
if ( cmdline[ CMD_USE_NUMA ] && cmdline[ CMD_USE_CORE_PER_NUMA ] ) {
|
||||
Kokkos::OpenMP::initialize( cmdline[ CMD_USE_OPENMP ] ,
|
||||
cmdline[ CMD_USE_NUMA ] ,
|
||||
cmdline[ CMD_USE_CORE_PER_NUMA ] );
|
||||
}
|
||||
else {
|
||||
Kokkos::OpenMP::initialize( cmdline[ CMD_USE_OPENMP ] );
|
||||
}
|
||||
|
||||
run< Kokkos::OpenMP , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline );
|
||||
|
||||
Kokkos::OpenMP::finalize();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
if ( cmdline[ CMD_USE_CUDA ] ) {
|
||||
// Use the last device:
|
||||
|
||||
Kokkos::HostSpace::execution_space::initialize();
|
||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( cmdline[ CMD_USE_CUDA_DEV ] ) );
|
||||
|
||||
run< Kokkos::Cuda , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline );
|
||||
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
MPI_Finalize();
|
||||
#endif
|
||||
|
||||
return cmdline[ CMD_ERROR ] ? -1 : 0 ;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user