Updating kokkos lib

git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14918 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
stamoor
2016-05-02 22:06:50 +00:00
parent c5d0c55bee
commit 0a1b765248
411 changed files with 0 additions and 133424 deletions

View File

@ -1,20 +0,0 @@
# TriBITS definition of the Kokkos "Example" subpackage.
# Subpackage name must match what appears in kokkos/cmake/Dependencies.cmake
#
TRIBITS_SUBPACKAGE(Example)
# Register each example directory with TriBITS, one call per directory.
TRIBITS_ADD_EXAMPLE_DIRECTORIES(query_device)
TRIBITS_ADD_EXAMPLE_DIRECTORIES(fixture)
TRIBITS_ADD_EXAMPLE_DIRECTORIES(feint)
TRIBITS_ADD_EXAMPLE_DIRECTORIES(fenl)
TRIBITS_ADD_EXAMPLE_DIRECTORIES(multi_fem)
TRIBITS_ADD_EXAMPLE_DIRECTORIES(md_skeleton)
TRIBITS_ADD_EXAMPLE_DIRECTORIES(global_2_local_ids)
TRIBITS_ADD_EXAMPLE_DIRECTORIES(grow_array)
TRIBITS_ADD_EXAMPLE_DIRECTORIES(sort_array)
# The tutorial directory is only added when Kokkos_ENABLE_Cuda is not set.
if(NOT Kokkos_ENABLE_Cuda)
TRIBITS_ADD_EXAMPLE_DIRECTORIES(tutorial)
endif()
# Must be the last TriBITS call for this subpackage.
TRIBITS_SUBPACKAGE_POSTPROCESS()

View File

@ -1,16 +0,0 @@
This directory contains example application proxies that use different
parts of Kokkos. If you are looking for the FENL ("finite element
nonlinear" solve) example, it has moved into the LinAlg subpackage of
Tpetra.
MANIFEST:
- common: Header files used by different examples
- feint: Unstructured finite-element method
- fixture: Box-shaped finite-element mesh fixture (BoxElemFixture, BoxElemPart) shared by other examples such as feint
- global_2_local_ids: Example of global-to-local index lookup
- grow_array: Parallel dynamic memory allocation
- md_skeleton: Molecular dynamics
- query_device: Kokkos' HWLOC wrapper for querying device topology
- sort_array: Parallel sort
- tutorial: Kokkos tutorial (START HERE)

View File

@ -1,4 +0,0 @@
# Dependencies of this subpackage:
#  - libraries require the KokkosCore, KokkosContainers and KokkosAlgorithms packages;
#  - tests may optionally use the CUSPARSE and MKL third-party libraries (TPLs).
TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
LIB_REQUIRED_DEP_PACKAGES KokkosCore KokkosContainers KokkosAlgorithms
TEST_OPTIONAL_DEP_TPLS CUSPARSE MKL
)

View File

@ -1,294 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_VECTORIMPORT_HPP
#define KOKKOS_VECTORIMPORT_HPP
#include <utility>
#include <limits>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <vector>
#include <Kokkos_Core.hpp>
#include <WrapMPI.hpp>
namespace Kokkos {
namespace Example {
template< class CommMessageType , class CommIdentType , class VectorType >
struct VectorImport ;
} // namespace Example
} // namespace Kokkos
#if ! defined( KOKKOS_HAVE_MPI )
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
/** \brief Serial (no-MPI) stand-in for VectorImport.
 *
 *  Keeps the same constructor signature and public data members as the
 *  MPI implementation so that calling code compiles unchanged, but the
 *  import operation itself does nothing: the message and index arguments
 *  are accepted and ignored.
 */
template< class CommMessageType , class CommIdentType , class VectorType >
struct VectorImport {

  const MPI_Comm comm ;          ///< Communicator handed to the constructor
  const unsigned count_owned ;   ///< Count of owned entries, as given
  const unsigned count_receive ; ///< Count of received entries, as given

  /** \brief Record the communicator and the two counts; ignore the rest. */
  VectorImport( MPI_Comm                 arg_comm ,
                const CommMessageType & /* receive messages : unused */ ,
                const CommMessageType & /* send messages    : unused */ ,
                const CommIdentType   & /* send node ids    : unused */ ,
                const unsigned           arg_count_owned ,
                const unsigned           arg_count_receive )
    : comm( arg_comm )
    , count_owned( arg_count_owned )
    , count_receive( arg_count_receive )
    {
    }

  /** \brief Import is a no-op without MPI. */
  void operator()( const VectorType & ) const
    {
    }
};
} // namespace Example
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#else /* defined( KOKKOS_HAVE_MPI ) */
namespace Kokkos {
namespace Example {
template< class CommMessageType , class CommIdentType , class VectorType >
class VectorImport {
private:
// rank == 1 or array_layout == LayoutRight
enum { OK = Kokkos::Impl::StaticAssert<
( VectorType::rank == 1 ) ||
Kokkos::Impl::is_same< typename VectorType::array_layout , Kokkos::LayoutRight >::value
>::value };
typedef typename VectorType::HostMirror HostVectorType ;
enum { ReceiveInPlace =
Kokkos::Impl::is_same< typename VectorType::memory_space ,
typename HostVectorType::memory_space >::value };
const CommMessageType recv_msg ;
const CommMessageType send_msg ;
const CommIdentType send_nodeid ;
VectorType send_buffer ;
HostVectorType host_send_buffer ;
HostVectorType host_recv_buffer ;
unsigned chunk ;
public:
const MPI_Comm comm ;
const unsigned count_owned ;
const unsigned count_receive ;
struct Pack {
typedef typename VectorType::execution_space execution_space ;
const CommIdentType index ;
const VectorType source ;
const VectorType buffer ;
KOKKOS_INLINE_FUNCTION
void operator()( const unsigned i ) const
{ buffer( i ) = source( index(i) ); }
Pack( const CommIdentType & arg_index ,
const VectorType & arg_source ,
const VectorType & arg_buffer )
: index( arg_index )
, source( arg_source )
, buffer( arg_buffer )
{
Kokkos::parallel_for( index.dimension_0() , *this );
execution_space::fence();
}
};
VectorImport( MPI_Comm arg_comm ,
const CommMessageType & arg_recv_msg ,
const CommMessageType & arg_send_msg ,
const CommIdentType & arg_send_nodeid ,
const unsigned arg_count_owned ,
const unsigned arg_count_receive )
: recv_msg( arg_recv_msg )
, send_msg( arg_send_msg )
, send_nodeid( arg_send_nodeid )
, send_buffer()
, host_send_buffer()
, host_recv_buffer()
, comm( arg_comm )
, count_owned( arg_count_owned )
, count_receive( arg_count_receive )
{
if ( ! ReceiveInPlace ) {
host_recv_buffer = HostVectorType("recv_buffer",count_receive);
}
unsigned send_count = 0 ;
for ( unsigned i = 0 ; i < send_msg.dimension_0() ; ++i ) { send_count += send_msg(i,1); }
send_buffer = VectorType("send_buffer",send_count);
host_send_buffer = Kokkos::create_mirror_view( send_buffer );
}
inline
void operator()( const VectorType & v ) const
{
typedef typename VectorType::value_type scalar_type ;
const int mpi_tag = 42 ;
const unsigned chunk = v.dimension_1();
// Subvector for receives
const std::pair<unsigned,unsigned> recv_range( count_owned , count_owned + count_receive );
const VectorType recv_vector = Kokkos::subview( v , recv_range );
std::vector< MPI_Request > recv_request( recv_msg.dimension_0() , MPI_REQUEST_NULL );
{ // Post receives
scalar_type * ptr =
ReceiveInPlace ? recv_vector.ptr_on_device() : host_recv_buffer.ptr_on_device();
for ( size_t i = 0 ; i < recv_msg.dimension_0() ; ++i ) {
const int proc = recv_msg(i,0);
const int count = recv_msg(i,1) * chunk ;
MPI_Irecv( ptr , count * sizeof(scalar_type) , MPI_BYTE ,
proc , mpi_tag , comm , & recv_request[i] );
ptr += count ;
}
}
MPI_Barrier( comm );
{ // Pack and send
const Pack pack( send_nodeid , v , send_buffer );
Kokkos::deep_copy( host_send_buffer , send_buffer );
scalar_type * ptr = host_send_buffer.ptr_on_device();
for ( size_t i = 0 ; i < send_msg.dimension_0() ; ++i ) {
const int proc = send_msg(i,0);
const int count = send_msg(i,1) * chunk ;
// MPI_Ssend blocks until
// (1) a receive is matched for the message and
// (2) the send buffer can be re-used.
//
// It is suggested that MPI_Ssend will have the best performance:
// http://www.mcs.anl.gov/research/projects/mpi/sendmode.html .
MPI_Ssend( ptr ,
count * sizeof(scalar_type) , MPI_BYTE ,
proc , mpi_tag , comm );
ptr += count ;
}
}
// Wait for receives and verify:
for ( size_t i = 0 ; i < recv_msg.dimension_0() ; ++i ) {
MPI_Status recv_status ;
int recv_which = 0 ;
int recv_size = 0 ;
MPI_Waitany( recv_msg.dimension_0() , & recv_request[0] , & recv_which , & recv_status );
const int recv_proc = recv_status.MPI_SOURCE ;
MPI_Get_count( & recv_status , MPI_BYTE , & recv_size );
// Verify message properly received:
const int expected_proc = recv_msg(recv_which,0);
const int expected_size = recv_msg(recv_which,1) * chunk * sizeof(scalar_type);
if ( ( expected_proc != recv_proc ) ||
( expected_size != recv_size ) ) {
int local_rank = 0 ;
MPI_Comm_rank( comm , & local_rank );
std::ostringstream msg ;
msg << "VectorImport error:"
<< " P" << local_rank
<< " received from P" << recv_proc
<< " size " << recv_size
<< " expected " << expected_size
<< " from P" << expected_proc ;
throw std::runtime_error( msg.str() );
}
}
// Copy received data to device memory.
if ( ! ReceiveInPlace ) { Kokkos::deep_copy( recv_vector , host_recv_buffer ); }
}
};
} // namespace Example
} // namespace Kokkos
#endif
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_VECTORIMPORT_HPP */

View File

@ -1,103 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_WRAP_MPI
#define KOKKOS_EXAMPLE_WRAP_MPI
#include <Kokkos_Macros.hpp>
#include <string>
#if defined( KOKKOS_HAVE_MPI )
#include <mpi.h>
namespace Kokkos {
namespace Example {
/** \brief Sum 'value' over all ranks of 'comm' and return the total. */
inline
double all_reduce( double value , MPI_Comm comm )
{
  double contribution = value ; // this rank's input
  double total        = value ; // overwritten with the global sum
  MPI_Allreduce( & contribution , & total , 1 , MPI_DOUBLE , MPI_SUM , comm );
  return total ;
}
/** \brief Return the maximum of 'value' over all ranks of 'comm'. */
inline
double all_reduce_max( double value , MPI_Comm comm )
{
  double contribution = value ; // this rank's input
  double maximum      = value ; // overwritten with the global maximum
  MPI_Allreduce( & contribution , & maximum , 1 , MPI_DOUBLE , MPI_MAX , comm );
  return maximum ;
}
} // namespace Example
} // namespace Kokkos
#elif ! defined( KOKKOS_HAVE_MPI )
/* Serial stand-ins for the MPI_Comm type and the heavily used MPI
 * functions.  Providing them here reduces the number of
 * '#if defined( KOKKOS_HAVE_MPI )' blocks which would otherwise have to
 * be sprinkled throughout the examples.
 */
typedef int MPI_Comm ;

/* A serial run consists of exactly one process ... */
inline int MPI_Comm_size( MPI_Comm , int * size )
{
  size[0] = 1 ;
  return 0 ;
}

/* ... and that process is rank zero. */
inline int MPI_Comm_rank( MPI_Comm , int * rank )
{
  rank[0] = 0 ;
  return 0 ;
}

/* Nothing to synchronize with. */
inline int MPI_Barrier( MPI_Comm )
{
  return 0 ;
}

namespace Kokkos {
namespace Example {

/* With a single process every reduction is the identity. */

inline
double all_reduce( double value , MPI_Comm )
{
  return value ;
}

inline
double all_reduce_max( double value , MPI_Comm )
{
  return value ;
}

} // namespace Example
} // namespace Kokkos
#endif /* ! defined( KOKKOS_HAVE_MPI ) */
#endif /* #ifndef KOKKOS_EXAMPLE_WRAP_MPI */

View File

@ -1,18 +0,0 @@
# Build description of the 'feint' example executable.
# Headers are searched in this directory (binary and source) and in the
# sibling 'common' and 'fixture' example directories.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../fixture)
# Sources: every .cpp file in this directory plus BoxElemPart.cpp,
# which lives in the 'fixture' directory.
SET(SOURCES "")
FILE(GLOB SOURCES *.cpp)
LIST( APPEND SOURCES ../fixture/BoxElemPart.cpp)
# COMM serial mpi: presumably enables the executable for both serial and
# MPI configurations (TriBITS COMM argument) -- confirm against TriBITS docs.
TRIBITS_ADD_EXECUTABLE(
feint
SOURCES ${SOURCES}
COMM serial mpi
)

View File

@ -1,489 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_FEINT_FUNCTORS_HPP
#define KOKKOS_EXAMPLE_FEINT_FUNCTORS_HPP
#include <stdio.h>
#include <Kokkos_Core.hpp>
#include <BoxElemFixture.hpp>
namespace Kokkos {
namespace Example {
/** \brief Numerically integrate a function on a finite element mesh and
* project the integrated values to nodes.
*/
template< class FixtureType ,
class FunctionType ,
bool PerformScatterAddWithAtomic >
struct FiniteElementIntegration ;
// Specialized for an 'Example::BoxElemFixture' finite element mesh.
//
// The functor is written for a parallel reduction: operator() handles one
// element, init() zeroes a reduction value and join() combines two of them.
// Per element it integrates FunctionType at the element's cubature points,
// stores an equal share of the result for each element node in
// m_elem_integral and, when PerformScatterAddWithAtomic is true, also
// atomically adds those shares into m_node_lumped.
template< class Device , BoxElemPart::ElemOrder ElemOrder , class GridMap ,
class FunctionType ,
bool PerformScatterAddWithAtomic >
struct FiniteElementIntegration<
Kokkos::Example::BoxElemFixture< Device , ElemOrder , GridMap > ,
FunctionType ,
PerformScatterAddWithAtomic >
{
// Element mesh types:
// NOTE(review): GridMap is not forwarded to BoxElemFixture here, so
// BoxFixtureType uses the fixture's default third template argument --
// confirm this is intended.
typedef Kokkos::Example::BoxElemFixture< Device , ElemOrder >
BoxFixtureType ;
typedef Kokkos::Example::HexElement_Data< BoxFixtureType::ElemNode >
HexElemDataType ;
enum { ElemNodeCount = HexElemDataType::element_node_count };
enum { IntegrationCount = HexElemDataType::integration_count };
enum { ValueCount = FunctionType::value_count };
// Dictionary of view types:
typedef View<int*, Device> ElemErrorType ;
typedef View<double*[ElemNodeCount][ValueCount],Device> ElemValueType ;
typedef View<double*[ValueCount], Device> NodeValueType ;
// Data members for this Functor:
const HexElemDataType m_hex_elem_data ; ///< Master element
const BoxFixtureType m_box_fixture ; ///< Unstructured mesh data
const FunctionType m_function ; ///< Function to integrate
const ElemErrorType m_elem_error ; ///< Flags for element errors
const ElemValueType m_elem_integral ; ///< Per-element quantities
const NodeValueType m_node_lumped ; ///< Quantities lumped to nodes
//----------------------------------------
// Construct from a mesh fixture and the function to integrate.
// Allocates the per-element error flags, the per-element-node integral
// shares and the per-node lumped values, sized from the fixture's
// element and node counts.
FiniteElementIntegration(
const BoxFixtureType & box_fixture ,
const FunctionType & function )
: m_hex_elem_data()
, m_box_fixture( box_fixture ) // Shallow copy of the mesh fixture
, m_function( function )
, m_elem_error( "elem_error" , box_fixture.elem_count() )
, m_elem_integral( "elem_integral" , box_fixture.elem_count() )
, m_node_lumped( "node_lumped" , box_fixture.node_count() )
{}
//----------------------------------------
// Device for parallel dispatch.
typedef typename Device::execution_space execution_space;
// Value type for global parallel reduction.
struct value_type {
double value[ ValueCount ]; ///< Integrated quantities
int error ; ///< Element inversion flag
};
//----------------------------------------
// Transform element interpolation function gradients and
// compute determinant of spatial jacobian.
//
// grad  : basis function gradients on the master element, [3][ElemNodeCount]
// coord : nodal coordinates of the element, [3][ElemNodeCount]
// dpsi  : output, transformed gradients, [3][ElemNodeCount]
// Returns the determinant of the jacobian (as float).
KOKKOS_INLINE_FUNCTION
float transform_gradients(
const float grad[][ ElemNodeCount ] , // Gradient of bases master element
const double coord[][ ElemNodeCount ] ,
float dpsi[][ ElemNodeCount ] ) const
{
enum { TensorDim = 9 };
// Row-major offsets of the 3x3 tensor entries.
enum { j11 = 0 , j12 = 1 , j13 = 2 ,
j21 = 3 , j22 = 4 , j23 = 5 ,
j31 = 6 , j32 = 7 , j33 = 8 };
// Temporary for jacobian accumulation is double for summation accuracy.
double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
for( int i = 0; i < ElemNodeCount ; ++i ) {
J[j11] += grad[0][i] * coord[0][i] ;
J[j12] += grad[0][i] * coord[1][i] ;
J[j13] += grad[0][i] * coord[2][i] ;
J[j21] += grad[1][i] * coord[0][i] ;
J[j22] += grad[1][i] * coord[1][i] ;
J[j23] += grad[1][i] * coord[2][i] ;
J[j31] += grad[2][i] * coord[0][i] ;
J[j32] += grad[2][i] * coord[1][i] ;
J[j33] += grad[2][i] * coord[2][i] ;
}
// Inverse jacobian, compute as double and store as float.
// At this point invJ holds the unscaled cofactor terms; the division by
// the determinant happens further below.
float invJ[ TensorDim ] = {
float( J[j22] * J[j33] - J[j23] * J[j32] ) ,
float( J[j13] * J[j32] - J[j12] * J[j33] ) ,
float( J[j12] * J[j23] - J[j13] * J[j22] ) ,
float( J[j23] * J[j31] - J[j21] * J[j33] ) ,
float( J[j11] * J[j33] - J[j13] * J[j31] ) ,
float( J[j13] * J[j21] - J[j11] * J[j23] ) ,
float( J[j21] * J[j32] - J[j22] * J[j31] ) ,
float( J[j12] * J[j31] - J[j11] * J[j32] ) ,
float( J[j11] * J[j22] - J[j12] * J[j21] ) };
// Determinant from the first column of J and the unscaled terms above.
const float detJ = J[j11] * invJ[j11] +
J[j21] * invJ[j12] +
J[j31] * invJ[j13] ;
{
// NOTE(review): no guard against detJ == 0 here; the caller only tests
// detJ <= 0 after this function has returned.
const float detJinv = 1.0 / detJ ;
for ( int i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
}
// Transform gradients:
for ( int i = 0; i < ElemNodeCount ; ++i ) {
dpsi[0][i] = grad[0][i] * invJ[j11] +
grad[1][i] * invJ[j12] +
grad[2][i] * invJ[j13];
dpsi[1][i] = grad[0][i] * invJ[j21] +
grad[1][i] * invJ[j22] +
grad[2][i] * invJ[j23];
dpsi[2][i] = grad[0][i] * invJ[j31] +
grad[1][i] * invJ[j32] +
grad[2][i] * invJ[j33];
}
return detJ ;
}
// Functor's function called for each element in the mesh
// to numerically integrate the function and add element quantities
// to the global integral.
KOKKOS_INLINE_FUNCTION
void operator()( const int ielem , value_type & update ) const
{
// Local temporaries for gathering nodal data.
double node_coord[3][ ElemNodeCount ];
int inode[ ElemNodeCount ] ;
// Gather indices of element's node from global memory to local memory.
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
inode[i] = m_box_fixture.elem_node( ielem , i );
}
// Gather coordinates of element's nodes from global memory to local memory.
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
node_coord[0][i] = m_box_fixture.node_coord( inode[i] , 0 );
node_coord[1][i] = m_box_fixture.node_coord( inode[i] , 1 );
node_coord[2][i] = m_box_fixture.node_coord( inode[i] , 2 );
}
// Local temporary to accumulate numerical integration
// of vector valued function.
double accum[ ValueCount ];
for ( int j = 0 ; j < ValueCount ; ++j ) { accum[j] = 0 ; }
int error = 0 ;
// Numerical integration loop for this element:
for ( int k = 0 ; k < IntegrationCount ; ++k ) {
// Integration point in space as interpolated from nodal coordinates:
double point[3] = { 0 , 0 , 0 };
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
point[0] += node_coord[0][i] * m_hex_elem_data.values[k][i] ;
point[1] += node_coord[1][i] * m_hex_elem_data.values[k][i] ;
point[2] += node_coord[2][i] * m_hex_elem_data.values[k][i] ;
}
// Example function vector value at cubature point:
double val_at_pt[ ValueCount ];
m_function( point , val_at_pt );
// Temporary array for transformed element basis functions' gradient.
// Not used in this example, but computed anyway by the more general
// deformation function.
float dpsi[3][ ElemNodeCount ];
// Compute deformation jacobian, transform basis function gradient,
// and return determinant of deformation jacobian.
float detJ = transform_gradients( m_hex_elem_data.gradients[k] ,
node_coord , dpsi );
// Check for inverted spatial jacobian
// A non-positive determinant flags the element and contributes zero weight.
if ( detJ <= 0 ) { error = 1 ; detJ = 0 ; }
// Integration weight.
const float w = m_hex_elem_data.weights[k] * detJ ;
// Cubature of function.
for ( int j = 0 ; j < ValueCount ; ++j ) {
accum[j] += val_at_pt[j] * w ;
}
}
m_elem_error(ielem) = error ;
// Element contribution to global integral:
if ( error ) { update.error = 1 ; }
for ( int j = 0 ; j < ValueCount ; ++j ) { update.value[j] += accum[j] ; }
// Element-node quantity for lumping to nodes:
// every node of the element receives an equal 1/ElemNodeCount share.
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
for ( int j = 0 ; j < ValueCount ; ++j ) {
// Save element's integral apportionment to nodes to global memory
m_elem_integral( ielem , i , j ) = accum[j] / ElemNodeCount ;
}
}
if ( PerformScatterAddWithAtomic ) {
// Option to immediately scatter-add the integrated quantities to nodes.
// This is a race condition as two or more threads could attempt
// concurrent update of nodal values. The atomic_fetch_add (+=)
// function guarantees that the summation will occur correctly;
// however, there can be no guarantee for the order of summation.
// Due to non-associativity of floating point arithmetic the result
// is non-deterministic within bounds of floating point round-off.
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
for ( int j = 0 ; j < ValueCount ; ++j ) {
Kokkos::atomic_fetch_add( & m_node_lumped( inode[i] , j ) ,
m_elem_integral( ielem , i , j ) );
}
}
}
}
//--------------------------------------------------------------------------
// Initialization of the global reduction value.
KOKKOS_INLINE_FUNCTION
void init( value_type & update ) const
{
for ( int j = 0 ; j < ValueCount ; ++j ) update.value[j] = 0 ;
update.error = 0 ;
}
// Join two contributions to global reduction value.
// Values are summed; the error flag is set if the input carries it.
KOKKOS_INLINE_FUNCTION
void join( volatile value_type & update ,
volatile const value_type & input ) const
{
for ( int j = 0 ; j < ValueCount ; ++j ) update.value[j] += input.value[j] ;
if ( input.error ) update.error = 1 ;
}
};
} /* namespace Example */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
template< class ViewElemNode ,
class ViewNodeScan ,
class ViewNodeElem >
void map_node_to_elem( const ViewElemNode & elem_node ,
const ViewNodeScan & node_scan ,
const ViewNodeElem & node_elem );
/** \brief  Reduction functor that lumps per-element-node values onto nodes.
 *
 *  For every node the contributions of all elements touching that node
 *  are gathered and summed; each node is written by exactly one work
 *  item, so no atomic update is needed.  If the nodal values were already
 *  accumulated atomically (AlreadyUsedAtomic == true) the functor only
 *  reads them back and adds them to the global reduction value.
 */
template< class ViewNodeValue ,
          class ViewElemValue ,
          bool  AlreadyUsedAtomic >
struct LumpElemToNode {

  typedef typename ViewElemValue::execution_space  execution_space ;

  // The ViewElemValue array specification is known to be
  // < double*[nNode][nValue] > in this example, so the number of values
  // per node is the view's third extent.
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
  enum { value_count = ViewElemValue::dimension::N2 };
#else
  enum { value_count = ViewElemValue::shape_type::N2 };
#endif

  ViewNodeValue                  m_node_value ; ///< Integrated values at nodes
  ViewElemValue                  m_elem_value ; ///< Values apportioned to nodes
  View<int*,   execution_space>  m_node_scan ;  ///< Offsets for nodes->element
  View<int*[2],execution_space>  m_node_elem ;  ///< Node->element connectivity

  /** \brief  Capture the value views and, unless atomics were already
   *          used, allocate and fill the node->element connectivity.
   */
  template< class ViewElemNode >
  LumpElemToNode( const ViewNodeValue & node_value ,
                  const ViewElemValue & elem_value ,
                  const ViewElemNode  & elem_node )
    : m_node_value( node_value )
    , m_elem_value( elem_value )
    , m_node_scan( "node_scan" ,
                   AlreadyUsedAtomic ? 0 : node_value.dimension_0() + 1 )
    , m_node_elem( "node_elem" ,
                   AlreadyUsedAtomic ? 0 : elem_node.dimension_0() *
                                           elem_node.dimension_1() )
  {
    // The connectivity is only required for the gather-sum path.
    if ( AlreadyUsedAtomic ) { return ; }

    map_node_to_elem( elem_node , m_node_scan , m_node_elem );
  }

  //----------------------------------------

  /// Value type of the global reduction: one sum per function value.
  struct value_type { double value[ value_count ]; };

  /** \brief  Process node 'inode' and add its values to 'update'. */
  KOKKOS_INLINE_FUNCTION
  void operator()( const int inode , value_type & update ) const
  {
    if ( AlreadyUsedAtomic ) {
      // Nodal values are final already: just read and accumulate them.
      for ( int k = 0 ; k < value_count ; ++k ) {
        update.value[k] += m_node_value( inode , k );
      }
      return ;
    }

    // Gather into a local accumulator first.
    value_type gathered ;
    for ( int k = 0 ; k < value_count ; ++k ) { gathered.value[k] = 0 ; }

    // Connectivity entries of this node span [ first , last ).
    const int last = m_node_scan( inode + 1 );

    for ( int entry = m_node_scan( inode ) ; entry < last ; ++entry ) {
      // This node is local node #ielem_node of element #ielem.
      const int ielem      = m_node_elem( entry , 0 );
      const int ielem_node = m_node_elem( entry , 1 );

      for ( int k = 0 ; k < value_count ; ++k ) {
        gathered.value[k] += m_elem_value( ielem , ielem_node , k );
      }
    }

    // Store the nodal result (this work item is the only writer of the
    // node) and contribute it to the global sum.
    for ( int k = 0 ; k < value_count ; ++k ) {
      m_node_value( inode , k ) = gathered.value[k] ;
      update.value[k] += gathered.value[k] ;
    }
  }

  /** \brief  Zero a reduction value. */
  KOKKOS_INLINE_FUNCTION
  void init( value_type & update ) const
  {
    for ( int k = 0 ; k < value_count ; ++k ) {
      update.value[k] = 0 ;
    }
  }

  /** \brief  Add 'input' into 'update', value by value. */
  KOKKOS_INLINE_FUNCTION
  void join( volatile       value_type & update ,
             volatile const value_type & input ) const
  {
    for ( int k = 0 ; k < value_count ; ++k ) { update.value[k] += input.value[k] ; }
  }
};
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
/** \brief  Build the node->element connectivity from element->node.
 *
 *  \param elem_node  Input.  elem_node(e,k) is the node id of local
 *                    node #k of element #e.
 *  \param node_scan  Output, length node_count + 1 (must not be empty).
 *                    Entries [ node_scan(n) , node_scan(n+1) ) of
 *                    'node_elem' belong to node n.
 *  \param node_elem  Output.  node_elem(i,0) is an element id and
 *                    node_elem(i,1) the local node index within it.
 *
 *  The work is done on host mirrors and copied back with deep_copy.
 *  The offsets are written by plain assignment starting from an explicit
 *  node_scan(0) = 0, so the result does not depend on whatever values the
 *  mirror of 'node_scan' held on entry.
 */
template< class ViewElemNode ,
          class ViewNodeScan ,
          class ViewNodeElem >
void map_node_to_elem( const ViewElemNode & elem_node ,
                       const ViewNodeScan & node_scan ,
                       const ViewNodeElem & node_elem )
{
  typedef typename ViewElemNode::host_mirror_space host_mirror_space ;

  const typename ViewElemNode::HostMirror host_elem_node =
    Kokkos::create_mirror_view(elem_node);

  const typename ViewNodeScan::HostMirror host_node_scan =
    Kokkos::create_mirror_view(node_scan);

  const typename ViewNodeElem::HostMirror host_node_elem =
    Kokkos::create_mirror_view(node_elem);

  const int elem_count      = host_elem_node.dimension_0();
  const int elem_node_count = host_elem_node.dimension_1();
  const int node_count      = host_node_scan.dimension_0() - 1 ;

  // Per-node counter: first the number of elements touching the node,
  // later re-used as the fill position within the node's span.
  const View<int*, host_mirror_space >
    node_elem_count( "node_elem_count" , node_count );

  Kokkos::deep_copy( host_elem_node , elem_node );

  // Pass 1: count how many (element,local node) pairs refer to each node.
  for ( int i = 0 ; i < elem_count ; ++i ) {
    for ( int j = 0 ; j < elem_node_count ; ++j ) {
      ++node_elem_count( host_elem_node(i,j) );
    }
  }

  // Exclusive prefix sum of the counts gives each node's starting offset.
  host_node_scan(0) = 0 ;
  for ( int i = 0 ; i < node_count ; ++i ) {
    host_node_scan(i+1) = host_node_scan(i) + node_elem_count(i);
    node_elem_count(i) = 0 ; // reset for use as fill position in pass 2
  }

  // Pass 2: place every (element,local node) pair into its node's span.
  for ( int i = 0 ; i < elem_count ; ++i ) {
    for ( int j = 0 ; j < elem_node_count ; ++j ) {
      const int inode  = host_elem_node(i,j);
      const int offset = host_node_scan(inode) + node_elem_count(inode);

      host_node_elem( offset , 0 ) = i ;
      host_node_elem( offset , 1 ) = j ;

      ++node_elem_count(inode);
    }
  }

  Kokkos::deep_copy( node_scan , host_node_scan );
  Kokkos::deep_copy( node_elem , host_node_elem );
}
} /* namespace Example */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_EXAMPLE_FEINT_FUNCTORS_HPP */

View File

@ -1,62 +0,0 @@
# Stand-alone GNU Makefile for the 'feint' example (executable KokkosExample_Feint).
# Root of the Kokkos tree, relative to this directory.
KOKKOS_PATH = ../..
# Look for .cpp sources in the fixture and feint example directories.
vpath %.cpp ${KOKKOS_PATH}/example/fixture ${KOKKOS_PATH}/example/feint
# All example headers; every object file depends on them (see the
# compilation rule at the end of this file).
EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/common/*.hpp ${KOKKOS_PATH}/example/fixture/*.hpp ${KOKKOS_PATH}/example/feint/*.hpp)
# Default goal: build everything, then report completion.
default: build_all
echo "End Build"
# Provides the KOKKOS_* variables used below.
include $(KOKKOS_PATH)/Makefile.kokkos
# With CUDA enabled the compiler and linker are forced to nvcc_wrapper;
# otherwise they default to g++ but may be overridden by the user.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
# Include paths of the example directories whose headers are used.
KOKKOS_CXXFLAGS += \
-I${KOKKOS_PATH}/example/common \
-I${KOKKOS_PATH}/example/fixture \
-I${KOKKOS_PATH}/example/feint
EXE_EXAMPLE_FEINT = KokkosExample_Feint
# Objects always linked; one more object is added for each enabled backend.
OBJ_EXAMPLE_FEINT = BoxElemPart.o main.o
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_EXAMPLE_FEINT += feint_cuda.o
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
OBJ_EXAMPLE_FEINT += feint_threads.o
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_EXAMPLE_FEINT += feint_openmp.o
endif
TARGETS = $(EXE_EXAMPLE_FEINT)
#TEST_TARGETS =
# Link rule for the example executable.
$(EXE_EXAMPLE_FEINT) : $(OBJ_EXAMPLE_FEINT) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_EXAMPLE_FEINT) $(KOKKOS_LIBS) $(LIB) -o $(EXE_EXAMPLE_FEINT)
build_all : $(TARGETS)
# 'test' only builds; no test command is attached to it.
test : build_all
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,165 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_FEINT_HPP
#define KOKKOS_EXAMPLE_FEINT_HPP
#include <iostream>
#include <BoxElemFixture.hpp>
#include <ElemFunctor.hpp>
#include <feint_fwd.hpp>
namespace Kokkos {
namespace Example {
/** \brief Vector valued function to numerically integrate.
*
* F(X) = { 1 , x , y , z , x*y , y*z , z*x , x*y*z }
*
* Integrates on a unit cube to:
* { 1 , 1/2 , 1/2 , 1/2 , 1/4 , 1/4 , 1/4 , 1/8 }
*/
struct MyFunctionType {

  // Number of components produced by one evaluation of F(X).
  enum { value_count = 8 };

  /** \brief Evaluate the vector-valued function at one coordinate.
   *
   *  \param point  input coordinate; entries [0], [1], [2] are read as x, y, z
   *  \param value  output array; entries [0..7] are overwritten with
   *                { 1 , x , y , z , x*y , y*z , z*x , x*y*z }
   */
  template< typename CoordType , typename ValueType >
  KOKKOS_INLINE_FUNCTION
  void operator()( const CoordType point[] , ValueType value[] ) const
    {
      // References (not copies) so each use still reads the caller's array.
      const CoordType & x = point[0] ;
      const CoordType & y = point[1] ;
      const CoordType & z = point[2] ;

      // Constant and linear terms
      value[0] = 1 ;
      value[1] = x ;
      value[2] = y ;
      value[3] = z ;

      // Pairwise products
      value[4] = x * y ;
      value[5] = y * z ;
      value[6] = z * x ;

      // Triple product
      value[7] = x * y * z ;
    }
};
template < class Device , bool UseAtomic >
void feint(
const unsigned global_elem_nx ,
const unsigned global_elem_ny ,
const unsigned global_elem_nz )
{
//----------------------------------------
// Create the unstructured finite element mesh box fixture on the device:
typedef Kokkos::Example::
BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemLinear >
// BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemQuadratic >
BoxFixtureType ;
// MPI distributed parallel domain decomposition of the fixture.
// Either by element (DecomposeElem) or by node (DecomposeNode)
// with ghosted elements.
static const Kokkos::Example::BoxElemPart::Decompose
decompose = Kokkos::Example::BoxElemPart:: DecomposeElem ;
// decompose = Kokkos::Example::BoxElemPart:: DecomposeNode ;
// Not using MPI in this example.
const unsigned mpi_rank = 0 ;
const unsigned mpi_size = 1 ;
const BoxFixtureType fixture( decompose , mpi_size , mpi_rank ,
global_elem_nx ,
global_elem_ny ,
global_elem_nz );
//----------------------------------------
// Create and execute the numerical integration functor on the device:
typedef Kokkos::Example::
FiniteElementIntegration< BoxFixtureType , MyFunctionType , UseAtomic >
FeintType ;
const FeintType feint( fixture , MyFunctionType() );
typename FeintType::value_type elem_integral ;
// A reduction for the global integral:
Kokkos::parallel_reduce( fixture.elem_count() , feint , elem_integral );
if ( elem_integral.error ) {
std::cout << "An element had a spatial jacobian error" << std::endl ;
return ;
}
std::cout << "Elem integral =" ;
for ( int i = 0 ; i < MyFunctionType::value_count ; ++i ) {
std::cout << " " << elem_integral.value[i] ;
}
std::cout << std::endl ;
//----------------------------------------
// Create and execute the nodal lumped value projection and reduction functor:
typedef Kokkos::Example::
LumpElemToNode< typename FeintType::NodeValueType ,
typename FeintType::ElemValueType ,
UseAtomic > LumpType ;
const LumpType lump( feint.m_node_lumped ,
feint.m_elem_integral ,
fixture.elem_node() );
typename LumpType ::value_type node_sum ;
Kokkos::parallel_reduce( fixture.node_count() , lump , node_sum );
std::cout << "Node lumped sum =" ;
for ( int i = 0 ; i < MyFunctionType::value_count ; ++i ) {
std::cout << " " << node_sum.value[i] ;
}
std::cout << std::endl ;
}
} /* namespace Example */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_EXAMPLE_FEINT_HPP */

View File

@ -1,67 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_CUDA )
#include <feint.hpp>
namespace Kokkos {
namespace Example {
// Explicit instantiations of the feint<Device,UseAtomic> template (defined in
// feint.hpp) for the Cuda device, so the linker finds them when main() calls
// the functions through the forward declaration only.
// Cuda device, UseAtomic = false
template void feint<Kokkos::Cuda,false>(
const unsigned global_elem_nx ,
const unsigned global_elem_ny ,
const unsigned global_elem_nz );
// Cuda device, UseAtomic = true
template void feint<Kokkos::Cuda,true>(
const unsigned global_elem_nx ,
const unsigned global_elem_ny ,
const unsigned global_elem_nz );
} /* namespace Example */
} /* namespace Kokkos */
#endif

View File

@ -1,60 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_FEINT_FWD_HPP
#define KOKKOS_EXAMPLE_FEINT_FWD_HPP
namespace Kokkos {
namespace Example {
/** \brief Forward declaration of the feint example driver.
*
* Declared here without a body so a caller can include this header alone;
* the three global element counts default to 100, 115 and 130 when omitted.
*
* \tparam Device first template argument, a Kokkos device type
* \tparam UseAtomic second template argument, a compile-time boolean switch
*/
template < class Device , bool UseAtomic >
void feint(
const unsigned global_elem_nx = 100 ,
const unsigned global_elem_ny = 115 ,
const unsigned global_elem_nz = 130 );
} /* namespace Example */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_EXAMPLE_FEINT_FWD_HPP */

View File

@ -1,67 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#ifdef KOKKOS_HAVE_OPENMP
#include <feint.hpp>
namespace Kokkos {
namespace Example {
// Explicit instantiations of the feint<Device,UseAtomic> template (defined in
// feint.hpp) for the OpenMP device; only compiled when KOKKOS_HAVE_OPENMP
// is defined by the enclosing #ifdef.
// OpenMP device, UseAtomic = false
template void feint<Kokkos::OpenMP,false>(
const unsigned global_elem_nx ,
const unsigned global_elem_ny ,
const unsigned global_elem_nz );
// OpenMP device, UseAtomic = true
template void feint<Kokkos::OpenMP,true>(
const unsigned global_elem_nx ,
const unsigned global_elem_ny ,
const unsigned global_elem_nz );
} /* namespace Example */
} /* namespace Kokkos */
#endif

View File

@ -1,66 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#if defined( KOKKOS_HAVE_PTHREAD )
#include <feint.hpp>
namespace Kokkos {
namespace Example {
// Explicit instantiations of the feint<Device,UseAtomic> template (defined in
// feint.hpp) for the Threads device; only compiled when KOKKOS_HAVE_PTHREAD
// is defined by the enclosing #if.
// Threads device, UseAtomic = false
template void feint< Kokkos::Threads ,false>(
const unsigned global_elem_nx ,
const unsigned global_elem_ny ,
const unsigned global_elem_nz );
// Threads device, UseAtomic = true
template void feint< Kokkos::Threads ,true>(
const unsigned global_elem_nx ,
const unsigned global_elem_ny ,
const unsigned global_elem_nz );
} /* namespace Example */
} /* namespace Kokkos */
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */

View File

@ -1,110 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#include <algorithm>
#include <iostream>
#include <utility>
#include <Kokkos_Core.hpp>
#include <feint_fwd.hpp>
/** \brief Driver for the feint example.
 *
 *  For every device enabled at compile time (Threads, OpenMP, Cuda) the
 *  device is initialized, feint is run once without and once with the
 *  UseAtomic template flag using the default problem size, and the device
 *  is finalized again.  A banner naming the configuration is printed to
 *  std::cout before each run.
 *
 *  \return 0 always.
 */
int main()
{
#if defined( KOKKOS_HAVE_PTHREAD )
  {
    // Use 4 cores per NUMA region, unless fewer available

    const unsigned use_numa_count     = Kokkos::hwloc::get_available_numa_count();
    const unsigned use_cores_per_numa = std::min( 4u , Kokkos::hwloc::get_available_cores_per_numa() );

    Kokkos::Threads::initialize( use_numa_count * use_cores_per_numa );

    std::cout << "feint< Threads , NotUsingAtomic >" << std::endl ;
    Kokkos::Example::feint< Kokkos::Threads , false >();

    std::cout << "feint< Threads , UsingAtomic >" << std::endl ;
    Kokkos::Example::feint< Kokkos::Threads , true  >();

    Kokkos::Threads::finalize();
  }
#endif

#if defined( KOKKOS_HAVE_OPENMP )
  {
    // Use 4 cores per NUMA region, unless fewer available

    const unsigned use_numa_count     = Kokkos::hwloc::get_available_numa_count();
    const unsigned use_cores_per_numa = std::min( 4u , Kokkos::hwloc::get_available_cores_per_numa() );

    Kokkos::OpenMP::initialize( use_numa_count * use_cores_per_numa );

    std::cout << "feint< OpenMP , NotUsingAtomic >" << std::endl ;
    Kokkos::Example::feint< Kokkos::OpenMP , false >();

    std::cout << "feint< OpenMP , UsingAtomic >" << std::endl ;
    Kokkos::Example::feint< Kokkos::OpenMP , true  >();

    Kokkos::OpenMP::finalize();
  }
#endif

#if defined( KOKKOS_HAVE_CUDA )
  {
    // Initialize Host mirror device
    Kokkos::HostSpace::execution_space::initialize(1);

    const unsigned device_count = Kokkos::Cuda::detect_device_count();

    if ( 0 == device_count ) {
      // device_count-1 would wrap around for an unsigned zero,
      // so skip the Cuda runs instead of selecting a bogus device.
      std::cout << "feint< Cuda > skipped: no Cuda device detected" << std::endl ;
    }
    else {
      // Use the last device:
      Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(device_count-1) );

      std::cout << "feint< Cuda , NotUsingAtomic >" << std::endl ;
      Kokkos::Example::feint< Kokkos::Cuda , false >();

      std::cout << "feint< Cuda , UsingAtomic >" << std::endl ;
      Kokkos::Example::feint< Kokkos::Cuda , true  >();

      Kokkos::Cuda::finalize();
    }

    Kokkos::HostSpace::execution_space::finalize();
  }
#endif

  return 0 ;
}

View File

@ -1,296 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_CG_SOLVE
#define KOKKOS_EXAMPLE_CG_SOLVE
#include <cmath>
#include <limits>
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
#include <WrapMPI.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
/** \brief Sparse matrix in compressed row storage.
 *
 *  Pairs a static CRS graph (row map + column entries) with a
 *  one-dimensional view holding one coefficient per graph entry.
 */
template< typename ValueType , class Space >
struct CrsMatrix {

  typedef Kokkos::StaticCrsGraph< unsigned , Space , void , unsigned > StaticCrsGraphType ;
  typedef View< ValueType * , Space > coeff_type ;

  StaticCrsGraphType  graph ;  // sparsity pattern
  coeff_type          coeff ;  // coefficient for each graph entry

  // Empty matrix: default-constructed graph and coefficients.
  CrsMatrix()
    : graph()
    , coeff()
    {}

  // Adopt an existing graph and allocate a coefficient view
  // with one value per graph entry.
  CrsMatrix( const StaticCrsGraphType & arg_graph )
    : graph( arg_graph )
    , coeff( "crs_matrix_coeff" , arg_graph.entries.dimension_0() )
    {}
};
/** \brief Functor computing one row of the sparse product y = A * x.
 *
 *  Calling the functor with a row index accumulates, in double precision,
 *  the products of that row's coefficients with the matching entries of x
 *  and stores the sum in y(row).
 */
template< typename MScalar , typename VScalar , class Space >
struct Multiply {

  const Example::CrsMatrix< MScalar , Space >    m_A ;
  const Kokkos::View< const VScalar * , Space >  m_x ;
  const Kokkos::View<       VScalar * , Space >  m_y ;

  KOKKOS_INLINE_FUNCTION
  void operator()( const int iRow ) const
    {
      // Entries of row iRow occupy [ row_map[iRow] , row_map[iRow+1] )
      const int row_end = m_A.graph.row_map[iRow+1];

      double row_sum = 0 ;

      for ( int k = m_A.graph.row_map[iRow] ; k < row_end ; ++k ) {
        row_sum += m_A.coeff(k) * m_x( m_A.graph.entries(k) );
      }

      m_y(iRow) = row_sum ;
    }

  // Arguments are ordered ( output , matrix , input ).
  Multiply( const View<       VScalar * , Space > & y
          , const CrsMatrix< MScalar , Space >    & A
          , const View< const VScalar * , Space > & x
          )
    : m_A( A )
    , m_x( x )
    , m_y( y )
    {}
};
/** \brief Sparse matrix-vector product y = A * x over the first nrow rows.
 *
 *  Dispatches the Multiply functor with a parallel_for over [0,nrow)
 *  in execution space Space.
 */
template< typename MScalar , typename VScalar , class Space >
inline
void multiply( const int nrow
             , const Kokkos::View< VScalar * , Space >    & y
             , const Example::CrsMatrix< MScalar , Space > & A
             , const Kokkos::View< VScalar * , Space >    & x
             )
{
  typedef Multiply< MScalar , VScalar , Space > functor_type ;

  const functor_type row_product( y , A , x );

  Kokkos::parallel_for( Kokkos::RangePolicy<Space>(0,nrow), row_product );
}
template< typename ValueType , class Space >
struct WAXPBY {
const Kokkos::View< const ValueType * , Space > m_x ;
const Kokkos::View< const ValueType * , Space > m_y ;
const Kokkos::View< ValueType * , Space > m_w ;
const double m_alpha ;
const double m_beta ;
KOKKOS_INLINE_FUNCTION
void operator()( const int i ) const
{ m_w(i) = m_alpha * m_x(i) + m_beta * m_y(i); }
WAXPBY( const View< ValueType * , Space > & arg_w
, const double arg_alpha
, const View< ValueType * , Space > & arg_x
, const double arg_beta
, const View< ValueType * , Space > & arg_y
)
: m_x( arg_x )
, m_y( arg_y )
, m_w( arg_w )
, m_alpha( arg_alpha )
, m_beta( arg_beta )
{}
};
/** \brief Compute w = alpha * x + beta * y for the first n entries.
 *
 *  Dispatches the WAXPBY functor with a parallel_for over [0,n)
 *  in execution space Space.
 */
template< typename VScalar , class Space >
void waxpby( const int n
           , const Kokkos::View< VScalar * , Space > & arg_w
           , const double                      arg_alpha
           , const Kokkos::View< VScalar * , Space > & arg_x
           , const double                      arg_beta
           , const Kokkos::View< VScalar * , Space > & arg_y
           )
{
  typedef WAXPBY< VScalar , Space > functor_type ;

  const functor_type update( arg_w , arg_alpha , arg_x , arg_beta , arg_y );

  Kokkos::parallel_for( Kokkos::RangePolicy<Space>(0,n), update );
}
/** \brief Reduction functor for the dot product of two vectors.
 *
 *  Each call adds x(i) * y(i) to the running double-precision total.
 */
template< typename VScalar , class Space >
struct Dot {

  typedef double value_type ;

  const Kokkos::View< const VScalar * , Space >  m_x ;
  const Kokkos::View< const VScalar * , Space >  m_y ;

  KOKKOS_INLINE_FUNCTION
  void operator()( const int i , value_type & update ) const
    {
      update += m_x(i) * m_y(i);
    }

  Dot( const Kokkos::View< VScalar * , Space > & x
     , const Kokkos::View< VScalar * , Space > & y
     )
    : m_x( x )
    , m_y( y )
    {}
};
/** \brief Dot product of the first n entries of two vectors.
 *
 *  Runs the Dot functor through a parallel_reduce over [0,n)
 *  in execution space Space.
 *
 *  \return the reduced sum as a double
 */
template< typename VScalar , class Space >
double dot( const int n
          , const Kokkos::View< VScalar * , Space > & arg_x
          , const Kokkos::View< VScalar * , Space > & arg_y
          )
{
  typedef Dot< VScalar , Space > functor_type ;

  double result = 0 ;

  const functor_type product( arg_x , arg_y );

  Kokkos::parallel_reduce( Kokkos::RangePolicy<Space>(0,n) , product , result );

  return result ;
}
} // namespace Example
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
// Optional statistics filled in by cgsolve() when the caller passes a
// non-null result pointer.
struct CGSolveResult {
// Number of loop iterations that were executed.
size_t iteration ;
// Wall-clock seconds measured from just before the iteration loop
// until after the final fence.
double iter_time ;
// Seconds accumulated over all iterations for the import, the
// matrix-vector multiply and the fence that follows it.
double matvec_time ;
// Square root of the last computed (all-reduced) dot( r , r ).
double norm_res ;
};
/** \brief Conjugate-gradient iteration for A * x = b.
 *
 *  \param import             callable applied to the owned+received vector
 *                            before every matrix-vector product; must also
 *                            expose count_owned, count_receive and comm
 *  \param A                  sparse matrix
 *  \param b                  right-hand side (owned entries)
 *  \param x                  initial guess on entry, updated in place
 *  \param maximum_iteration  upper bound on the number of iterations
 *  \param tolerance          iteration stops once the residual norm is
 *                            no longer greater than this value
 *  \param result             if non-null, receives iteration count,
 *                            timings and the final residual norm
 */
template< class ImportType
        , typename MScalar
        , typename VScalar
        , class Space
        >
inline
void cgsolve( const ImportType & import
            , const CrsMatrix< MScalar , Space >      & A
            , const Kokkos::View< VScalar * , Space > & b
            , const Kokkos::View< VScalar * , Space > & x
            , const size_t  maximum_iteration = 200
            , const double  tolerance = std::numeric_limits<double>::epsilon()
            , CGSolveResult * result = 0
            )
{
  typedef View< VScalar * , Space >  VectorType ;

  const size_t count_owned = import.count_owned ;
  const size_t count_total = import.count_owned + import.count_receive;

  size_t  iteration = 0 ;
  double  iter_time = 0 ;
  double  matvec_time = 0 ;
  double  norm_res = 0 ;

  // Need input vector to matvec to be owned + received
  VectorType pAll ( "cg::p" , count_total );

  // p aliases the owned leading part of pAll
  VectorType p = Kokkos::subview( pAll , std::pair<size_t,size_t>(0,count_owned) );
  VectorType r ( "cg::r" , count_owned );
  VectorType Ap( "cg::Ap", count_owned );

  /* r = b - A * x ; */

  /* p  = x       */  Kokkos::deep_copy( p , x );
  /* import p     */  import( pAll );
  /* Ap = A * p   */  multiply( count_owned , Ap , A , pAll );
  /* r = b - Ap   */  waxpby( count_owned , r , 1.0 , b , -1.0 , Ap );
  /* p  = r       */  Kokkos::deep_copy( p , r );

  double old_rdot = Kokkos::Example::all_reduce( dot( count_owned , r , r ) , import.comm );

  // std::sqrt is the overload <cmath> is guaranteed to declare;
  // the unqualified global name is not.
  norm_res = std::sqrt( old_rdot );

  Kokkos::Impl::Timer wall_clock ;  // whole iteration loop
  Kokkos::Impl::Timer timer;        // import + matvec of one iteration

  while ( tolerance < norm_res && iteration < maximum_iteration ) {

    /* pAp_dot = dot( p , Ap = A * p ) */

    timer.reset();
    /* import p    */  import( pAll );
    /* Ap = A * p  */  multiply( count_owned , Ap , A , pAll );
    Space::fence();
    matvec_time += timer.seconds();

    const double pAp_dot = Kokkos::Example::all_reduce( dot( count_owned , p , Ap ) , import.comm );
    const double alpha   = old_rdot / pAp_dot ;

    /* x +=  alpha * p ;  */ waxpby( count_owned , x ,  alpha, p  , 1.0 , x );
    /* r += -alpha * Ap ; */ waxpby( count_owned , r , -alpha, Ap , 1.0 , r );

    const double r_dot = Kokkos::Example::all_reduce( dot( count_owned , r , r ) , import.comm );
    const double beta  = r_dot / old_rdot ;

    /* p = r + beta * p ; */ waxpby( count_owned , p , 1.0 , r , beta , p );

    norm_res = std::sqrt( old_rdot = r_dot );

    ++iteration ;
  }

  // Wait for outstanding device work so the wall-clock time is meaningful
  Space::fence();
  iter_time = wall_clock.seconds();

  if ( 0 != result ) {
    result->iteration   = iteration ;
    result->iter_time   = iter_time ;
    result->matvec_time = matvec_time ;
    result->norm_res    = norm_res ;
  }
}
} // namespace Example
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_EXAMPLE_CG_SOLVE */

View File

@ -1,17 +0,0 @@
# Build description for the "fenl" example executable.
# Header search path: build dir, this directory, and the sibling
# ../common and ../fixture example directories.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../fixture)
# Source list: every *.cpp matched by the glob, plus
# BoxElemPart.cpp taken from the fixture example.
SET(SOURCES "")
FILE( GLOB SOURCES *.cpp )
LIST( APPEND SOURCES ../fixture/BoxElemPart.cpp )
# Executable "fenl", enabled for both serial and MPI builds.
TRIBITS_ADD_EXECUTABLE(
fenl
SOURCES ${SOURCES}
COMM serial mpi
)

View File

@ -1,57 +0,0 @@
# Stand-alone Makefile for the fenl example (KokkosExample_Fenl).
# KOKKOS_PATH may be overridden on the command line; it must contain Makefile.kokkos.
KOKKOS_PATH ?= ../..
# Directory holding this Makefile, so the build also works from another directory.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
SRC_DIR := $(dir $(MAKEFILE_PATH))
# Look for sources in the fixture example and in this directory.
vpath %.cpp ${SRC_DIR}/../fixture ${SRC_DIR}
# Every header of the common, fixture and fenl directories is a prerequisite of each object.
EXAMPLE_HEADERS = $(wildcard $(SRC_DIR)/../common/*.hpp ${SRC_DIR}/../fixture/*.hpp ${SRC_DIR}/*.hpp)
default: build_all
echo "End Build"
include $(KOKKOS_PATH)/Makefile.kokkos
# KOKKOS_INTERNAL_USE_CUDA is not exported to installed Makefile.kokkos
# use KOKKOS_DEVICES here
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
# Extra include directories for the example headers.
KOKKOS_CXXFLAGS += \
-I${SRC_DIR}/../common \
-I${SRC_DIR}/../fixture \
-I${SRC_DIR}
EXE_EXAMPLE_FENL = KokkosExample_Fenl
OBJ_EXAMPLE_FENL = BoxElemPart.o main.o fenl.o
TARGETS = $(EXE_EXAMPLE_FENL)
#TEST_TARGETS =
# Link step
$(EXE_EXAMPLE_FENL) : $(OBJ_EXAMPLE_FENL) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_EXAMPLE_FENL) $(KOKKOS_LIBS) $(LIB) -o $(EXE_EXAMPLE_FENL)
build_all : $(TARGETS)
# "test" only builds; no test targets are defined for this example.
test : build_all
clean:
rm -f *.o $(EXE_EXAMPLE_FENL) KokkosCore_config.*
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,117 +0,0 @@
/*
// ************************************************************************
//
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
// Copyright (2012) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
*/
#include <HexElement.hpp>
#include <fenl_impl.hpp>
namespace Kokkos {
namespace Example {
namespace FENL {
// Explicit instantiations of fenl<Device,ElemOrder> for every device that is
// enabled at compile time, each with linear and quadratic elements.
// Threads device
#if defined( KOKKOS_HAVE_PTHREAD )
template
Perf fenl< Kokkos::Threads , Kokkos::Example::BoxElemPart::ElemLinear >(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int global_elems[] );
template
Perf fenl< Kokkos::Threads , Kokkos::Example::BoxElemPart::ElemQuadratic >(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int global_elems[] );
#endif
// OpenMP device
#if defined (KOKKOS_HAVE_OPENMP)
template
Perf fenl< Kokkos::OpenMP , Kokkos::Example::BoxElemPart::ElemLinear >(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int global_elems[] );
template
Perf fenl< Kokkos::OpenMP , Kokkos::Example::BoxElemPart::ElemQuadratic >(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int global_elems[] );
#endif
// Cuda device
#if defined( KOKKOS_HAVE_CUDA )
template
Perf fenl< Kokkos::Cuda , Kokkos::Example::BoxElemPart::ElemLinear >(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int global_elems[] );
template
Perf fenl< Kokkos::Cuda , Kokkos::Example::BoxElemPart::ElemQuadratic >(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int global_elems[] );
#endif
} /* namespace FENL */
} /* namespace Example */
} /* namespace Kokkos */

View File

@ -1,89 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_FENL_HPP
#define KOKKOS_EXAMPLE_FENL_HPP
#include <stdlib.h>
#include <BoxElemPart.hpp>
#include <WrapMPI.hpp>
namespace Kokkos {
namespace Example {
namespace FENL {
// Result record returned by fenl(): problem size, iteration counts,
// timings and the final accuracy measures of one run.
//
// Plain aggregate with no constructors; fenl() creates it with
// 'Perf()' so every member starts at zero.
struct Perf {

  // ---- Problem size and iteration counts ----

  size_t global_elem_count ;    // number of elements in the whole mesh
  size_t global_node_count ;    // number of nodes in the whole mesh
  size_t newton_iter_count ;    // Newton iterations performed
  size_t cg_iter_count ;        // CG iterations summed over all Newton steps

  // ---- Sparse graph construction ----
  // Values reported by the node-to-node graph builder, reduced with a
  // maximum over all MPI ranks.  The timings are presumably in seconds
  // (the driver multiplies by 1000 and labels them "millisec") - confirm
  // against the graph builder.

  double map_ratio ;
  double fill_node_set ;
  double scan_node_count ;
  double fill_graph_entries ;
  double sort_graph_entries ;
  double fill_element_graph ;

  // ---- Matrix creation, assembly and solve (wall-clock seconds) ----

  double create_sparse_matrix ; // building the matrix from the graph
  double fill_time ;            // element residual / jacobian fill
  double bc_time ;              // applying the boundary conditions
  double matvec_time ;          // matrix-vector time accumulated from the CG solves
  double cg_time ;              // iteration time accumulated from the CG solves

  // ---- Accuracy ----

  double newton_residual ;      // residual norm at the last Newton iteration
  double error_max ;            // error measure against the manufactured solution
};
// Run the finite-element nonlinear example on a box mesh and return its
// performance record.
//
// Template parameters:
//   Device    - Kokkos device the computation runs on.
//   ElemOrder - element order of the mesh (linear or quadratic).
//
// Parameters:
//   comm         - MPI communicator the mesh is partitioned across.
//   use_print    - non-zero enables diagnostic output.
//   use_trials   - number of times the whole solve is repeated.
//   use_atomic   - non-zero selects the atomic assembly variant.
//   global_elems - element counts of the box; entries [0], [1] and [2]
//                  are read.
//
// Only declared here; the definition is in a separate implementation
// header.
template < class Device , BoxElemPart::ElemOrder ElemOrder >
Perf fenl(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int global_elems[] );
} /* namespace FENL */
} /* namespace Example */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_EXAMPLE_FENL_HPP */

File diff suppressed because it is too large Load Diff

View File

@ -1,598 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_FENL_IMPL_HPP
#define KOKKOS_EXAMPLE_FENL_IMPL_HPP
#include <math.h>
// Kokkos libraries' headers:
#include <Kokkos_UnorderedMap.hpp>
#include <Kokkos_StaticCrsGraph.hpp>
#include <impl/Kokkos_Timer.hpp>
// Examples headers:
#include <BoxElemFixture.hpp>
#include <VectorImport.hpp>
#include <CGSolve.hpp>
#include <fenl.hpp>
#include <fenl_functors.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
namespace FENL {
// Largest value of 'local' over all ranks of 'comm'.
// Without MPI support the local value is returned as is.
inline
double maximum( MPI_Comm comm , double local )
{
#if defined( KOKKOS_HAVE_MPI )
  double reduced = local ;
  MPI_Allreduce( & local , & reduced , 1 , MPI_DOUBLE , MPI_MAX , comm );
  return reduced ;
#else
  return local ;
#endif
}
} /* namespace FENL */
} /* namespace Example */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
namespace FENL {
// Analytic reference solution of the one dimensional nonlinear problem
//
//   -K T_zz + T^2 = 0 ;  T(zmin) = T_zmin ;  T(zmax) = T_zmax
//
// The solution has the form
//
//   T(z) = ( a ( z - zmin ) + b )^(-2)   with   K = 1 / ( 6 a^2 )
//
// so the boundary values T_zmin and T_zmax determine a, b and K.
// Two choices of ( a , b ) satisfy the boundary values:
//
//   with a singularity inside the interval:
//     a = ( 1.0 / sqrt(T_zmax) + 1.0 / sqrt(T_zmin) ) / ( zmax - zmin )
//     b = -1.0 / sqrt(T_zmin)
//
//   without a singularity (the one implemented here):
//     a = ( 1.0 / sqrt(T_zmax) - 1.0 / sqrt(T_zmin) ) / ( zmax - zmin )
//     b = 1.0 / sqrt(T_zmin)
class ManufacturedSolution {
public:

  const double zmin ;    // lower end of the interval
  const double zmax ;    // upper end of the interval
  const double T_zmin ;  // boundary value at zmin
  const double T_zmax ;  // boundary value at zmax
  const double a ;       // slope coefficient of the solution
  const double b ;       // offset coefficient of the solution
  const double K ;       // diffusion coefficient matching a

  // Derive a, b and K from the interval and its two boundary values.
  ManufacturedSolution( const double arg_zmin ,
                        const double arg_zmax ,
                        const double arg_T_zmin ,
                        const double arg_T_zmax )
    : zmin( arg_zmin )
    , zmax( arg_zmax )
    , T_zmin( arg_T_zmin )
    , T_zmax( arg_T_zmax )
    , a( ( 1.0 / sqrt(arg_T_zmax) - 1.0 / sqrt(arg_T_zmin) ) / ( arg_zmax - arg_zmin ) )
    , b( 1.0 / sqrt(arg_T_zmin) )
    , K( 1.0 / ( 6.0 * a * a ) )
    {}

  // Evaluate T(z).
  double operator()( const double z ) const
  {
    const double base = a * ( z - zmin ) + b ;
    return 1.0 / ( base * base );
  }
};
} /* namespace FENL */
} /* namespace Example */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
namespace FENL {
// Solve the nonlinear manufactured-solution problem on a box mesh and
// return the collected performance record.
//
// Steps, repeated 'use_trials' times:
//   1. build the node-to-node sparse graph from the element connectivity,
//   2. create the sparse jacobian matrix from that graph,
//   3. run a Newton iteration whose linear systems are solved with CG,
//   4. on the first trial measure the error against the manufactured
//      solution; on later trials keep the smallest timings.
//
// Parameters:
//   comm       - MPI communicator the mesh is partitioned across.
//   use_print  - non-zero enables diagnostic output.
//   use_trials - number of repetitions of the whole solve.
//   use_atomic - non-zero lets the element computation write the matrix
//                and residual directly; zero stores per-element results
//                and combines them in a separate gather-fill pass.
//   use_elems  - element counts of the box; entries [0], [1], [2] are read.
//
// Throws std::runtime_error when any rank fails to generate its fixture.
template < class Space , BoxElemPart::ElemOrder ElemOrder >
Perf fenl(
MPI_Comm comm ,
const int use_print ,
const int use_trials ,
const int use_atomic ,
const int use_elems[] )
{
typedef Kokkos::Example::BoxElemFixture< Space , ElemOrder > FixtureType ;
typedef Kokkos::Example::CrsMatrix< double , Space >
SparseMatrixType ;
typedef typename SparseMatrixType::StaticCrsGraphType
SparseGraphType ;
typedef Kokkos::Example::FENL::NodeNodeGraph< typename FixtureType::elem_node_type , SparseGraphType , FixtureType::ElemNode >
NodeNodeGraphType ;
typedef Kokkos::Example::FENL::ElementComputation< FixtureType , SparseMatrixType >
ElementComputationType ;
typedef Kokkos::Example::FENL::DirichletComputation< FixtureType , SparseMatrixType >
DirichletComputationType ;
typedef NodeElemGatherFill< ElementComputationType >
NodeElemGatherFillType ;
typedef typename ElementComputationType::vector_type VectorType ;
typedef Kokkos::Example::VectorImport<
typename FixtureType::comm_list_type ,
typename FixtureType::send_nodeid_type ,
VectorType > ImportType ;
//------------------------------------
// Iteration limits and relative tolerances of the two nested solvers.
const unsigned newton_iteration_limit = 10 ;
const double newton_iteration_tolerance = 1e-7 ;
const unsigned cg_iteration_limit = 200 ;
const double cg_iteration_tolerance = 1e-7 ;
//------------------------------------
// Output that reads matrix and vector entries directly is only enabled
// when the memory space of 'Space' is Kokkos::HostSpace.
const int print_flag = use_print && Kokkos::Impl::is_same< Kokkos::HostSpace , typename Space::memory_space >::value ;
int comm_rank ;
int comm_size ;
MPI_Comm_rank( comm , & comm_rank );
MPI_Comm_size( comm , & comm_size );
// Decompose by node to avoid mpi-communication for assembly
const float bubble_x = 1.0 ;
const float bubble_y = 1.0 ;
const float bubble_z = 1.0 ;
const FixtureType fixture( BoxElemPart::DecomposeNode , comm_size , comm_rank ,
use_elems[0] , use_elems[1] , use_elems[2] ,
bubble_x , bubble_y , bubble_z );
{
// Sum the per-rank failure flags so that every rank throws together
// when any one of them could not build its part of the mesh.
int global_error = ! fixture.ok();
#if defined( KOKKOS_HAVE_MPI )
int local_error = global_error ;
global_error = 0 ;
MPI_Allreduce( & local_error , & global_error , 1 , MPI_INT , MPI_SUM , comm );
#endif
if ( global_error ) {
throw std::runtime_error(std::string("Error generating finite element fixture"));
}
}
//------------------------------------
// Communication plan for nodal vectors.  The last two arguments are the
// owned node count and the count of the remaining (not owned) nodes.
const ImportType comm_nodal_import(
comm ,
fixture.recv_node() ,
fixture.send_node() ,
fixture.send_nodeid() ,
fixture.node_count_owned() ,
fixture.node_count() - fixture.node_count_owned() );
//------------------------------------
// Boundary values at the two 'z' ends and the matching analytic solution
// on the interval [0,1].
const double bc_lower_value = 1 ;
const double bc_upper_value = 2 ;
const Kokkos::Example::FENL::ManufacturedSolution
manufactured_solution( 0 , 1 , bc_lower_value , bc_upper_value );
//------------------------------------
// Optional mesh dump, one rank at a time (the barrier at the end of each
// pass serializes the ranks).  The data is copied to host mirrors first,
// so this loop is guarded by 'use_print' rather than 'print_flag'.
for ( int k = 0 ; k < comm_size && use_print ; ++k ) {
if ( k == comm_rank ) {
typename FixtureType::node_grid_type::HostMirror
h_node_grid = Kokkos::create_mirror_view( fixture.node_grid() );
typename FixtureType::node_coord_type::HostMirror
h_node_coord = Kokkos::create_mirror_view( fixture.node_coord() );
typename FixtureType::elem_node_type::HostMirror
h_elem_node = Kokkos::create_mirror_view( fixture.elem_node() );
Kokkos::deep_copy( h_node_grid , fixture.node_grid() );
Kokkos::deep_copy( h_node_coord , fixture.node_coord() );
Kokkos::deep_copy( h_elem_node , fixture.elem_node() );
std::cout << "MPI[" << comm_rank << "]" << std::endl ;
std::cout << "Node grid {" ;
for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) {
std::cout << " (" << h_node_grid(inode,0)
<< "," << h_node_grid(inode,1)
<< "," << h_node_grid(inode,2)
<< ")" ;
}
std::cout << " }" << std::endl ;
std::cout << "Node coord {" ;
for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) {
std::cout << " (" << h_node_coord(inode,0)
<< "," << h_node_coord(inode,1)
<< "," << h_node_coord(inode,2)
<< ")" ;
}
std::cout << " }" << std::endl ;
std::cout << "Manufactured solution"
<< " a[" << manufactured_solution.a << "]"
<< " b[" << manufactured_solution.b << "]"
<< " K[" << manufactured_solution.K << "]"
<< " {" ;
// The analytic solution depends on the third node coordinate only.
for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) {
std::cout << " " << manufactured_solution( h_node_coord( inode , 2 ) );
}
std::cout << " }" << std::endl ;
std::cout << "ElemNode {" << std::endl ;
for ( unsigned ielem = 0 ; ielem < fixture.elem_count() ; ++ielem ) {
std::cout << " elem[" << ielem << "]{" ;
for ( unsigned inode = 0 ; inode < FixtureType::ElemNode ; ++inode ) {
std::cout << " " << h_elem_node(ielem,inode);
}
std::cout << " }{" ;
for ( unsigned inode = 0 ; inode < FixtureType::ElemNode ; ++inode ) {
std::cout << " (" << h_node_grid(h_elem_node(ielem,inode),0)
<< "," << h_node_grid(h_elem_node(ielem,inode),1)
<< "," << h_node_grid(h_elem_node(ielem,inode),2)
<< ")" ;
}
std::cout << " }" << std::endl ;
}
std::cout << "}" << std::endl ;
}
std::cout.flush();
MPI_Barrier( comm );
}
//------------------------------------
Kokkos::Impl::Timer wall_clock ;
// 'perf_stats' is the record returned to the caller; 'perf' below holds
// the measurements of the current trial.
Perf perf_stats = Perf() ;
for ( int itrial = 0 ; itrial < use_trials ; ++itrial ) {
Perf perf = Perf() ;
perf.global_elem_count = fixture.elem_count_global();
perf.global_node_count = fixture.node_count_global();
//----------------------------------
// Create the sparse matrix graph and element-to-graph map
// from the element->to->node identifier array.
// The graph only has rows for the owned nodes.
typename NodeNodeGraphType::Times graph_times;
const NodeNodeGraphType
mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(), graph_times );
// Report the largest value over all ranks for each graph measurement.
perf.map_ratio = maximum(comm, graph_times.ratio);
perf.fill_node_set = maximum(comm, graph_times.fill_node_set);
perf.scan_node_count = maximum(comm, graph_times.scan_node_count);
perf.fill_graph_entries = maximum(comm, graph_times.fill_graph_entries);
perf.sort_graph_entries = maximum(comm, graph_times.sort_graph_entries);
perf.fill_element_graph = maximum(comm, graph_times.fill_element_graph);
wall_clock.reset();
// Create the sparse matrix from the graph:
SparseMatrixType jacobian( mesh_to_graph.graph );
// Fence before reading the timer (presumably so that pending device
// work is included in the measurement - confirm against Kokkos docs).
Space::fence();
perf.create_sparse_matrix = maximum( comm , wall_clock.seconds() );
//----------------------------------
// Optional dump of the matrix graph and the element-to-graph map,
// one rank at a time; reads the views directly, hence 'print_flag'.
for ( int k = 0 ; k < comm_size && print_flag ; ++k ) {
if ( k == comm_rank ) {
const unsigned nrow = jacobian.graph.numRows();
std::cout << "MPI[" << comm_rank << "]" << std::endl ;
std::cout << "JacobianGraph {" << std::endl ;
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
std::cout << " row[" << irow << "]{" ;
const unsigned entry_end = jacobian.graph.row_map(irow+1);
for ( unsigned entry = jacobian.graph.row_map(irow) ; entry < entry_end ; ++entry ) {
std::cout << " " << jacobian.graph.entries(entry);
}
std::cout << " }" << std::endl ;
}
std::cout << "}" << std::endl ;
std::cout << "ElemGraph {" << std::endl ;
for ( unsigned ielem = 0 ; ielem < mesh_to_graph.elem_graph.dimension_0() ; ++ielem ) {
std::cout << " elem[" << ielem << "]{" ;
for ( unsigned irow = 0 ; irow < mesh_to_graph.elem_graph.dimension_1() ; ++irow ) {
std::cout << " {" ;
for ( unsigned icol = 0 ; icol < mesh_to_graph.elem_graph.dimension_2() ; ++icol ) {
std::cout << " " << mesh_to_graph.elem_graph(ielem,irow,icol);
}
std::cout << " }" ;
}
std::cout << " }" << std::endl ;
}
std::cout << "}" << std::endl ;
}
std::cout.flush();
MPI_Barrier( comm );
}
//----------------------------------
// Allocate solution vector for each node in the mesh and residual vector for each owned node
const VectorType nodal_solution( "nodal_solution" , fixture.node_count() );
const VectorType nodal_residual( "nodal_residual" , fixture.node_count_owned() );
const VectorType nodal_delta( "nodal_delta" , fixture.node_count_owned() );
// Create element computation functor
// With 'use_atomic' the functor is given the element graph, matrix and
// residual; otherwise it is only given the fixture, K and the solution.
const ElementComputationType elemcomp(
use_atomic ? ElementComputationType( fixture , manufactured_solution.K , nodal_solution ,
mesh_to_graph.elem_graph , jacobian , nodal_residual )
: ElementComputationType( fixture , manufactured_solution.K , nodal_solution ) );
// The gather-fill functor is only populated for the non-atomic variant,
// where it combines the per-element residuals and jacobians of 'elemcomp'
// into the residual vector and the matrix.
const NodeElemGatherFillType gatherfill(
use_atomic ? NodeElemGatherFillType()
: NodeElemGatherFillType( fixture.elem_node() ,
mesh_to_graph.elem_graph ,
nodal_residual ,
jacobian ,
elemcomp.elem_residuals ,
elemcomp.elem_jacobians ) );
// Create boundary condition functor
const DirichletComputationType dirichlet(
fixture , nodal_solution , jacobian , nodal_residual ,
2 /* apply at 'z' ends */ ,
manufactured_solution.T_zmin ,
manufactured_solution.T_zmax );
//----------------------------------
// Nonlinear Newton iteration:
double residual_norm_init = 0 ;
for ( perf.newton_iter_count = 0 ;
perf.newton_iter_count < newton_iteration_limit ;
++perf.newton_iter_count ) {
//--------------------------------
// Presumably refreshes the entries of nodal_solution that belong to
// other ranks - confirm against VectorImport.
comm_nodal_import( nodal_solution );
//--------------------------------
// Element contributions to residual and jacobian
wall_clock.reset();
Kokkos::deep_copy( nodal_residual , double(0) );
Kokkos::deep_copy( jacobian.coeff , double(0) );
elemcomp.apply();
if ( ! use_atomic ) {
gatherfill.apply();
}
Space::fence();
// Overwritten on every Newton iteration: the value that survives is
// the fill time of the last iteration.
perf.fill_time = maximum( comm , wall_clock.seconds() );
//--------------------------------
// Apply boundary conditions
wall_clock.reset();
dirichlet.apply();
Space::fence();
// As with fill_time, only the last iteration's value is kept.
perf.bc_time = maximum( comm , wall_clock.seconds() );
//--------------------------------
// Evaluate convergence
const double residual_norm =
std::sqrt(
Kokkos::Example::all_reduce(
Kokkos::Example::dot( fixture.node_count_owned() , nodal_residual, nodal_residual ) , comm ) );
perf.newton_residual = residual_norm ;
// Convergence is measured relative to the residual of iteration 0.
if ( 0 == perf.newton_iter_count ) { residual_norm_init = residual_norm ; }
if ( residual_norm < residual_norm_init * newton_iteration_tolerance ) { break ; }
//--------------------------------
// Solve for nonlinear update
CGSolveResult cg_result ;
Kokkos::Example::cgsolve( comm_nodal_import
, jacobian
, nodal_residual
, nodal_delta
, cg_iteration_limit
, cg_iteration_tolerance
, & cg_result
);
// Update solution vector
// Presumably nodal_solution = -1.0 * nodal_delta + 1.0 * nodal_solution
// over the owned nodes - confirm against waxpby's definition.
Kokkos::Example::waxpby( fixture.node_count_owned() , nodal_solution , -1.0 , nodal_delta , 1.0 , nodal_solution );
// CG statistics accumulate over all Newton iterations of this trial.
perf.cg_iter_count += cg_result.iteration ;
perf.matvec_time += cg_result.matvec_time ;
perf.cg_time += cg_result.iter_time ;
//--------------------------------
if ( print_flag ) {
const double delta_norm =
std::sqrt(
Kokkos::Example::all_reduce(
Kokkos::Example::dot( fixture.node_count_owned() , nodal_delta, nodal_delta ) , comm ) );
if ( 0 == comm_rank ) {
std::cout << "Newton iteration[" << perf.newton_iter_count << "]"
<< " residual[" << perf.newton_residual << "]"
<< " update[" << delta_norm << "]"
<< " cg_iteration[" << cg_result.iteration << "]"
<< " cg_residual[" << cg_result.norm_res << "]"
<< std::endl ;
}
// Dump residual, update, solution and matrix, one rank at a time.
for ( int k = 0 ; k < comm_size ; ++k ) {
if ( k == comm_rank ) {
const unsigned nrow = jacobian.graph.numRows();
std::cout << "MPI[" << comm_rank << "]" << std::endl ;
std::cout << "Residual {" ;
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
std::cout << " " << nodal_residual(irow);
}
std::cout << " }" << std::endl ;
std::cout << "Delta {" ;
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
std::cout << " " << nodal_delta(irow);
}
std::cout << " }" << std::endl ;
std::cout << "Solution {" ;
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
std::cout << " " << nodal_solution(irow);
}
std::cout << " }" << std::endl ;
std::cout << "Jacobian[ "
<< jacobian.graph.numRows() << " x " << Kokkos::maximum_entry( jacobian.graph )
<< " ] {" << std::endl ;
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
std::cout << " {" ;
const unsigned entry_end = jacobian.graph.row_map(irow+1);
for ( unsigned entry = jacobian.graph.row_map(irow) ; entry < entry_end ; ++entry ) {
std::cout << " (" << jacobian.graph.entries(entry)
<< "," << jacobian.coeff(entry)
<< ")" ;
}
std::cout << " }" << std::endl ;
}
std::cout << "}" << std::endl ;
}
std::cout.flush();
MPI_Barrier( comm );
}
}
//--------------------------------
}
// Evaluate solution error
// Only done on the first trial, whose record also seeds 'perf_stats'.
if ( 0 == itrial ) {
const typename FixtureType::node_coord_type::HostMirror
h_node_coord = Kokkos::create_mirror_view( fixture.node_coord() );
const typename VectorType::HostMirror
h_nodal_solution = Kokkos::create_mirror_view( nodal_solution );
Kokkos::deep_copy( h_node_coord , fixture.node_coord() );
Kokkos::deep_copy( h_nodal_solution , nodal_solution );
// Largest relative error magnitude over the owned nodes of this rank.
double error_max = 0 ;
for ( unsigned inode = 0 ; inode < fixture.node_count_owned() ; ++inode ) {
const double answer = manufactured_solution( h_node_coord( inode , 2 ) );
const double error = ( h_nodal_solution(inode) - answer ) / answer ;
if ( error_max < fabs( error ) ) { error_max = fabs( error ); }
}
// NOTE(review): the reported value is the square root of the reduced
// maximum relative error, not the maximum itself - confirm intent.
perf.error_max = std::sqrt( Kokkos::Example::all_reduce_max( error_max , comm ) );
perf_stats = perf ;
}
else {
// Later trials: keep the smallest time seen so far.
// NOTE(review): map_ratio and matvec_time are not updated here, so the
// returned record keeps their first-trial values - confirm intent.
perf_stats.fill_node_set = std::min( perf_stats.fill_node_set , perf.fill_node_set );
perf_stats.scan_node_count = std::min( perf_stats.scan_node_count , perf.scan_node_count );
perf_stats.fill_graph_entries = std::min( perf_stats.fill_graph_entries , perf.fill_graph_entries );
perf_stats.sort_graph_entries = std::min( perf_stats.sort_graph_entries , perf.sort_graph_entries );
perf_stats.fill_element_graph = std::min( perf_stats.fill_element_graph , perf.fill_element_graph );
perf_stats.create_sparse_matrix = std::min( perf_stats.create_sparse_matrix , perf.create_sparse_matrix );
perf_stats.fill_time = std::min( perf_stats.fill_time , perf.fill_time );
perf_stats.bc_time = std::min( perf_stats.bc_time , perf.bc_time );
perf_stats.cg_time = std::min( perf_stats.cg_time , perf.cg_time );
}
}
return perf_stats ;
}
} /* namespace FENL */
} /* namespace Example */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_EXAMPLE_FENL_IMPL_HPP */

View File

@ -1,422 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <utility>
#include <string>
#include <vector>
#include <sstream>
#include <iostream>
#include <iomanip>
#include <Kokkos_Core.hpp>
#include <WrapMPI.hpp>
#include <fenl.hpp>
// For vtune
#include <sys/types.h>
#include <unistd.h>
//----------------------------------------------------------------------------
// Indices into the integer command-line table filled in by main().
// CMD_COUNT comes last and is therefore the size of the table.
enum {
  CMD_USE_THREADS = 0 ,       // thread count for the Threads device
  CMD_USE_NUMA ,              // NUMA count given with 'cores'
  CMD_USE_CORE_PER_NUMA ,     // cores per NUMA given with 'cores'
  CMD_USE_CUDA ,              // run on the Cuda device
  CMD_USE_OPENMP ,            // thread count for the OpenMP device
  CMD_USE_CUDA_DEV ,          // Cuda device id
  CMD_USE_FIXTURE_X ,         // fixed mesh size, x
  CMD_USE_FIXTURE_Y ,         // fixed mesh size, y
  CMD_USE_FIXTURE_Z ,         // fixed mesh size, z
  CMD_USE_FIXTURE_BEGIN ,     // mesh size sweep, first value
  CMD_USE_FIXTURE_END ,       // mesh size sweep, last value
  CMD_USE_FIXTURE_QUADRATIC , // quadratic instead of linear elements
  CMD_USE_ATOMIC ,            // atomic assembly variant
  CMD_USE_TRIALS ,            // number of trials
  CMD_VTUNE ,                 // attach the vtune collector
  CMD_PRINT ,                 // diagnostic output
  CMD_ECHO ,                  // echo the parsed command line
  CMD_ERROR ,                 // set when parsing failed
  CMD_COUNT
};
// Write a one line summary of the parsed command-line table 'cmd' to 's'.
// Only options with a non-zero entry are listed; the line always ends
// with a newline.
void print_cmdline( std::ostream & s , const int cmd[] )
{
  // The NUMA layout is shared by the Threads and OpenMP summaries.
  const int numa          = cmd[ CMD_USE_NUMA ];
  const int core_per_numa = cmd[ CMD_USE_CORE_PER_NUMA ];

  if ( cmd[ CMD_USE_THREADS ] ) {
    s << " Threads(" << cmd[ CMD_USE_THREADS ] ;
    s << ") NUMA(" << numa ;
    s << ") CORE_PER_NUMA(" << core_per_numa ;
    s << ")" ;
  }

  if ( cmd[ CMD_USE_OPENMP ] ) {
    s << " OpenMP(" << cmd[ CMD_USE_OPENMP ] ;
    s << ") NUMA(" << numa ;
    s << ") CORE_PER_NUMA(" << core_per_numa ;
    s << ")" ;
  }

  // Fixed mesh size.
  if ( cmd[ CMD_USE_FIXTURE_X ] ) {
    s << " Fixture(" << cmd[ CMD_USE_FIXTURE_X ] ;
    s << "x" << cmd[ CMD_USE_FIXTURE_Y ] ;
    s << "x" << cmd[ CMD_USE_FIXTURE_Z ] ;
    s << ")" ;
  }

  // Mesh size sweep.
  if ( cmd[ CMD_USE_FIXTURE_BEGIN ] ) {
    s << " Fixture( " << cmd[ CMD_USE_FIXTURE_BEGIN ] ;
    s << " .. " << cmd[ CMD_USE_FIXTURE_END ] ;
    s << " )" ;
  }

  if ( cmd[ CMD_USE_FIXTURE_QUADRATIC ] ) { s << " Quadratic-Element" ; }

  if ( cmd[ CMD_USE_CUDA ] ) {
    s << " CUDA(" << cmd[ CMD_USE_CUDA_DEV ] << ")" ;
  }

  if ( cmd[ CMD_USE_ATOMIC ] ) { s << " ATOMIC" ; }

  if ( cmd[ CMD_USE_TRIALS ] ) {
    s << " TRIALS(" << cmd[ CMD_USE_TRIALS ] << ")" ;
  }

  if ( cmd[ CMD_VTUNE ] ) { s << " VTUNE" ; }

  if ( cmd[ CMD_PRINT ] ) { s << " PRINT" ; }

  s << std::endl ;
}
// Write one table row for 'perf' to 's'.
//
// 'widths' holds the field width of each of the 16 columns, in order:
// four counts, the map ratio, ten timings and the error.  Timings are
// scaled by 1000 and divided by the global node count; the two CG
// timings are additionally divided by the CG iteration count.
void print_perf_value( std::ostream & s , const std::vector<size_t> & widths, const Kokkos::Example::FENL::Perf & perf )
{
  const double nodes = static_cast<double>( perf.global_node_count );
  const double iters = static_cast<double>( perf.cg_iter_count );

  // Timing columns in output order.
  const double scaled_time[] = {
    ( perf.fill_node_set * 1000.0 ) / nodes ,
    ( perf.scan_node_count * 1000.0 ) / nodes ,
    ( perf.fill_graph_entries * 1000.0 ) / nodes ,
    ( perf.sort_graph_entries * 1000.0 ) / nodes ,
    ( perf.fill_element_graph * 1000.0 ) / nodes ,
    ( perf.create_sparse_matrix * 1000.0 ) / nodes ,
    ( perf.fill_time * 1000.0 ) / nodes ,
    ( perf.bc_time * 1000.0 ) / nodes ,
    ( ( perf.matvec_time * 1000.0 ) / iters ) / nodes ,
    ( ( perf.cg_time * 1000.0 ) / iters ) / nodes
  };
  const size_t scaled_time_count = sizeof( scaled_time ) / sizeof( scaled_time[0] );

  size_t col = 0 ;

  s << std::setw(widths[col++]) << perf.global_elem_count << " ,";
  s << std::setw(widths[col++]) << perf.global_node_count << " ,";
  s << std::setw(widths[col++]) << perf.newton_iter_count << " ,";
  s << std::setw(widths[col++]) << perf.cg_iter_count << " ,";
  s << std::setw(widths[col++]) << perf.map_ratio << " ,";

  for ( size_t k = 0 ; k < scaled_time_count ; ++k ) {
    s << std::setw(widths[col++]) << scaled_time[k] << " ,";
  }

  // Last column carries no separator.
  s << std::setw(widths[col]) << perf.error_max;
  s << std::endl ;
}
// Run the example on 'Device' and print a results table on rank 0.
//
// Rank 0 prints a banner naming the backend, element type and assembly
// variant, followed by the column titles and units.  Then either a sweep
// of mesh sizes (when a fixture range was given) or one fixed mesh is
// solved, and rank 0 prints one table row per solve.  The element order
// is chosen at run time from the command-line table.
template< class Device , Kokkos::Example::BoxElemPart::ElemOrder ElemOrder >
void run( MPI_Comm comm , const int cmd[] )
{
  int comm_rank = 0 ;

#if defined( KOKKOS_HAVE_MPI )
  MPI_Comm_rank( comm , & comm_rank );
#else
  comm = 0 ;
#endif

  const bool is_root   = ( 0 == comm_rank );
  const bool quadratic = ( 0 != cmd[ CMD_USE_FIXTURE_QUADRATIC ] );

  // Banner.
  if ( is_root ) {
    if ( cmd[ CMD_USE_THREADS ] ) {
      std::cout << "THREADS , " << cmd[ CMD_USE_THREADS ] ;
    }
    else if ( cmd[ CMD_USE_OPENMP ] ) {
      std::cout << "OPENMP , " << cmd[ CMD_USE_OPENMP ] ;
    }
    else if ( cmd[ CMD_USE_CUDA ] ) {
      std::cout << "CUDA" ;
    }

    std::cout << ( quadratic ? " , QUADRATIC-ELEMENT" : " , LINEAR-ELEMENT" );

    if ( cmd[ CMD_USE_ATOMIC ] ) { std::cout << " , USING ATOMICS" ; }
  }

  // Column titles and units, in the order print_perf_value() writes them.
  static const char * const column_title[] = {
    "ELEMS" , "NODES" , "NEWTON" , "CG" , "MAP_RATIO" ,
    "SET_FILL/NODE" , "SCAN/NODE" , "GRAPH_FILL/NODE" , "SORT/NODE" ,
    "ELEM_GRAPH_FILL/NODE" , "MATRIX_CREATE/NODE" , "MATRIX_FILL/NODE" ,
    "BOUNDARY/NODE" , "MAT_VEC/ITER/ROW" , "CG/ITER/ROW" , "ERROR"
  };
  static const char * const column_unit[] = {
    "count" , "count" , "iter" , "iter" , "ratio" ,
    "millisec" , "millisec" , "millisec" , "millisec" ,
    "millisec" , "millisec" , "millisec" ,
    "millisec" , "millisec" , "millisec" , "ratio"
  };
  const size_t column_count = sizeof( column_title ) / sizeof( column_title[0] );

  // Each column is at least 'min_width' wide and at least one character
  // wider than its title.
  const size_t min_width = 10 ;
  std::vector< size_t > widths( column_count );
  for ( size_t c = 0 ; c < column_count ; ++c ) {
    widths[c] = std::max( min_width , std::string( column_title[c] ).size() + 1 );
  }

  // Table header.
  if ( is_root ) {
    std::cout << std::endl ;

    for ( size_t c = 0 ; c < column_count ; ++c ) {
      std::cout << std::setw(widths[c]) << column_title[c] << " ,";
    }
    // Two backspaces and a blank overwrite the trailing separator.
    std::cout << "\b\b " << std::endl;

    for ( size_t c = 0 ; c < column_count ; ++c ) {
      std::cout << std::setw(widths[c]) << column_unit[c] << " ,";
    }
    std::cout << "\b\b " << std::endl;

    std::cout << std::scientific;
    std::cout.precision(3);
  }

  if ( cmd[ CMD_USE_FIXTURE_BEGIN ] ) {
    // Sweep: the target size doubles each pass until it reaches twice
    // the requested end value.
    const int sweep_stop = cmd[CMD_USE_FIXTURE_END] * 2 ;

    for ( int target = cmd[CMD_USE_FIXTURE_BEGIN] ; target < sweep_stop ; target *= 2 ) {

      int nelem[3] ;
      nelem[0] = std::max( 1 , static_cast<int>( cbrt( static_cast<double>( target ) / 2.0 ) ) );
      nelem[1] = 1 + nelem[0] ;
      nelem[2] = 2 * nelem[0] ;

      Kokkos::Example::FENL::Perf perf ;

      if ( quadratic ) {
        perf = Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemQuadratic >
                 ( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem );
      }
      else {
        perf = Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemLinear >
                 ( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem );
      }

      if ( is_root ) { print_perf_value( std::cout , widths, perf ); }
    }
  }
  else {
    // Single solve on the fixed mesh size.
    int nelem[3] ;
    nelem[0] = cmd[ CMD_USE_FIXTURE_X ] ;
    nelem[1] = cmd[ CMD_USE_FIXTURE_Y ] ;
    nelem[2] = cmd[ CMD_USE_FIXTURE_Z ] ;

    Kokkos::Example::FENL::Perf perf ;

    if ( quadratic ) {
      perf = Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemQuadratic >
               ( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem );
    }
    else {
      perf = Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemLinear >
               ( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem );
    }

    if ( is_root ) { print_perf_value( std::cout , widths, perf ); }
  }
}
//----------------------------------------------------------------------------
// Program entry point of the FENL example driver.
//
// Rank 0 parses the command line into an integer table, which is then
// broadcast to all ranks when MPI is enabled.  Recognised arguments
// (case-insensitive):
//   threads N | openmp N | cores NxM | cuda | cuda-dev N
//   fixture XxYxZ | fixture-range B..E | fixture-quadratic
//   atomic | trials N | vtune | print | echo
//
// An unrecognised argument, or an option whose value is missing, sets the
// error flag: nothing is run and the program returns -1.  With 'echo' the
// parsed table is printed and nothing is run.  Otherwise each backend that
// was both compiled in and requested is initialized, run and finalized.
int main( int argc , char ** argv )
{
  int comm_rank = 0 ;

#if defined( KOKKOS_HAVE_MPI )
  MPI_Init( & argc , & argv );
  MPI_Comm comm = MPI_COMM_WORLD ;
  MPI_Comm_rank( comm , & comm_rank );
#else
  MPI_Comm comm = 0 ;
  (void) comm ; // suppress warning
#endif

  int cmdline[ CMD_COUNT ] ;

  for ( int i = 0 ; i < CMD_COUNT ; ++i ) cmdline[i] = 0 ;

  if ( 0 == comm_rank ) {
    for ( int i = 1 ; i < argc ; ++i ) {

      // Options that consume the next argument as their value.
      const bool needs_value =
        0 == strcasecmp( argv[i] , "threads" ) ||
        0 == strcasecmp( argv[i] , "openmp" ) ||
        0 == strcasecmp( argv[i] , "cores" ) ||
        0 == strcasecmp( argv[i] , "cuda-dev" ) ||
        0 == strcasecmp( argv[i] , "fixture" ) ||
        0 == strcasecmp( argv[i] , "fixture-range" ) ||
        0 == strcasecmp( argv[i] , "trials" ) ;

      // Without this check a trailing option would hand argv[argc],
      // a null pointer, to atoi / sscanf.
      if ( needs_value && argc <= i + 1 ) {
        cmdline[ CMD_ERROR ] = 1 ;
        std::cerr << "Missing value for command line argument #" << i << ": " << argv[i] << std::endl ;
        continue ;
      }

      if ( 0 == strcasecmp( argv[i] , "threads" ) ) {
        cmdline[ CMD_USE_THREADS ] = atoi( argv[++i] );
      }
      else if ( 0 == strcasecmp( argv[i] , "openmp" ) ) {
        cmdline[ CMD_USE_OPENMP ] = atoi( argv[++i] );
      }
      else if ( 0 == strcasecmp( argv[i] , "cores" ) ) {
        sscanf( argv[++i] , "%dx%d" ,
                cmdline + CMD_USE_NUMA ,
                cmdline + CMD_USE_CORE_PER_NUMA );
      }
      else if ( 0 == strcasecmp( argv[i] , "cuda" ) ) {
        cmdline[ CMD_USE_CUDA ] = 1 ;
      }
      else if ( 0 == strcasecmp( argv[i] , "cuda-dev" ) ) {
        cmdline[ CMD_USE_CUDA ] = 1 ;
        cmdline[ CMD_USE_CUDA_DEV ] = atoi( argv[++i] ) ;
      }
      else if ( 0 == strcasecmp( argv[i] , "fixture" ) ) {
        sscanf( argv[++i] , "%dx%dx%d" ,
                cmdline + CMD_USE_FIXTURE_X ,
                cmdline + CMD_USE_FIXTURE_Y ,
                cmdline + CMD_USE_FIXTURE_Z );
      }
      else if ( 0 == strcasecmp( argv[i] , "fixture-range" ) ) {
        sscanf( argv[++i] , "%d..%d" ,
                cmdline + CMD_USE_FIXTURE_BEGIN ,
                cmdline + CMD_USE_FIXTURE_END );
      }
      else if ( 0 == strcasecmp( argv[i] , "fixture-quadratic" ) ) {
        cmdline[ CMD_USE_FIXTURE_QUADRATIC ] = 1 ;
      }
      else if ( 0 == strcasecmp( argv[i] , "atomic" ) ) {
        cmdline[ CMD_USE_ATOMIC ] = 1 ;
      }
      else if ( 0 == strcasecmp( argv[i] , "trials" ) ) {
        cmdline[ CMD_USE_TRIALS ] = atoi( argv[++i] ) ;
      }
      else if ( 0 == strcasecmp( argv[i] , "vtune" ) ) {
        cmdline[ CMD_VTUNE ] = 1 ;
      }
      else if ( 0 == strcasecmp( argv[i] , "print" ) ) {
        cmdline[ CMD_PRINT ] = 1 ;
      }
      else if ( 0 == strcasecmp( argv[i] , "echo" ) ) {
        cmdline[ CMD_ECHO ] = 1 ;
      }
      else {
        cmdline[ CMD_ERROR ] = 1 ;
        std::cerr << "Unrecognized command line argument #" << i << ": " << argv[i] << std::endl ;
      }
    }

    if ( cmdline[ CMD_ECHO ] && 0 == comm_rank ) { print_cmdline( std::cout , cmdline ); }
  }

#if defined( KOKKOS_HAVE_MPI )
  // Only rank 0 parsed the arguments; share the table with every rank.
  MPI_Bcast( cmdline , CMD_COUNT , MPI_INT , 0 , comm );
#endif

  if ( cmdline[ CMD_VTUNE ] ) {
    // Launch the vtune collector in the background, attached to this
    // process, then wait ten seconds before continuing.
    std::stringstream cmd;
    pid_t my_os_pid=getpid();
    const std::string vtune_loc =
      "/usr/local/intel/vtune_amplifier_xe_2013/bin64/amplxe-cl";
    const std::string output_dir = "./vtune/vtune.";
    const int p_rank = comm_rank;
    cmd << vtune_loc
        << " -collect hotspots -result-dir " << output_dir << p_rank
        << " -target-pid " << my_os_pid << " &";
    if (p_rank == 0)
      std::cout << cmd.str() << std::endl;
    system(cmd.str().c_str());
    system("sleep 10");
  }

  if ( ! cmdline[ CMD_ERROR ] && ! cmdline[ CMD_ECHO ] ) {

    // Defaults: one trial on a 2x2x2 mesh.
    if ( ! cmdline[ CMD_USE_TRIALS ] ) { cmdline[ CMD_USE_TRIALS ] = 1 ; }

    if ( ! cmdline[ CMD_USE_FIXTURE_X ] && ! cmdline[ CMD_USE_FIXTURE_BEGIN ] ) {
      cmdline[ CMD_USE_FIXTURE_X ] = 2 ;
      cmdline[ CMD_USE_FIXTURE_Y ] = 2 ;
      cmdline[ CMD_USE_FIXTURE_Z ] = 2 ;
    }

#if defined( KOKKOS_HAVE_PTHREAD )

    if ( cmdline[ CMD_USE_THREADS ] ) {

      // The NUMA layout is only passed on when both of its values were given.
      if ( cmdline[ CMD_USE_NUMA ] && cmdline[ CMD_USE_CORE_PER_NUMA ] ) {
        Kokkos::Threads::initialize( cmdline[ CMD_USE_THREADS ] ,
                                     cmdline[ CMD_USE_NUMA ] ,
                                     cmdline[ CMD_USE_CORE_PER_NUMA ] );
      }
      else {
        Kokkos::Threads::initialize( cmdline[ CMD_USE_THREADS ] );
      }

      run< Kokkos::Threads , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline );

      Kokkos::Threads::finalize();
    }

#endif

#if defined( KOKKOS_HAVE_OPENMP )

    if ( cmdline[ CMD_USE_OPENMP ] ) {

      if ( cmdline[ CMD_USE_NUMA ] && cmdline[ CMD_USE_CORE_PER_NUMA ] ) {
        Kokkos::OpenMP::initialize( cmdline[ CMD_USE_OPENMP ] ,
                                    cmdline[ CMD_USE_NUMA ] ,
                                    cmdline[ CMD_USE_CORE_PER_NUMA ] );
      }
      else {
        Kokkos::OpenMP::initialize( cmdline[ CMD_USE_OPENMP ] );
      }

      run< Kokkos::OpenMP , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline );

      Kokkos::OpenMP::finalize();
    }

#endif

#if defined( KOKKOS_HAVE_CUDA )
    if ( cmdline[ CMD_USE_CUDA ] ) {
      // Use the device id given with 'cuda-dev' (0 when not given).
      // The host execution space is initialized first and finalized last.

      Kokkos::HostSpace::execution_space::initialize();
      Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( cmdline[ CMD_USE_CUDA_DEV ] ) );

      run< Kokkos::Cuda , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline );

      Kokkos::Cuda::finalize();
      Kokkos::HostSpace::execution_space::finalize();
    }

#endif

  }

#if defined( KOKKOS_HAVE_MPI )
  MPI_Finalize();
#endif

  return cmdline[ CMD_ERROR ] ? -1 : 0 ;
}

View File

@ -1,355 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_BOXELEMFIXTURE_HPP
#define KOKKOS_EXAMPLE_BOXELEMFIXTURE_HPP
#include <stdio.h>
#include <utility>
#include <Kokkos_Core.hpp>
#include <HexElement.hpp>
#include <BoxElemPart.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
/** \brief Grid-to-coordinate map onto the unit cube with a smooth,
 *         nonlinear grading controlled by one coefficient per axis.
 *
 *  A grid index g on an axis whose maximum index is gmax is normalized to
 *  t = g / gmax and then mapped to  t + t*t*(t-1)*(t-1) * coefficient.
 */
struct MapGridUnitCube {

  const float  m_a ;      ///< grading coefficient applied along x
  const float  m_b ;      ///< grading coefficient applied along y
  const float  m_c ;      ///< grading coefficient applied along z
  const size_t m_max_x ;  ///< largest grid index along x
  const size_t m_max_y ;  ///< largest grid index along y
  const size_t m_max_z ;  ///< largest grid index along z

  /** \brief Store the maximum grid index and grading coefficient per axis. */
  MapGridUnitCube( const size_t grid_max_x ,
                   const size_t grid_max_y ,
                   const size_t grid_max_z ,
                   const float bubble_x ,
                   const float bubble_y ,
                   const float bubble_z )
    : m_a( bubble_x )
    , m_b( bubble_y )
    , m_c( bubble_z )
    , m_max_x( grid_max_x )
    , m_max_y( grid_max_y )
    , m_max_z( grid_max_z )
    {}

  /** \brief Convert grid indices to graded coordinates.
   *
   *  \param grid_x,grid_y,grid_z     grid indices of the point
   *  \param coord_x,coord_y,coord_z  receive the mapped coordinates
   */
  template< typename Scalar >
  KOKKOS_INLINE_FUNCTION
  void operator()( int grid_x ,
                   int grid_y ,
                   int grid_z ,
                   Scalar & coord_x ,
                   Scalar & coord_y ,
                   Scalar & coord_z ) const
    {
      // Normalize each grid index by the maximum index of its axis.
      const double ux = static_cast<double>( grid_x ) / static_cast<double>( m_max_x );
      const double uy = static_cast<double>( grid_y ) / static_cast<double>( m_max_y );
      const double uz = static_cast<double>( grid_z ) / static_cast<double>( m_max_z );

      const double ux_m1 = ux - 1 ;
      const double uy_m1 = uy - 1 ;
      const double uz_m1 = uz - 1 ;

      // The grading term vanishes at t == 0 and t == 1.
      coord_x = ux + ux * ux * ux_m1 * ux_m1 * m_a ;
      coord_y = uy + uy * uy * uy_m1 * uy_m1 * m_b ;
      coord_z = uz + uz * uz * uz_m1 * uz_m1 * m_c ;
    }
};
} // namespace Example
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
/** \brief Generate a distributed unstructured finite element mesh
* from a partitioned NX*NY*NZ box of elements.
*
* Order owned nodes first followed by off-process nodes
* grouped by owning process.
*
* The fixture is also the functor that fills its own views: the
* constructor launches Kokkos::parallel_for over *this and
* operator()(size_t) writes the entries belonging to one work index.
*
* Template parameters:
* Device - used as the device argument of every view and
* exported as execution_space.
* Order - element order; selects the number of nodes per element.
* CoordinateMap - functor converting node grid indices to coordinates.
*/
template< class Device ,
BoxElemPart::ElemOrder Order ,
class CoordinateMap = MapGridUnitCube >
class BoxElemFixture {
public:
typedef Device execution_space ;
// Number of spatial axes.
enum { SpaceDim = 3 };
// Nodes per element: 8 for linear, 27 for quadratic, 0 for any other order.
enum { ElemNode = Order == BoxElemPart::ElemLinear ? 8 :
Order == BoxElemPart::ElemQuadratic ? 27 : 0 };
private:
typedef Kokkos::Example::HexElement_TensorData< ElemNode > hex_data ;
// Partition of the element box for this process.
Kokkos::Example::BoxElemPart m_box_part ;
// Maps node grid indices to node coordinates.
CoordinateMap m_coord_map ;
// Node coordinates produced by m_coord_map, one row per used node.
Kokkos::View< double *[SpaceDim] , Device > m_node_coord ;
// Node grid indices, one row per used node.
Kokkos::View< size_t *[SpaceDim] , Device > m_node_grid ;
// Local node id of each node of each used element.
Kokkos::View< size_t *[ElemNode] , Device > m_elem_node ;
// Per receive message: { rank , node count }.
Kokkos::View< size_t *[2] , Device > m_recv_node ;
// Per send message: { rank , node count }.
Kokkos::View< size_t *[2] , Device > m_send_node ;
// Node ids to send, as returned by BoxElemPart::send_node_id.
Kokkos::View< size_t * , Device > m_send_node_id ;
// Per element node: grid offset along each axis in columns 0..2
// (copied from hex_data::eval_map); column 3 is always zero.
unsigned char m_elem_node_local[ ElemNode ][4] ;
public:
// Read-only view types handed out by the accessors below.
typedef Kokkos::View< const size_t * [ElemNode], Device > elem_node_type ;
typedef Kokkos::View< const double * [SpaceDim], Device > node_coord_type ;
typedef Kokkos::View< const size_t * [SpaceDim], Device > node_grid_type ;
typedef Kokkos::View< const size_t * [2] , Device > comm_list_type ;
typedef Kokkos::View< const size_t * , Device > send_nodeid_type ;
// True when the underlying partition reports success.
inline bool ok() const { return m_box_part.ok(); }
// Number of nodes used by this process (rows of the node grid view).
KOKKOS_INLINE_FUNCTION
size_t node_count() const { return m_node_grid.dimension_0(); }
// Number of nodes owned by this process.
KOKKOS_INLINE_FUNCTION
size_t node_count_owned() const { return m_box_part.owns_node_count(); }
// Number of nodes in the global node box.
KOKKOS_INLINE_FUNCTION
size_t node_count_global() const { return m_box_part.global_node_count(); }
// Number of elements used by this process (rows of the element-node view).
KOKKOS_INLINE_FUNCTION
size_t elem_count() const { return m_elem_node.dimension_0(); }
// Number of elements in the global element box.
KOKKOS_INLINE_FUNCTION
size_t elem_count_global() const { return m_box_part.global_elem_count(); }
// Grid offset of element node 'inode' along axis 'k'.
KOKKOS_INLINE_FUNCTION
size_t elem_node_local( size_t inode , int k ) const
{ return m_elem_node_local[inode][k] ; }
// Grid index of local node 'inode' along axis 'iaxis'.
KOKKOS_INLINE_FUNCTION
size_t node_grid( size_t inode , int iaxis ) const
{ return m_node_grid(inode,iaxis); }
// Global node id of a local node, computed from its stored grid indices.
KOKKOS_INLINE_FUNCTION
size_t node_global_index( size_t local ) const
{
const size_t tmp_node_grid[SpaceDim] =
{ m_node_grid(local,0) , m_node_grid(local,1) , m_node_grid(local,2) };
return m_box_part.global_node_id( tmp_node_grid );
}
// Coordinate of local node 'inode' along axis 'iaxis'.
KOKKOS_INLINE_FUNCTION
double node_coord( size_t inode , int iaxis ) const
{ return m_node_coord(inode,iaxis); }
// Largest node grid index along axis 'iaxis'.
KOKKOS_INLINE_FUNCTION
size_t node_grid_max( int iaxis ) const
{ return m_box_part.global_coord_max(iaxis); }
// Local node id of node 'inode' of element 'ielem'.
KOKKOS_INLINE_FUNCTION
size_t elem_node( size_t ielem , size_t inode ) const
{ return m_elem_node(ielem,inode); }
// Whole-view accessors; each returns the view converted to its const type.
elem_node_type elem_node() const { return m_elem_node ; }
node_coord_type node_coord() const { return m_node_coord ; }
node_grid_type node_grid() const { return m_node_grid ; }
comm_list_type recv_node() const { return m_recv_node ; }
comm_list_type send_node() const { return m_send_node ; }
send_nodeid_type send_nodeid() const { return m_send_node_id ; }
// Copy constructor: copies the partition, the coordinate map and the view
// handles, then columns 0..2 of the element-node offset table; column 3 is
// reset to zero rather than copied.
KOKKOS_INLINE_FUNCTION
BoxElemFixture( const BoxElemFixture & rhs )
: m_box_part( rhs.m_box_part )
, m_coord_map( rhs.m_coord_map )
, m_node_coord( rhs.m_node_coord )
, m_node_grid( rhs.m_node_grid )
, m_elem_node( rhs.m_elem_node )
, m_recv_node( rhs.m_recv_node )
, m_send_node( rhs.m_send_node )
, m_send_node_id( rhs.m_send_node_id )
{
for ( int i = 0 ; i < ElemNode ; ++i ) {
m_elem_node_local[i][0] = rhs.m_elem_node_local[i][0] ;
m_elem_node_local[i][1] = rhs.m_elem_node_local[i][1] ;
m_elem_node_local[i][2] = rhs.m_elem_node_local[i][2] ;
m_elem_node_local[i][3] = 0 ;
}
}
// Assignment: member-wise assignment followed by the same offset-table copy
// as the copy constructor.
BoxElemFixture & operator = ( const BoxElemFixture & rhs )
{
m_box_part = rhs.m_box_part ;
m_coord_map = rhs.m_coord_map ;
m_node_coord = rhs.m_node_coord ;
m_node_grid = rhs.m_node_grid ;
m_elem_node = rhs.m_elem_node ;
m_recv_node = rhs.m_recv_node ;
m_send_node = rhs.m_send_node ;
m_send_node_id = rhs.m_send_node_id ;
for ( int i = 0 ; i < ElemNode ; ++i ) {
m_elem_node_local[i][0] = rhs.m_elem_node_local[i][0] ;
m_elem_node_local[i][1] = rhs.m_elem_node_local[i][1] ;
m_elem_node_local[i][2] = rhs.m_elem_node_local[i][2] ;
m_elem_node_local[i][3] = 0 ;
}
return *this ;
}
// Build the fixture for process 'global_rank' of 'global_size':
// decompose - how the box is split among processes
// elem_nx/ny/nz - number of elements along each axis of the global box
// bubble_x/y/z - coefficients forwarded to the coordinate map
// The partition is constructed first because the coordinate map and every
// view extent are derived from it. Views are allocated here and filled by
// the parallel_for at the end of the body.
BoxElemFixture( const BoxElemPart::Decompose decompose ,
const size_t global_size ,
const size_t global_rank ,
const size_t elem_nx ,
const size_t elem_ny ,
const size_t elem_nz ,
const float bubble_x = 1.1f ,
const float bubble_y = 1.2f ,
const float bubble_z = 1.3f )
: m_box_part( Order , decompose , global_size , global_rank , elem_nx , elem_ny , elem_nz )
, m_coord_map( m_box_part.global_coord_max(0) ,
m_box_part.global_coord_max(1) ,
m_box_part.global_coord_max(2) ,
bubble_x ,
bubble_y ,
bubble_z )
, m_node_coord( "fixture_node_coord" , m_box_part.uses_node_count() )
, m_node_grid( "fixture_node_grid" , m_box_part.uses_node_count() )
, m_elem_node( "fixture_elem_node" , m_box_part.uses_elem_count() )
, m_recv_node( "fixture_recv_node" , m_box_part.recv_node_msg_count() )
, m_send_node( "fixture_send_node" , m_box_part.send_node_msg_count() )
, m_send_node_id( "fixture_send_node_id" , m_box_part.send_node_id_count() )
{
{
// Load the per-node grid offsets from the element's tensor data.
const hex_data elem_data ;
for ( int i = 0 ; i < ElemNode ; ++i ) {
m_elem_node_local[i][0] = elem_data.eval_map[i][0] ;
m_elem_node_local[i][1] = elem_data.eval_map[i][1] ;
m_elem_node_local[i][2] = elem_data.eval_map[i][2] ;
m_elem_node_local[i][3] = 0 ;
}
}
// One work range covers all five fills in operator(): use the largest of the
// individual extents; each fill guards itself with its own bound.
const size_t nwork =
std::max( m_recv_node.dimension_0() ,
std::max( m_send_node.dimension_0() ,
std::max( m_send_node_id.dimension_0() ,
std::max( m_node_grid.dimension_0() ,
m_elem_node.dimension_0() * m_elem_node.dimension_1() ))));
// The functor is this object itself (see operator() below).
Kokkos::parallel_for( nwork , *this );
}
// Initialization:
// Functor body launched by the constructor. Work index 'i' is reused as an
// index into every view, and each section only acts while 'i' is within
// that view's extent.
KOKKOS_INLINE_FUNCTION
void operator()( size_t i ) const
{
// Element-node connectivity: 'i' enumerates (element,node) pairs.
if ( i < m_elem_node.dimension_0() * m_elem_node.dimension_1() ) {
const size_t ielem = i / ElemNode ;
const size_t inode = i % ElemNode ;
size_t elem_grid[SpaceDim] ;
size_t tmp_node_grid[SpaceDim] ;
m_box_part.uses_elem_coord( ielem , elem_grid );
// Node grid spacing per element: 1 for linear, 2 for quadratic elements.
enum { elem_node_scale = Order == BoxElemPart::ElemLinear ? 1 :
Order == BoxElemPart::ElemQuadratic ? 2 : 0 };
// Node grid index = scaled element grid index + the node's offset.
tmp_node_grid[0] = elem_node_scale * elem_grid[0] + m_elem_node_local[inode][0] ;
tmp_node_grid[1] = elem_node_scale * elem_grid[1] + m_elem_node_local[inode][1] ;
tmp_node_grid[2] = elem_node_scale * elem_grid[2] + m_elem_node_local[inode][2] ;
m_elem_node(ielem,inode) = m_box_part.local_node_id( tmp_node_grid );
}
// Node grid indices and coordinates: 'i' is a local node id.
if ( i < m_node_grid.dimension_0() ) {
size_t tmp_node_grid[SpaceDim] ;
m_box_part.local_node_coord( i , tmp_node_grid );
m_node_grid(i,0) = tmp_node_grid[0] ;
m_node_grid(i,1) = tmp_node_grid[1] ;
m_node_grid(i,2) = tmp_node_grid[2] ;
// The coordinate map writes straight into the coordinate view entries.
m_coord_map( tmp_node_grid[0] ,
tmp_node_grid[1] ,
tmp_node_grid[2] ,
m_node_coord(i,0) ,
m_node_coord(i,1) ,
m_node_coord(i,2) );
}
// Receive list: 'i' is a receive message index.
if ( i < m_recv_node.dimension_0() ) {
m_recv_node(i,0) = m_box_part.recv_node_rank(i);
m_recv_node(i,1) = m_box_part.recv_node_count(i);
}
// Send list: 'i' is a send message index.
if ( i < m_send_node.dimension_0() ) {
m_send_node(i,0) = m_box_part.send_node_rank(i);
m_send_node(i,1) = m_box_part.send_node_count(i);
}
// Send node ids: 'i' is an index into the concatenated send items.
if ( i < m_send_node_id.dimension_0() ) {
m_send_node_id(i) = m_box_part.send_node_id(i);
}
}
};
} // namespace Example
} // namespace Kokkos
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_EXAMPLE_BOXELEMFIXTURE_HPP */

View File

@ -1,413 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <utility>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <limits>
#include <BoxElemPart.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
/** \brief Compute the sub-box of 'global_box' assigned to 'global_rank'.
 *
 *  The group of ranks [0,global_size) is split repeatedly into 5, 3 or 2
 *  sub-groups (the first of those that divides the group size, else 2),
 *  and the box is cut along its currently longest axis in proportion to
 *  the sub-group containing 'global_rank', until that group holds a
 *  single rank.
 *
 *  \param global_size  number of ranks
 *  \param global_rank  rank whose box is wanted
 *  \param global_box   three [begin,end) ranges of the whole box
 *  \param box          receives the three [begin,end) ranges for the rank
 */
void box_partition( const size_t global_size ,
                    const size_t global_rank ,
                    const size_t global_box[][2] ,
                    size_t box[][2] )
{
  // Begin with the whole box and narrow it one cut at a time.
  for ( int d = 0 ; d < 3 ; ++d ) {
    box[d][0] = global_box[d][0] ;
    box[d][1] = global_box[d][1] ;
  }

  size_t group_begin = 0 ;            // first rank of the current group
  size_t group_size  = global_size ;  // number of ranks in the current group

  while ( 1 < group_size ) {

    size_t part = 2 ;
    if      ( 0 == ( group_size % 5 ) ) { part = 5 ; }
    else if ( 0 == ( group_size % 3 ) ) { part = 3 ; }

    const size_t portion = group_size / part ;

    // Sub-group [sub_begin,sub_end), relative to group_begin, that holds
    // global_rank.  The default is the upper half of a two-way split, which
    // absorbs the odd rank when group_size is odd.
    size_t sub_begin = portion ;
    size_t sub_end   = group_size ;

    if ( 2 < part || global_rank < group_begin + portion ) {
      sub_begin = portion * size_t( double( global_rank - group_begin ) / double(portion) );
      sub_end   = sub_begin + portion ;
    }

    // Cut along the axis with the largest extent; ties go to the lower axis.
    const size_t extent[3] = {
      box[0][1] - box[0][0] ,
      box[1][1] - box[1][0] ,
      box[2][1] - box[2][0] };

    int axis = 0 ;
    if ( extent[2] > extent[1] ) {
      if ( extent[2] > extent[0] ) { axis = 2 ; }
    }
    else if ( extent[1] > extent[0] ) {
      axis = 1 ;
    }

    // Both new bounds are measured from the current lower bound.
    const size_t lower = box[ axis ][0] ;

    box[ axis ][1] = lower + size_t( double(extent[axis]) * ( double(sub_end)   / double(group_size) ));
    box[ axis ][0] = lower + size_t( double(extent[axis]) * ( double(sub_begin) / double(group_size) ));

    group_begin = group_begin + sub_begin ;
    group_size  = sub_end - sub_begin ;
  }
}
} /* namespace Example */
} /* namespace Kokkos */
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
/** \brief Compute the element and node boxes of process 'rank'.
 *
 *  \param rank       process whose boxes are wanted
 *  \param uses_elem  receives the [begin,end) element ranges used by 'rank'
 *  \param owns_node  receives the [begin,end) node ranges owned by 'rank'
 *  \param uses_node  receives the [begin,end) node ranges used by 'rank'
 */
void BoxElemPart::local( const size_t rank ,
                         size_t uses_elem[][2] ,
                         size_t owns_node[][2] ,
                         size_t uses_node[][2] ) const
{
  const bool partition_by_elem = ( BoxElemPart::DecomposeElem == m_decompose );

  if ( partition_by_elem ) {

    // Partition the elements; a process owns the nodes at the lower side of
    // its elements, plus the last node layer when it touches the global end.
    Kokkos::Example::box_partition( m_global_size , rank , m_global_elem_box , uses_elem );

    for ( int d = 0 ; d < 3 ; ++d ) {
      const size_t closing_layer = ( m_global_elem_box[d][1] == uses_elem[d][1] ) ? 1 : 0 ;
      owns_node[d][0] = uses_elem[d][0] ;
      owns_node[d][1] = uses_elem[d][1] + closing_layer ;
    }
  }
  else {

    // Partition the vertices (one more than the elements per axis); the
    // elements used are those touching the owned vertices.
    const size_t global_vert[3][2] =
      { { 0 , m_global_elem_box[0][1] + 1 },
        { 0 , m_global_elem_box[1][1] + 1 },
        { 0 , m_global_elem_box[2][1] + 1 } };

    Kokkos::Example::box_partition( m_global_size , rank , global_vert , owns_node );

    for ( int d = 0 ; d < 3 ; ++d ) {
      const bool at_global_begin = ( global_vert[d][0] == owns_node[d][0] );
      const bool at_global_end   = ( global_vert[d][1] == owns_node[d][1] );
      uses_elem[d][0] = at_global_begin ? owns_node[d][0]     : owns_node[d][0] - 1 ;
      uses_elem[d][1] = at_global_end   ? owns_node[d][1] - 1 : owns_node[d][1] ;
    }
  }

  // Every used element needs the node layer that closes it.
  for ( int d = 0 ; d < 3 ; ++d ) {
    uses_node[d][0] = uses_elem[d][0] ;
    uses_node[d][1] = uses_elem[d][1] + 1 ;
  }

  if ( BoxElemPart::ElemQuadratic == m_elem_order ) {
    // Quadratic elements have nodes on a grid twice as fine.
    // NOTE(review): for a box that does not reach the global end the owned
    // upper bound 2*end-1 appears to leave node 2*end-1 unowned, since the
    // next box starts at 2*end; confirm this is intended.
    for ( int d = 0 ; d < 3 ; ++d ) {
      owns_node[d][0] = 2 * owns_node[d][0] ;
      uses_node[d][0] = 2 * uses_node[d][0] ;
      owns_node[d][1] = 2 * owns_node[d][1] - 1 ;
      uses_node[d][1] = 2 * uses_node[d][1] - 1 ;
    }
  }
}
/** \brief Partition an elem_nx * elem_ny * elem_nz box of elements for
 *         process 'global_rank' out of 'global_size' processes.
 *
 *  Computes this process' element box, owned-node box and used-node box,
 *  then visits every other rank to build
 *    - the receive list: boxes of nodes this process uses but 'rank' owns,
 *    - the send list:    boxes of nodes this process owns and 'rank' uses.
 *  Both lists are terminated by a sentinel entry.  The partition is checked
 *  for overlaps and for complete coverage; on any failure a message is
 *  written to std::cout, ok() returns false, and the global/uses boxes and
 *  both list counts are reset to zero.
 *
 *  \param elem_order   linear or quadratic elements
 *  \param decompose    partition by element or by node
 *  \param global_size  number of processes
 *  \param global_rank  this process' rank
 *  \param elem_nx,elem_ny,elem_nz  number of elements along each axis
 */
BoxElemPart::BoxElemPart(
  const BoxElemPart::ElemOrder elem_order ,
  const BoxElemPart::Decompose decompose ,
  const size_t global_size ,
  const size_t global_rank ,
  const size_t elem_nx ,
  const size_t elem_ny ,
  const size_t elem_nz )
{
  m_global_size = global_size ;
  m_global_rank = global_rank ;
  m_decompose   = decompose ;
  m_elem_order  = elem_order ;

  m_global_elem_box[0][0] = 0 ; m_global_elem_box[0][1] = elem_nx ;
  m_global_elem_box[1][0] = 0 ; m_global_elem_box[1][1] = elem_ny ;
  m_global_elem_box[2][0] = 0 ; m_global_elem_box[2][1] = elem_nz ;

  m_global_node_box[0][0] = 0 ; m_global_node_box[0][1] = 0 ;
  m_global_node_box[1][0] = 0 ; m_global_node_box[1][1] = 0 ;
  m_global_node_box[2][0] = 0 ; m_global_node_box[2][1] = 0 ;

  m_owns_node_count = 0 ;
  m_send_node_count = 0 ;

  m_ok = true ;

  //----------------------------------------
  // Global node box: n+1 nodes per axis for linear elements,
  // 2n+1 for quadratic elements.

  if ( ElemLinear == elem_order ) {
    m_global_node_box[0][1] = elem_nx + 1 ;
    m_global_node_box[1][1] = elem_ny + 1 ;
    m_global_node_box[2][1] = elem_nz + 1 ;
  }
  else if ( ElemQuadratic == elem_order ) {
    m_global_node_box[0][1] = 2 * elem_nx + 1 ;
    m_global_node_box[1][1] = 2 * elem_ny + 1 ;
    m_global_node_box[2][1] = 2 * elem_nz + 1 ;
  }

  //----------------------------------------
  // Boxes for this process; entry 0 of the owns list is this process itself.

  local( m_global_rank , m_uses_elem_box , m_owns_node_box[0] , m_uses_node_box );

  const size_t global_node_count_ = Kokkos::Example::box_count( m_global_node_box );
  const size_t global_elem_count_ = Kokkos::Example::box_count( m_global_elem_box );

  //----------------------------------------
  // Running totals over all ranks, used for the coverage checks below.

  size_t elem_count = Kokkos::Example::box_count( m_uses_elem_box );
  size_t node_count = Kokkos::Example::box_count( m_owns_node_box[0] );

  m_owns_node[0][0] = global_rank ;
  m_owns_node[0][1] = node_count ;
  m_owns_node_count = 1 ;
  m_send_node_count = 0 ;

  // Visit the other ranks starting with the next-higher rank and wrapping.
  for ( size_t rr = 1 ; rr < m_global_size && m_ok ; ++rr ) {

    const size_t rank = ( m_global_rank + rr ) % m_global_size ;

    size_t elem_box[3][2] , o_node_box[3][2] , u_node_box[3][2] ;

    // Boxes for process 'rank'
    local( rank , elem_box , o_node_box , u_node_box );

    // Box that this process uses but is owned by process 'rank'.
    // It is written into the next free slot and kept only if non-empty.
    Kokkos::Example::box_intersect( m_owns_node_box[ m_owns_node_count ] , m_uses_node_box , o_node_box );

    m_owns_node[ m_owns_node_count ][1] = Kokkos::Example::box_count( m_owns_node_box[ m_owns_node_count ] );

    if ( m_owns_node[ m_owns_node_count ][1] ) {

      // One slot must stay free for the sentinel entry.
      if ( ( PROC_NEIGH_MAX - 1 ) <= m_owns_node_count ) {
        std::cout << "BoxElemPart exceeded maximum neighbor count" << std::endl ;
        m_ok = false ;
        break ;
      }

      m_owns_node[ m_owns_node_count ][0] = rank ;

      ++m_owns_node_count ;
    }

    // Box that this process owns and is used by process 'rank'
    Kokkos::Example::box_intersect( m_send_node_box[ m_send_node_count ] , m_owns_node_box[0] , u_node_box );

    m_send_node[ m_send_node_count ][1] = Kokkos::Example::box_count( m_send_node_box[ m_send_node_count ] );

    if ( m_send_node[ m_send_node_count ][1] ) {

      // One slot must stay free for the sentinel entry.
      if ( ( PROC_NEIGH_MAX - 1 ) <= m_send_node_count ) {
        std::cout << "BoxElemPart exceeded maximum neighbor count" << std::endl ;
        m_ok = false ;
        break ;
      }

      m_send_node[ m_send_node_count ][0] = rank ;

      ++m_send_node_count ;
    }

    // Error checking:

    size_t test_box[3][2] ;

    elem_count += Kokkos::Example::box_count( elem_box );
    node_count += Kokkos::Example::box_count( o_node_box );

    {
      // No two processes may own the same node.
      Kokkos::Example::box_intersect( test_box , m_owns_node_box[0] , o_node_box );

      if ( Kokkos::Example::box_count( test_box ) ) {
        std::cout << "Box partitioning error" << std::endl ;
        std::cout << "owns_node[" << m_global_rank << "]{"
                  << " [" << m_owns_node_box[0][0][0] << "," << m_owns_node_box[0][0][1] << ")"
                  << " [" << m_owns_node_box[0][1][0] << "," << m_owns_node_box[0][1][1] << ")"
                  << " [" << m_owns_node_box[0][2][0] << "," << m_owns_node_box[0][2][1] << ")"
                  << "} intersects"
                  << " owns_node[" << rank << "]{"
                  << " [" << o_node_box[0][0] << "," << o_node_box[0][1] << ")"
                  << " [" << o_node_box[1][0] << "," << o_node_box[1][1] << ")"
                  << " [" << o_node_box[2][0] << "," << o_node_box[2][1] << ")"
                  << "}" << std::endl ;
        m_ok = false ;
        break ;
      }
    }

    if ( DecomposeElem == decompose ) {

      // When partitioning by element no two processes may share an element.
      Kokkos::Example::box_intersect( test_box , m_uses_elem_box , elem_box );

      if ( Kokkos::Example::box_count( test_box ) ) {
        std::cout << "Box partitioning error" << std::endl ;
        std::cout << "ElemBox[" << m_global_rank << "]{"
                  << " [" << m_uses_elem_box[0][0] << "," << m_uses_elem_box[0][1] << ")"
                  << " [" << m_uses_elem_box[1][0] << "," << m_uses_elem_box[1][1] << ")"
                  << " [" << m_uses_elem_box[2][0] << "," << m_uses_elem_box[2][1] << ")"
                  << "} intersects"
                  << " ElemBox[" << rank << "]{"
                  << " [" << elem_box[0][0] << "," << elem_box[0][1] << ")"
                  << " [" << elem_box[1][0] << "," << elem_box[1][1] << ")"
                  << " [" << elem_box[2][0] << "," << elem_box[2][1] << ")"
                  << "}" << std::endl ;
        m_ok = false ;
        break ;
      }
    }
  }

  // Sentinel values at the end of the owns and send lists.
  // The sentinel count ~0u stops the count-based searches, and the sentinel
  // box spans [0,~0u) on every axis so that the box search in
  // local_node_id() stops at the sentinel for any coordinate below ~0u.
  // (Previously the lower bound was assigned twice and the upper bound was
  // never set, so the sentinel box could not match a coordinate.)

  m_owns_node[ m_owns_node_count ][0] = ~0u ;
  m_owns_node[ m_owns_node_count ][1] = ~0u ;

  m_send_node[ m_send_node_count ][0] = ~0u ;
  m_send_node[ m_send_node_count ][1] = ~0u ;

  for ( int i = 0 ; i < 3 ; ++i ) {
    m_owns_node_box[ m_owns_node_count ][i][0] = 0u ;
    m_owns_node_box[ m_owns_node_count ][i][1] = ~0u ;

    m_send_node_box[ m_send_node_count ][i][0] = 0u ;
    m_send_node_box[ m_send_node_count ][i][1] = ~0u ;
  }

  {
    // The owned and received boxes together must cover the used nodes exactly.
    size_t count = 0 ;

    for ( size_t i = 0 ; i < m_owns_node_count ; ++i ) {
      count += m_owns_node[i][1] ;
    }

    if ( count != Kokkos::Example::box_count( m_uses_node_box ) ) {
      std::cout << "Node uses count = " << Kokkos::Example::box_count( m_uses_node_box )
                << " error count = " << count << std::endl ;
      m_ok = false ;
    }
  }

  // The nodes owned over all ranks must add up to the global node count.
  if ( global_node_count_ != node_count ) {
    std::cout << "Node count = " << global_node_count_ << " overlap error count = " << node_count << std::endl ;
    m_ok = false ;
  }

  // Likewise for elements, when the elements themselves were partitioned.
  if ( DecomposeElem == decompose && global_elem_count_ != elem_count ) {
    std::cout << "Elem count = " << global_elem_count_ << " overlap error count = " << elem_count << std::endl ;
    m_ok = false ;
  }

  if ( ! m_ok ) {
    // Leave an empty partition behind after any failure.
    for ( int i = 0 ; i < 3 ; ++i ) { for ( int j = 0 ; j < 2 ; ++j ) {
      m_global_elem_box[i][j] = 0 ;
      m_global_node_box[i][j] = 0 ;
      m_uses_elem_box[i][j] = 0 ;
      m_uses_node_box[i][j] = 0 ;
    }}
    m_owns_node_count = 0 ;
    m_send_node_count = 0 ;
  }
}
/** \brief Write the three [begin,end) ranges of 'box' as " [b,e)" items. */
static void print_box_ranges( std::ostream & s , const size_t box[][2] )
{
  for ( int axis = 0 ; axis < 3 ; ++axis ) {
    s << " [" << box[axis][0] << "," << box[axis][1] << ")" ;
  }
}

/** \brief Write a description of this partition to 's': the element
 *         boxes, the node boxes, and one line per receive and send box.
 */
void BoxElemPart::print( std::ostream & s ) const
{
  s << "BoxElemPart P[" << m_global_rank << ":" << m_global_size << "]"
    << std::endl ;

  // Used element box / global element box.
  s << " elem_box {" ;
  print_box_ranges( s , m_uses_elem_box );
  s << " } / {" ;
  print_box_ranges( s , m_global_elem_box );
  s << " }"
    << std::endl ;

  // Owned node box / used node box / global node box.
  s << " node_box {" ;
  print_box_ranges( s , m_owns_node_box[0] );
  s << " } / {" ;
  print_box_ranges( s , m_uses_node_box );
  s << " } / {" ;
  print_box_ranges( s , m_global_node_box );
  s << " }"
    << std::endl ;

  // Entry 0 of the owns list is this process; receive boxes start at 1.
  size_t recv = 1 ;
  while ( recv < m_owns_node_count ) {
    s << " P[" << m_owns_node[recv][0] << "]"
      << " recv node_box {" ;
    print_box_ranges( s , m_owns_node_box[recv] );
    s << " }"
      << std::endl ;
    ++recv ;
  }

  size_t send = 0 ;
  while ( send < m_send_node_count ) {
    s << " P[" << m_send_node[send][0] << "]"
      << " send node_box {" ;
    print_box_ranges( s , m_send_node_box[send] );
    s << " }"
      << std::endl ;
    ++send ;
  }
}
} /* namespace Example */
} /* namespace Kokkos */
//----------------------------------------------------------------------------

View File

@ -1,320 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_BOXELEMPART_HPP
#define KOKKOS_BOXELEMPART_HPP
#include <utility>
#include <ostream>
#include <Kokkos_Macros.hpp>
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
KOKKOS_INLINE_FUNCTION
void box_intersect( size_t box[][2] ,
                    const size_t boxA[][2] ,
                    const size_t boxB[][2] )
{
  // Intersect boxA and boxB axis by axis.  Each box holds three
  // [begin,end) ranges; an axis without overlap yields an empty range
  // whose end equals its begin.
  for ( int axis = 0 ; axis < 3 ; ++axis ) {

    // Larger of the two lower bounds.
    size_t lower = boxB[axis][0] ;
    if ( boxA[axis][0] > boxB[axis][0] ) { lower = boxA[axis][0] ; }

    // Smaller of the two upper bounds.
    size_t upper = boxB[axis][1] ;
    if ( boxA[axis][1] < boxB[axis][1] ) { upper = boxA[axis][1] ; }

    // Disjoint ranges collapse to an empty range.
    if ( lower > upper ) { upper = lower ; }

    box[axis][0] = lower ;
    box[axis][1] = upper ;
  }
}
KOKKOS_INLINE_FUNCTION
size_t box_count( const size_t box[][2] )
{
  // Number of grid points in the box: the product of the lengths of its
  // three [begin,end) ranges.
  const size_t len_x = size_t( box[0][1] - box[0][0] );
  const size_t len_y = size_t( box[1][1] - box[1][0] );
  const size_t len_z = size_t( box[2][1] - box[2][0] );

  return len_x * len_y * len_z ;
}
KOKKOS_INLINE_FUNCTION
void box_ghost_layer( const size_t global_box[][2] ,
                      const size_t local_box[][2] ,
                      const size_t ghost_layer ,
                      size_t ghost_box[][2] )
{
  // Grow local_box by ghost_layer on both sides of every axis, clamping
  // each side to the corresponding bound of global_box.
  for ( int axis = 0 ; axis < 3 ; ++axis ) {

    if ( global_box[axis][0] + ghost_layer > local_box[axis][0] ) {
      ghost_box[axis][0] = global_box[axis][0] ;
    }
    else {
      ghost_box[axis][0] = local_box[axis][0] - ghost_layer ;
    }

    if ( global_box[axis][1] < local_box[axis][1] + ghost_layer ) {
      ghost_box[axis][1] = global_box[axis][1] ;
    }
    else {
      ghost_box[axis][1] = local_box[axis][1] + ghost_layer ;
    }
  }
}
/** \brief Compute the sub-box of 'global_box' assigned to 'global_rank'
* out of 'global_size' ranks; the result is written to 'box'.
* Boxes are three [begin,end) ranges, one per axis.
* Declared here, defined out of line.
*/
void box_partition( const size_t global_size ,
const size_t global_rank ,
const size_t global_box[][2] ,
size_t box[][2] );
} // namespace Example
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Example {
/** \brief Partition a box of hexahedral elements among subdomains.
*
* Nodes are ordered locally as follows:
* { owned_by[ this_process ] ,
* owned_by[ neighbor_process[0] ] ,
* owned_by[ neighbor_process[1] ] ,
* owned_by[ neighbor_process[2] ] ,
* ... };
*/
class BoxElemPart {
public:
// How the box is split among processes:
// DecomposeNode partitions the vertex box (element count + 1 per axis)
// and derives the elements each process uses;
// DecomposeElem partitions the element box and derives the nodes each
// process owns.
enum Decompose { DecomposeNode , DecomposeElem };
// Element order: the global node box has n+1 nodes per axis for ElemLinear
// and 2n+1 for ElemQuadratic, where n is the element count on that axis.
enum ElemOrder { ElemLinear , ElemQuadratic };
/** \brief Whether the partition was constructed without error. */
bool ok() const
{
  return m_ok ;
}
// Partition a box of elem_nx * elem_ny * elem_nz elements for process
// 'global_rank' out of 'global_size' processes, using the given element
// order and decomposition. Check ok() afterwards: it reports whether the
// partition passed its consistency checks.
BoxElemPart( const ElemOrder elem_order ,
const Decompose decompose ,
const size_t global_size ,
const size_t global_rank ,
const size_t elem_nx ,
const size_t elem_ny ,
const size_t elem_nz );
/** \brief Number of elements in the global element box. */
KOKKOS_INLINE_FUNCTION
size_t global_elem_count() const
{
  const size_t count = Kokkos::Example::box_count( m_global_elem_box );
  return count ;
}
/** \brief Number of nodes in the global node box. */
KOKKOS_INLINE_FUNCTION
size_t global_node_count() const
{
  const size_t count = Kokkos::Example::box_count( m_global_node_box );
  return count ;
}
/** \brief Number of elements in the element box used by this process. */
KOKKOS_INLINE_FUNCTION
size_t uses_elem_count() const
{
  const size_t count = Kokkos::Example::box_count( m_uses_elem_box );
  return count ;
}
/** \brief Number of nodes in the node box owned by this process
 *         (entry 0 of the owns list).
 */
KOKKOS_INLINE_FUNCTION
size_t owns_node_count() const
{
  const size_t count = Kokkos::Example::box_count( m_owns_node_box[0] );
  return count ;
}
/** \brief Number of nodes in the node box used by this process. */
KOKKOS_INLINE_FUNCTION
size_t uses_node_count() const
{
  const size_t count = Kokkos::Example::box_count( m_uses_node_box );
  return count ;
}
//----------------------------------------
/** \brief Offset of element grid position (ix,iy,iz) within the used
 *         element box, with x varying fastest, then y, then z.
 */
KOKKOS_INLINE_FUNCTION
size_t uses_elem_offset( const size_t ix ,
                         const size_t iy ,
                         const size_t iz ) const
{
  // Position relative to the lower corner of the used element box.
  const size_t rel_x = size_t( ix - m_uses_elem_box[0][0] );
  const size_t rel_y = size_t( iy - m_uses_elem_box[1][0] );
  const size_t rel_z = size_t( iz - m_uses_elem_box[2][0] );

  const size_t len_x = size_t( m_uses_elem_box[0][1] - m_uses_elem_box[0][0] );
  const size_t len_y = size_t( m_uses_elem_box[1][1] - m_uses_elem_box[1][0] );

  return rel_x + len_x * ( rel_y + len_y * ( rel_z ) );
}
/** \brief Element grid position of local element 'lid'; writes c[0..2].
 *         Local element ids run with x fastest, then y, then z.
 */
KOKKOS_INLINE_FUNCTION
void uses_elem_coord( size_t lid , size_t c[] ) const
{
  const size_t len_x = m_uses_elem_box[0][1] - m_uses_elem_box[0][0] ;
  const size_t len_y = m_uses_elem_box[1][1] - m_uses_elem_box[1][0] ;

  // Peel the x offset, then the y offset; what remains is the z offset.
  const size_t off_x = lid % len_x ;
  const size_t rest  = lid / len_x ;
  const size_t off_y = rest % len_y ;
  const size_t off_z = rest / len_y ;

  c[0] = m_uses_elem_box[0][0] + off_x ;
  c[1] = m_uses_elem_box[1][0] + off_y ;
  c[2] = m_uses_elem_box[2][0] + off_z ;
}
//----------------------------------------
/** \brief Largest node grid index along 'axis' (end of the global node
 *         range minus one).
 */
KOKKOS_INLINE_FUNCTION
size_t global_coord_max( size_t axis ) const
{
  const size_t range_end = m_global_node_box[axis][1] ;
  return range_end - 1 ;
}
//----------------------------------------
/** \brief Node grid position of local node 'lid'; writes coord[0..2].
 *
 *  Local node ids are laid out block after block of the owns list, and
 *  within a block with x fastest, then y, then z.
 */
KOKKOS_INLINE_FUNCTION
void local_node_coord( size_t lid , size_t coord[] ) const
{
  // Find the 'owns' block holding lid, reducing lid to an offset within it.
  // The list ends with a sentinel entry.
  size_t blk = 0 ;
  for ( ; m_owns_node[blk][1] <= lid ; ++blk ) {
    lid -= m_owns_node[blk][1] ;
  }

  // Map the offset within the block to global grid coordinates.
  const size_t len_x = m_owns_node_box[blk][0][1] - m_owns_node_box[blk][0][0] ;
  const size_t len_y = m_owns_node_box[blk][1][1] - m_owns_node_box[blk][1][0] ;

  const size_t off_x = lid % len_x ;
  const size_t rest  = lid / len_x ;
  const size_t off_y = rest % len_y ;
  const size_t off_z = rest / len_y ;

  coord[0] = m_owns_node_box[blk][0][0] + off_x ;
  coord[1] = m_owns_node_box[blk][1][0] + off_y ;
  coord[2] = m_owns_node_box[blk][2][0] + off_z ;
}
/** \brief Local node id of the node at grid position c[0..2].
 *
 *  Searches the owns list for the first block whose box contains 'c' and
 *  returns the number of nodes in the preceding blocks plus the offset of
 *  'c' within that block (x fastest, then y, then z).
 */
KOKKOS_INLINE_FUNCTION
size_t local_node_id( const size_t c[] ) const
{
  size_t preceding = 0 ;  // nodes in the blocks already passed over
  size_t blk = 0 ;

  for (;;) {
    const bool inside =
      m_owns_node_box[blk][0][0] <= c[0] && c[0] < m_owns_node_box[blk][0][1] &&
      m_owns_node_box[blk][1][0] <= c[1] && c[1] < m_owns_node_box[blk][1][1] &&
      m_owns_node_box[blk][2][0] <= c[2] && c[2] < m_owns_node_box[blk][2][1] ;

    if ( inside ) { break ; }

    preceding += m_owns_node[blk][1] ;
    ++blk ;
  }

  // Offset of 'c' within the block that contains it.
  const size_t rel_x = size_t( c[0] - m_owns_node_box[blk][0][0] );
  const size_t rel_y = size_t( c[1] - m_owns_node_box[blk][1][0] );
  const size_t rel_z = size_t( c[2] - m_owns_node_box[blk][2][0] );

  const size_t len_x = size_t( m_owns_node_box[blk][0][1] - m_owns_node_box[blk][0][0] );
  const size_t len_y = size_t( m_owns_node_box[blk][1][1] - m_owns_node_box[blk][1][0] );

  return preceding +
         rel_x + len_x * ( rel_y + len_y * ( rel_z ) );
}
/** \brief  X-fastest linear id of the node at c[0..2] within the
 *          global node box.  No range checking is performed.
 */
KOKKOS_INLINE_FUNCTION
size_t global_node_id( const size_t c[] ) const
{
  const size_t extent_x = m_global_node_box[0][1] - m_global_node_box[0][0] ;
  const size_t extent_y = m_global_node_box[1][1] - m_global_node_box[1][0] ;

  const size_t rel_x = c[0] - m_global_node_box[0][0] ;
  const size_t rel_y = c[1] - m_global_node_box[1][0] ;
  const size_t rel_z = c[2] - m_global_node_box[2][0] ;

  return rel_x + extent_x * ( rel_y + extent_y * rel_z ) ;
}
//----------------------------------------
/** \brief  Number of receive messages: every 'owns' block except entry 0. */
KOKKOS_INLINE_FUNCTION
size_t recv_node_msg_count() const
{
  return m_owns_node_count - 1 ;
}
/** \brief  Rank recorded for receive message 'msg' (entry msg+1 of m_owns_node). */
KOKKOS_INLINE_FUNCTION
size_t recv_node_rank( size_t msg ) const
{
  const size_t entry = msg + 1 ; // receive messages start after entry 0
  return m_owns_node[entry][0] ;
}
/** \brief  Node count recorded for receive message 'msg' (entry msg+1 of m_owns_node). */
KOKKOS_INLINE_FUNCTION
size_t recv_node_count( size_t msg ) const
{
  const size_t entry = msg + 1 ; // receive messages start after entry 0
  return m_owns_node[entry][1] ;
}
//----------------------------------------
/** \brief  Number of send messages recorded in m_send_node. */
KOKKOS_INLINE_FUNCTION
size_t send_node_msg_count() const
{
  return m_send_node_count ;
}
/** \brief  Rank recorded for send message 'msg'. */
KOKKOS_INLINE_FUNCTION
size_t send_node_rank( size_t msg ) const
{
  return m_send_node[msg][0] ;
}
/** \brief  Node count recorded for send message 'msg'. */
KOKKOS_INLINE_FUNCTION
size_t send_node_count( size_t msg ) const
{
  return m_send_node[msg][1] ;
}
/** \brief  Total number of node ids over all send messages. */
KOKKOS_INLINE_FUNCTION
size_t send_node_id_count() const
{
  size_t total = 0 ;
  size_t msg   = 0 ;

  // Accumulate the per-message node counts.
  while ( msg < m_send_node_count ) {
    total += m_send_node[msg][1] ;
    ++msg ;
  }

  return total ;
}
/** \brief  Local id of the 'item'-th node over the concatenated send lists.
 *
 *  \param item  Index into the concatenation of all send messages;
 *               the search over messages has no explicit bound.
 *  \return      X-fastest offset of that node within 'owns' block 0.
 */
KOKKOS_INLINE_FUNCTION
size_t send_node_id( size_t item ) const
{
  // Locate the send message holding 'item', reducing 'item' to an
  // offset within that message.
  size_t msg = 0 ;
  for ( ; m_send_node[msg][1] <= item ; ++msg ) {
    item -= m_send_node[msg][1] ;
  }

  // Decode the offset into grid coordinates inside the message's send box.
  const size_t send_nx = m_send_node_box[msg][0][1] - m_send_node_box[msg][0][0] ;
  const size_t send_ny = m_send_node_box[msg][1][1] - m_send_node_box[msg][1][0] ;
  const size_t yz      = item / send_nx ;

  const size_t gx = m_send_node_box[msg][0][0] + ( item % send_nx ) ;
  const size_t gy = m_send_node_box[msg][1][0] + ( yz   % send_ny ) ;
  const size_t gz = m_send_node_box[msg][2][0] + ( yz   / send_ny ) ;

  // Re-encode the coordinates relative to 'owns' block 0.
  const size_t owns_nx = m_owns_node_box[0][0][1] - m_owns_node_box[0][0][0] ;
  const size_t owns_ny = m_owns_node_box[0][1][1] - m_owns_node_box[0][1][0] ;

  const size_t rel_x = gx - m_owns_node_box[0][0][0] ;
  const size_t rel_y = gy - m_owns_node_box[0][1][0] ;
  const size_t rel_z = gz - m_owns_node_box[0][2][0] ;

  return rel_x + owns_nx * ( rel_y + owns_ny * rel_z ) ;
}
//----------------------------------------
// Write a description of this object to stream 's'.
// Only declared here; the definition is not in this header.
void print( std::ostream & s ) const ;
private:
// Maximum number of processes in a neighborhood, including this process
enum { PROC_NEIGH_MAX = 64 };
// Helper taking a rank and three [3][2] box arguments; only declared here.
// NOTE(review): presumably fills the boxes for 'rank' -- confirm against
// the definition.
void local( const size_t rank ,
size_t uses_elem[][2] ,
size_t owns_node[][2] ,
size_t uses_node[][2] ) const ;
// NOTE(review): presumably the number of parts and this part's rank --
// confirm against the constructor.
size_t m_global_size ;
size_t m_global_rank ;
Decompose m_decompose ;
ElemOrder m_elem_order ;
// Boxes are indexed [axis][0] = lower bound, [axis][1] = upper bound.
// The accessors treat the upper bound as exclusive
// (e.g. global_coord_max returns m_global_node_box[axis][1] - 1).
size_t m_global_elem_box[3][2] ;
size_t m_global_node_box[3][2] ;
size_t m_uses_elem_box[3][2] ;
size_t m_uses_node_box[3][2] ;
// m_owns_node[j] and m_send_node[j] hold [ processor rank , count ].
// m_owns_node_box[j] / m_send_node_box[j] is the node box for entry j.
// The recv_node_* accessors read m_owns_node entries 1 .. m_owns_node_count-1;
// entry 0 is skipped (presumably this process's own nodes -- confirm).
size_t m_owns_node_box[ PROC_NEIGH_MAX ][3][2] ;
size_t m_owns_node[ PROC_NEIGH_MAX ][2] ;
size_t m_owns_node_count ;
size_t m_send_node_box[ PROC_NEIGH_MAX ][3][2] ;
size_t m_send_node[ PROC_NEIGH_MAX ][2] ;
size_t m_send_node_count ;
bool m_ok ;
};
} // namespace Example
} // namespace Kokkos
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_BOXELEMPART_HPP */

View File

@ -1,13 +0,0 @@
# Header search path: this example's build and source directories, plus the
# sibling 'common' directory of the examples.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../common)
# Sources compiled into the TestFixture executable.
SET(SOURCES_TEST Main.cpp TestFixture.cpp BoxElemPart.cpp )
# Automatically picks up 'kokkosexample_fixture'
# Build the executable and register it as a test (TriBITS macro).
TRIBITS_ADD_EXECUTABLE_AND_TEST(
TestFixture
SOURCES ${SOURCES_TEST}
)

View File

@ -1,270 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_HEXELEMENT_HPP
#define KOKKOS_HEXELEMENT_HPP
namespace Kokkos {
namespace Example {

/** \brief  1D tensor-product data of a hex element; specialized per node count. */
template< unsigned NodeCount >
class HexElement_TensorData ;

/** \brief  Declared here only; no definition is visible in this header. */
template< unsigned NodeCount , class Device >
class HexElement_TensorEval ;

//----------------------------------------------------------------------------
/** \brief  Evaluate Hex element on interval [-1,1]^3
 *
 *  8-node element: two linear basis functions per direction, evaluated at
 *  the two integration points +-0.577350269 with unit weights.
 */
template<>
class HexElement_TensorData< 8 > {
public:

  static const unsigned element_node_count    = 8 ;
  static const unsigned spatial_dimension     = 3 ;
  static const unsigned integration_count_1d  = 2 ;
  static const unsigned function_count_1d     = 2 ;

  // Indexed [ integration point ][ basis function ].  The declaration now
  // matches how the constructor and HexElement_Data index these tables;
  // it previously listed the extents in the opposite order, which was only
  // harmless because both extents are equal.
  float values_1d [ integration_count_1d ][ function_count_1d ];
  float derivs_1d [ integration_count_1d ][ function_count_1d ];
  float weights_1d[ integration_count_1d ];

  // Per node: index of its 1D basis function along x, y, z.
  // The fourth entry is padding and is always zero.
  unsigned char eval_map[ element_node_count ][4] ;

  /** \brief  Value of 1D basis function 'jf' at 'x'; zero for jf > 1. */
  static float eval_value_1d( const unsigned jf , const float x )
  {
    return 0 == jf ? 0.5 * ( 1.0 - x ) : (
           1 == jf ? 0.5 * ( 1.0 + x ) : 0 );
  }

  /** \brief  Derivative of 1D basis function 'jf' (constant); zero for jf > 1. */
  static float eval_deriv_1d( const unsigned jf , const float )
  {
    return 0 == jf ? -0.5 : (
           1 == jf ?  0.5 : 0 );
  }

  /** \brief  Fill the node map, the weights and the 1D value/derivative tables. */
  HexElement_TensorData()
  {
    const unsigned char tmp_map[ element_node_count ][ spatial_dimension ] =
      { { 0 , 0 , 0 },
        { 1 , 0 , 0 },
        { 1 , 1 , 0 },
        { 0 , 1 , 0 },
        { 0 , 0 , 1 },
        { 1 , 0 , 1 },
        { 1 , 1 , 1 },
        { 0 , 1 , 1 } };

    weights_1d[0] = 1 ;
    weights_1d[1] = 1 ;

    const float points_1d[ integration_count_1d ] =
      { -0.577350269 , 0.577350269 };

    for ( unsigned i = 0 ; i < element_node_count ; ++i ) {
      eval_map[i][0] = tmp_map[i][0];
      eval_map[i][1] = tmp_map[i][1];
      eval_map[i][2] = tmp_map[i][2];
      eval_map[i][3] = 0 ; // padding byte: keep the object fully initialized
    }

    for ( unsigned xp = 0 ; xp < integration_count_1d ; ++xp ) {
    for ( unsigned xf = 0 ; xf < function_count_1d ; ++xf ) {
      values_1d[xp][xf] = eval_value_1d( xf , points_1d[xp] );
      derivs_1d[xp][xf] = eval_deriv_1d( xf , points_1d[xp] );
    }}
  }
};

//----------------------------------------------------------------------------
/** \brief  27-node hex element on interval [-1,1]^3
 *
 *  Three quadratic basis functions per direction, evaluated at the three
 *  integration points -0.774596669, 0, +0.774596669.
 */
template<>
class HexElement_TensorData< 27 > {
public:

  static const unsigned element_node_count    = 27 ;
  static const unsigned spatial_dimension     = 3 ;
  static const unsigned integration_count_1d  = 3 ;
  static const unsigned function_count_1d     = 3 ;

  // Indexed [ integration point ][ basis function ]; see the note on the
  // 8-node specialization's tables for why the extents are in this order.
  float values_1d [ integration_count_1d ][ function_count_1d ];
  float derivs_1d [ integration_count_1d ][ function_count_1d ];
  float weights_1d[ integration_count_1d ];

  // Per node: index of its 1D basis function along x, y, z.
  // The fourth entry is padding and is always zero.
  unsigned char eval_map[ element_node_count ][4] ;

  // Storage held by this class:
  //   9 floats (values_1d) + 9 floats (derivs_1d) + 3 floats (weights_1d)
  //   + 27 * 4 chars (eval_map, including the padding entry)

  /** \brief  Value of 1D quadratic basis function 'jf' at 'p'; zero for jf > 2. */
  static float eval_value_1d( const unsigned jf , const float p )
  {
    return 0 == jf ? 0.5 * p * ( p - 1 ) : (
           1 == jf ? 1.0 - p * p : (
           2 == jf ? 0.5 * p * ( p + 1 ) : 0 ));
  }

  /** \brief  Derivative of 1D quadratic basis function 'jf' at 'p'; zero for jf > 2. */
  static float eval_deriv_1d( const unsigned jf , const float p )
  {
    return 0 == jf ? p - 0.5 : (
           1 == jf ? -2.0 * p : (
           2 == jf ? p + 0.5 : 0 ));
  }

  /** \brief  Fill the node map, the weights and the 1D value/derivative tables. */
  HexElement_TensorData()
  {
    const unsigned char tmp_map[ element_node_count ][ spatial_dimension ] =
      { { 0 , 0 , 0 },
        { 2 , 0 , 0 },
        { 2 , 2 , 0 },
        { 0 , 2 , 0 },
        { 0 , 0 , 2 },
        { 2 , 0 , 2 },
        { 2 , 2 , 2 },
        { 0 , 2 , 2 },
        { 1 , 0 , 0 },
        { 2 , 1 , 0 },
        { 1 , 2 , 0 },
        { 0 , 1 , 0 },
        { 0 , 0 , 1 },
        { 2 , 0 , 1 },
        { 2 , 2 , 1 },
        { 0 , 2 , 1 },
        { 1 , 0 , 2 },
        { 2 , 1 , 2 },
        { 1 , 2 , 2 },
        { 0 , 1 , 2 },
        { 1 , 1 , 1 },
        { 1 , 1 , 0 },
        { 1 , 1 , 2 },
        { 0 , 1 , 1 },
        { 2 , 1 , 1 },
        { 1 , 0 , 1 },
        { 1 , 2 , 1 } };

    // Interval [-1,1]
    weights_1d[0] = 0.555555556 ;
    weights_1d[1] = 0.888888889 ;
    weights_1d[2] = 0.555555556 ;

    const float points_1d[3] = { -0.774596669 ,
                                  0.000000000 ,
                                  0.774596669 };

    for ( unsigned i = 0 ; i < element_node_count ; ++i ) {
      eval_map[i][0] = tmp_map[i][0];
      eval_map[i][1] = tmp_map[i][1];
      eval_map[i][2] = tmp_map[i][2];
      eval_map[i][3] = 0 ; // padding byte: keep the object fully initialized
    }

    for ( unsigned xp = 0 ; xp < integration_count_1d ; ++xp ) {
    for ( unsigned xf = 0 ; xf < function_count_1d ; ++xf ) {
      values_1d[xp][xf] = eval_value_1d( xf , points_1d[xp] );
      derivs_1d[xp][xf] = eval_deriv_1d( xf , points_1d[xp] );
    }}
  }
};

//----------------------------------------------------------------------------
/** \brief  Full 3D integration data of a hex element, built as the tensor
 *          product of the 1D tables in HexElement_TensorData< NodeCount >.
 *
 *  The same node map (eval_map) is used to enumerate both the integration
 *  points and the basis functions, so there are NodeCount of each.
 */
template< unsigned NodeCount >
class HexElement_Data {
public:

  static const unsigned spatial_dimension   = 3 ;
  static const unsigned element_node_count  = NodeCount ;
  static const unsigned integration_count   = NodeCount ;
  static const unsigned function_count      = NodeCount ;

  float weights[ integration_count ] ;                                         // per integration point
  float values[ integration_count ][ function_count ];                         // [ point ][ function ]
  float gradients[ integration_count ][ spatial_dimension ][ function_count ]; // [ point ][ d/dx,d/dy,d/dz ][ function ]

  /** \brief  Expand the 1D tables into the 3D weights, values and gradients. */
  HexElement_Data()
  {
    HexElement_TensorData< NodeCount > tensor_data ;

    for ( unsigned ip = 0 ; ip < integration_count ; ++ip ) {

      // 1D integration point index along each axis for 3D point 'ip'.
      const unsigned ipx = tensor_data.eval_map[ip][0] ;
      const unsigned ipy = tensor_data.eval_map[ip][1] ;
      const unsigned ipz = tensor_data.eval_map[ip][2] ;

      weights[ip] = tensor_data.weights_1d[ ipx ] *
                    tensor_data.weights_1d[ ipy ] *
                    tensor_data.weights_1d[ ipz ] ;

      for ( unsigned jf = 0 ; jf < function_count ; ++jf ) {

        // 1D basis function index along each axis for 3D function 'jf'.
        const unsigned jfx = tensor_data.eval_map[jf][0] ;
        const unsigned jfy = tensor_data.eval_map[jf][1] ;
        const unsigned jfz = tensor_data.eval_map[jf][2] ;

        values[ip][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                         tensor_data.values_1d[ ipy ][ jfy ] *
                         tensor_data.values_1d[ ipz ][ jfz ] ;

        // Each gradient component differentiates along exactly one axis.
        gradients[ip][0][jf] = tensor_data.derivs_1d[ ipx ][ jfx ] *
                               tensor_data.values_1d[ ipy ][ jfy ] *
                               tensor_data.values_1d[ ipz ][ jfz ] ;

        gradients[ip][1][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                               tensor_data.derivs_1d[ ipy ][ jfy ] *
                               tensor_data.values_1d[ ipz ][ jfz ] ;

        gradients[ip][2][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                               tensor_data.values_1d[ ipy ][ jfy ] *
                               tensor_data.derivs_1d[ ipz ][ jfz ] ;
      }
    }
  }
};

//----------------------------------------------------------------------------

} /* namespace Example */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_HEXELEMENT_HPP */

View File

@ -1,304 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#include <utility>
#include <iostream>
#include <Kokkos_Core.hpp>
#include <BoxElemPart.hpp>
namespace Kokkos {
namespace Example {
// Declaration only: no definition is visible in this file, so main()
// relies on instantiations provided elsewhere in the program.
template< class > void test_fixture();
}
}
/** \brief  Partition 'global_box' into 'global_size' parts and check the result.
 *
 *  For every rank the partitioned box and its one-layer ghost box are computed.
 *  Every pair of distinct ranks is intersected: a non-empty intersection of the
 *  two partitions is reported on std::cout and the function returns 0 at once.
 *  A non-empty intersection of a rank's ghost box with another rank's partition
 *  counts that other rank as a neighbor.
 *
 *  \param global_size    Number of parts.
 *  \param global_box     Box to partition, [axis][0..1].
 *  \param print_verbose  When true, print the collected statistics.
 *  \return  Largest neighbor count over all ranks (0 after an intersection error).
 */
int test_box( const size_t global_size
            , const size_t global_box[][2]
            , const bool print_verbose )
{
  size_t part_count_sum = 0 ;
  size_t part_count_max = 0 ;
  size_t part_count_min = Kokkos::Example::box_count( global_box );

  // Per axis: the widest / narrowest partition interval seen so far.
  size_t widest_box[3][2] ;
  size_t narrowest_box[3][2] ;

  for ( int axis = 0 ; axis < 3 ; ++axis ) {
    widest_box[axis][0]    = 0 ;
    widest_box[axis][1]    = 0 ;
    narrowest_box[axis][0] = 0 ;
    narrowest_box[axis][1] = global_box[axis][1] ;
  }

  size_t overlap_sum  = 0 ;
  size_t neighbor_max = 0 ;

  for ( size_t rank = 0 ; rank < global_size ; ++rank ) {

    size_t my_box[3][2] ;
    size_t my_ghost_box[3][2] ;
    size_t neighbor_count = 0 ;

    for ( int axis = 0 ; axis < 3 ; ++axis ) {
      my_box[axis][0] = 0 ;
      my_box[axis][1] = global_box[axis][1] ;
    }

    Kokkos::Example::box_partition( global_size , rank , global_box , my_box );
    Kokkos::Example::box_ghost_layer( global_box , my_box , 1 , my_ghost_box );

    // Size statistics of this rank's partition.
    {
      const size_t my_count = Kokkos::Example::box_count( my_box );

      for ( int axis = 0 ; axis < 3 ; ++axis ) {
        const size_t my_width = my_box[axis][1] - my_box[axis][0] ;

        if ( my_width < ( narrowest_box[axis][1] - narrowest_box[axis][0] ) ) {
          narrowest_box[axis][0] = my_box[axis][0] ;
          narrowest_box[axis][1] = my_box[axis][1] ;
        }
        if ( my_width > ( widest_box[axis][1] - widest_box[axis][0] ) ) {
          widest_box[axis][0] = my_box[axis][0] ;
          widest_box[axis][1] = my_box[axis][1] ;
        }
      }

      if ( part_count_max < my_count ) { part_count_max = my_count ; }
      if ( my_count < part_count_min ) { part_count_min = my_count ; }
      part_count_sum += my_count ;
    }

    // Compare against the partition of every other rank.
    for ( size_t other_rank = 0 ; other_rank < global_size ; ++other_rank ) {

      if ( other_rank != rank ) {

        size_t other_box[3][2] ;
        size_t common_box[3][2] ;

        for ( int axis = 0 ; axis < 3 ; ++axis ) {
          other_box[axis][0] = 0 ;
          other_box[axis][1] = global_box[axis][1] ;
        }

        Kokkos::Example::box_partition( global_size , other_rank , global_box , other_box );

        // Partitions of distinct ranks must not overlap.
        Kokkos::Example::box_intersect( common_box , my_box , other_box );

        const size_t overlap = Kokkos::Example::box_count( common_box );

        overlap_sum += overlap ;

        // Another rank touching the ghost layer is a neighbor.
        Kokkos::Example::box_intersect( common_box , my_ghost_box , other_box );

        if ( Kokkos::Example::box_count( common_box ) ) { ++neighbor_count ; }

        if ( overlap ) {
          std::cout << "box partition intersection error" << std::endl ;

          std::cout << "box = {" ;
          for ( int axis = 0 ; axis < 3 ; ++axis ) {
            std::cout << " [ " << my_box[axis][0] << " , " << my_box[axis][1] << " )" ;
          }
          std::cout << " }" << std::endl ;

          std::cout << "other_box = {" ;
          for ( int axis = 0 ; axis < 3 ; ++axis ) {
            std::cout << " [ " << other_box[axis][0] << " , " << other_box[axis][1] << " )" ;
          }
          std::cout << " }" << std::endl ;

          return 0 ;
        }
      }
    }

    if ( neighbor_max < neighbor_count ) { neighbor_max = neighbor_count ; }
  }

  if ( print_verbose ) {

    const size_t part_count_avg = size_t( double(part_count_sum) / double(global_size) );

    std::cout << "global_part = " << global_size << std::endl ;

    std::cout << "global_box = { "
              << " [ " << global_box[0][0] << " .. " << global_box[0][1] << " ) X"
              << " [ " << global_box[1][0] << " .. " << global_box[1][1] << " ) X"
              << " [ " << global_box[2][0] << " .. " << global_box[2][1] << " )"
              << " }" << std::endl ;

    std::cout << "count( global_box ) = " << Kokkos::Example::box_count( global_box ) << std::endl ;
    std::cout << "sum partition( global_box ) = " << part_count_sum << std::endl ;
    std::cout << "avg partition( global_box ) = " << part_count_avg << std::endl ;

    std::cout << "min partition( global_box ) = " << part_count_min << std::endl ;
    std::cout << "min part X ( global_box ) = [ " << narrowest_box[0][0] << " .. " << narrowest_box[0][1] << " )" << std::endl ;
    std::cout << "min part Y ( global_box ) = [ " << narrowest_box[1][0] << " .. " << narrowest_box[1][1] << " )" << std::endl ;
    std::cout << "min part Z ( global_box ) = [ " << narrowest_box[2][0] << " .. " << narrowest_box[2][1] << " )" << std::endl ;

    std::cout << "max partition( global_box ) = " << part_count_max << std::endl ;
    std::cout << "max part X ( global_box ) = [ " << widest_box[0][0] << " .. " << widest_box[0][1] << " )" << std::endl ;
    std::cout << "max part Y ( global_box ) = [ " << widest_box[1][0] << " .. " << widest_box[1][1] << " )" << std::endl ;
    std::cout << "max part Z ( global_box ) = [ " << widest_box[2][0] << " .. " << widest_box[2][1] << " )" << std::endl ;

    std::cout << "sum intersect( global_box ) = " << overlap_sum << std::endl ;
    std::cout << "max neighbor = " << neighbor_max << std::endl ;
  }

  return static_cast<int>( neighbor_max );
}
/** \brief  Build a BoxElemPart for every rank of a 256-way decomposition of a
 *          100 x 120 x 140 element box, verify that each rank's receive
 *          messages line up with the sender's send messages, and print
 *          min / avg / max statistics to std::cout.
 *
 *  For every receive message of rank 'r' the sending rank's partition is
 *  rebuilt and its send message addressed to 'r' is located.  The two must
 *  agree on the node count and, node by node, on the grid coordinates.
 */
void test_elem()
{
  const Kokkos::Example::BoxElemPart::Decompose
    decompose = Kokkos::Example::BoxElemPart:: DecomposeElem ; // DecomposeElem | DecomposeNode ;

  const size_t global_size = 256 ;
  const size_t global_nx = 100 ;
  const size_t global_ny = 120 ;
  const size_t global_nz = 140 ;

  double node_count_avg = 0 ;
  size_t node_count_max = 0 ;
  size_t node_count_min = ( global_nx + 1 ) * ( global_ny + 1 ) * ( global_nz + 1 );
  double elem_count_avg = 0 ;
  size_t elem_count_max = 0 ;
  size_t elem_count_min = global_nx * global_ny * global_nz ;
  double recv_count_avg = 0 ;
  size_t recv_count_max = 0 ;
  size_t recv_count_min = global_size ;
  double send_count_avg = 0 ;
  size_t send_count_max = 0 ;
  size_t send_count_min = global_size ;

  for ( size_t r = 0 ; r < global_size ; ++r ) {
    const Kokkos::Example::BoxElemPart
      fixture( Kokkos::Example::BoxElemPart::ElemLinear ,
               decompose , global_size , r , global_nx , global_ny , global_nz );

    // Print a sample:
    // if ( r == global_size * 2 / 3 ) fixture.print( std::cout );

    // Verify recv/send alignment:
    {
      // Received nodes are numbered after the nodes counted by owns_node_count().
      size_t recv_lid = fixture.owns_node_count();

      for ( size_t i = 0 ; i < fixture.recv_node_msg_count() ; ++i ) {
        const size_t recv_rank  = fixture.recv_node_rank( i );
        const size_t recv_count = fixture.recv_node_count( i );

        const Kokkos::Example::BoxElemPart other_fixture(
          Kokkos::Example::BoxElemPart::ElemLinear ,
          decompose , global_size , recv_rank , global_nx , global_ny , global_nz );

        // Find the sender's message addressed to 'r', accumulating the
        // number of send items that precede it.
        size_t send_item = 0 ;
        size_t j = 0 ;
        while ( j < other_fixture.send_node_msg_count() && other_fixture.send_node_rank(j) != r ) {
          send_item += other_fixture.send_node_count( j );
          ++j ;
        }

        if ( j >= other_fixture.send_node_msg_count() ) {
          // No send message targets 'r': do not read a send entry past the
          // sender's list; report the missing message instead.
          std::cout << "Error P[" << r << "].recv(" << recv_count << ") has no matching send from "
                    << "P[" << recv_rank << "]"
                    << std::endl ;
          // Skip this message's ids so later messages stay aligned.
          recv_lid += recv_count ;
        }
        else if ( recv_count != other_fixture.send_node_count(j) ) {
          std::cout << "Error P[" << r << "].recv(" << recv_count << ") != "
                    << "P[" << recv_rank << "].send(" << other_fixture.send_node_count(j) << ")"
                    << std::endl ;
          // Skip this message's ids so later messages stay aligned.
          recv_lid += recv_count ;
        }
        else {
          for ( size_t k = 0 ; k < recv_count ; ++k , ++send_item , ++recv_lid ) {
            const size_t send_lid = other_fixture.send_node_id( send_item );

            size_t recv_coord[3] , send_coord[3] ;

            fixture.local_node_coord( recv_lid , recv_coord );
            other_fixture.local_node_coord( send_lid , send_coord );

            if ( recv_coord[0] != send_coord[0] ||
                 recv_coord[1] != send_coord[1] ||
                 recv_coord[2] != send_coord[2] ) {
              std::cout << "Error P[" << r << "].recv[" << recv_lid << "]{ "
                        << recv_coord[0] << " , "
                        << recv_coord[1] << " , "
                        << recv_coord[2] << " } != "
                        << "P[" << recv_rank << "].send[" << send_lid << "]{ "
                        << send_coord[0] << " , "
                        << send_coord[1] << " , "
                        << send_coord[2] << " }"
                        << std::endl ;
            }
          }
        }
      }
    }

    // Accumulate statistics over all ranks.
    node_count_avg += fixture.owns_node_count();
    elem_count_avg += fixture.uses_elem_count();
    recv_count_avg += fixture.recv_node_msg_count();
    send_count_avg += fixture.send_node_msg_count();

    elem_count_min = std::min( static_cast<size_t>( fixture.uses_elem_count() ) , elem_count_min );
    elem_count_max = std::max( static_cast<size_t>( fixture.uses_elem_count() ) , elem_count_max );
    node_count_min = std::min( static_cast<size_t>( fixture.owns_node_count() ) , node_count_min );
    node_count_max = std::max( static_cast<size_t>( fixture.owns_node_count() ) , node_count_max );

    recv_count_max = std::max( static_cast<size_t>( fixture.recv_node_msg_count() ) , recv_count_max );
    recv_count_min = std::min( static_cast<size_t>( fixture.recv_node_msg_count() ) , recv_count_min );
    send_count_max = std::max( static_cast<size_t>( fixture.send_node_msg_count() ) , send_count_max );
    send_count_min = std::min( static_cast<size_t>( fixture.send_node_msg_count() ) , send_count_min );
  }

  node_count_avg /= double(global_size);
  elem_count_avg /= double(global_size);
  recv_count_avg /= double(global_size);
  send_count_avg /= double(global_size);

  std::cout << "Elem min(" << elem_count_min << ") avg(" << elem_count_avg << ") max(" << elem_count_max << ") " << std::endl
            << "Node min(" << node_count_min << ") avg(" << node_count_avg << ") max(" << node_count_max << ") " << std::endl
            << "Recv min(" << recv_count_min << ") avg(" << recv_count_avg << ") max(" << recv_count_max << ") " << std::endl
            << "Send min(" << send_count_min << ") avg(" << send_count_avg << ") max(" << send_count_max << ") " << std::endl
            ;
}
/** \brief  Driver: sweep the box partitioning test over 16, 32, ... , 512
 *          parts, then run the fixture test on the host execution space
 *          and, when KOKKOS_HAVE_CUDA is defined, on Cuda.
 */
int main()
{
  const size_t global_box[3][2] = { { 0 , 65 } , { 0 , 65 } , { 0 , 65 } };

  for ( size_t part_count = 16 ; part_count <= 16 * 32 ; part_count += 16 ) {
    // Quiet pass first; repeat verbosely only when a part has more than 30 neighbors.
    const int neighbor_max = test_box( part_count , global_box , false );

    if ( 30 < neighbor_max ) {
      test_box( part_count , global_box , true );
    }
  }

  // test_elem();

  std::cout << "test_fixture< Host >" << std::endl ;
  Kokkos::HostSpace::execution_space::initialize( 1 );
  Kokkos::Example::test_fixture< Kokkos::HostSpace::execution_space >();
  Kokkos::HostSpace::execution_space::finalize();

#if defined( KOKKOS_HAVE_CUDA )
  std::cout << "test_fixture< Cuda >" << std::endl ;
  // The host space is initialized before Cuda and finalized after it.
  Kokkos::HostSpace::execution_space::initialize();
  Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
  Kokkos::Example::test_fixture< Kokkos::Cuda >();
  Kokkos::Cuda::finalize();
  Kokkos::HostSpace::execution_space::finalize();
#endif
}

View File

@ -1,49 +0,0 @@
# Stand-alone Makefile for the Kokkos 'fixture' example.
# Root of the Kokkos source tree, relative to this directory.
KOKKOS_PATH = ../..
# Look for the example's .cpp files in the Kokkos source tree.
vpath %.cpp ${KOKKOS_PATH}/example/fixture
# Headers every object file depends on (see the compilation rule at the end).
EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/common/*.hpp ${KOKKOS_PATH}/example/fixture/*.hpp )
# First rule in the file, hence the default goal: build everything.
default: build_all
echo "End Build"
# Pull in the Kokkos make fragment; the KOKKOS_* variables used below are
# not defined in this file, so presumably they come from it.
include $(KOKKOS_PATH)/Makefile.kokkos
# Compiler selection: nvcc_wrapper is forced for Cuda builds; otherwise the
# defaults below apply only where the variable is not already set.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
# Add the example's header directories to the compile flags.
KOKKOS_CXXFLAGS += \
-I${KOKKOS_PATH}/example/common \
-I${KOKKOS_PATH}/example/fixture
# Object files and name of the example executable.
OBJ_EXAMPLE_FIXTURE = Main.o TestFixture.o BoxElemPart.o
EXE_EXAMPLE_FIXTURE = KokkosExample_Fixture
TARGETS = $(EXE_EXAMPLE_FIXTURE)
#TEST_TARGETS =
# Link the executable from the example objects and the Kokkos libraries.
$(EXE_EXAMPLE_FIXTURE) : $(OBJ_EXAMPLE_FIXTURE) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_EXAMPLE_FIXTURE) $(KOKKOS_LIBS) $(LIB) -o $(EXE_EXAMPLE_FIXTURE)
build_all : $(TARGETS)
# 'test' only builds; it has no recipe of its own.
test : build_all
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,58 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <TestFixture.hpp>
namespace Kokkos {
namespace Example {
// Explicit instantiation of test_fixture for the host execution space.
template void test_fixture< Kokkos::HostSpace::execution_space >();
// Explicit instantiation for Cuda, only when KOKKOS_HAVE_CUDA is defined.
#if defined( KOKKOS_HAVE_CUDA )
template void test_fixture<Kokkos::Cuda>();
#endif
} /* namespace Example */
} /* namespace Kokkos */

View File

@ -1,156 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_EXAMPLE_TESTFIXTURE_HPP
#define KOKKOS_EXAMPLE_TESTFIXTURE_HPP
#include <utility>
#include <iostream>
#include <Kokkos_Core.hpp>
#include <BoxElemPart.hpp>
#include <BoxElemFixture.hpp>
namespace Kokkos {
namespace Example {
/** \brief  Reduction functor that checks, for every element of a
 *          BoxElemFixture, that the grid coordinates of the element's nodes
 *          equal the coordinates of its node 0 plus the fixture's per-node
 *          local offsets (elem_node_local).
 *
 *  The reduction value counts the elements that passed ('success') and the
 *  elements that failed ('error').
 */
template< class Device >
struct FixtureVerifyElemNodeCoord
{
  typedef Device execution_space ;

  typedef struct { size_t success , error ; } value_type ;

  typedef Kokkos::Example::BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemLinear > FixtureType ;

  FixtureType m_fixture ;

  FixtureVerifyElemNodeCoord( const FixtureType & f ) : m_fixture(f) {}

  /** \brief  Start a reduction value with both counters at zero. */
  KOKKOS_INLINE_FUNCTION
  void init( value_type & update ) const
  {
    update.error   = 0 ;
    update.success = 0 ;
  }

  /** \brief  Add the counters of 'input' into 'update'. */
  KOKKOS_INLINE_FUNCTION
  void join( volatile value_type & update ,
             volatile const value_type & input ) const
  {
    update.success = update.success + input.success ;
    update.error   = update.error   + input.error ;
  }

  /** \brief  Verify element 'ielem' and count it as a success or an error. */
  KOKKOS_INLINE_FUNCTION
  void operator()( size_t ielem , value_type & update ) const
  {
    // Gather the grid coordinates of every node of the element.
    unsigned grid[ FixtureType::ElemNode ][3] ;

    for ( unsigned n = 0 ; n < FixtureType::ElemNode ; ++n ) {
      const unsigned node_id = m_fixture.elem_node(ielem,n);
      for ( int d = 0 ; d < 3 ; ++d ) {
        grid[n][d] = m_fixture.node_grid(node_id,d);
      }
    }

    // Every other node must sit at node 0's coordinates plus its local offset.
    bool mismatch = false ;

    for ( unsigned n = 1 ; n < FixtureType::ElemNode ; ++n ) {
      const bool aligned =
        grid[0][0] + m_fixture.elem_node_local(n,0) == grid[n][0] &&
        grid[0][1] + m_fixture.elem_node_local(n,1) == grid[n][1] &&
        grid[0][2] + m_fixture.elem_node_local(n,2) == grid[n][2] ;

      if ( ! aligned ) { mismatch = true ; }
    }

    if ( mismatch ) {
      update.error += 1 ;
    }
    else {
      update.success += 1 ;
    }
  }
};
template< class Device >
void test_fixture()
{
typedef Kokkos::Example::BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemLinear > FixtureType ;
const Kokkos::Example::BoxElemPart::Decompose
decompose = Kokkos::Example::BoxElemPart:: DecomposeElem ; // DecomposeElem | DecomposeNode ;
const unsigned global_size = 256 ;
const unsigned global_nx = 400 ;
const unsigned global_ny = 400 ;
const unsigned global_nz = 400 ;
for ( unsigned my_rank = 0 ; my_rank < global_size ; ++my_rank ) {
const FixtureType fixture( decompose , global_size , my_rank , global_nx , global_ny , global_nz );
// Verify grid coordinates of element's nodes
typename FixtureVerifyElemNodeCoord<Device>::value_type result = { 0 , 0 };
Kokkos::parallel_reduce( fixture.elem_node().dimension_0() , FixtureVerifyElemNodeCoord<Device>( fixture ) , result );
if ( result.error ) {
std::cout << "P[" << my_rank << ":" << global_size
<< "] Fixture elem_node_coord"
<< " success(" << result.success << ")"
<< " error(" << result.error << ")"
<< std::endl ;
}
// Check send/recv alignment
}
}
} /* namespace Example */
} /* namespace Kokkos */
#endif /* #ifndef KOKKOS_EXAMPLE_TESTFIXTURE_HPP */

View File

@ -1,17 +0,0 @@
# Header search path: this example's build and source directories.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# Reset SOURCES, then list the example's single source file.
SET(SOURCES "")
SET(SOURCES
G2L_Main.cpp
)
# Build the example executable (TriBITS macro) for the listed COMM modes.
TRIBITS_ADD_EXECUTABLE(
global_2_local_ids
SOURCES ${SOURCES}
COMM serial mpi
)

View File

@ -1,266 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#ifndef KOKKOS_GLOBAL_TO_LOCAL_IDS_HPP
#define KOKKOS_GLOBAL_TO_LOCAL_IDS_HPP
#include <Kokkos_Core.hpp>
#include <Kokkos_UnorderedMap.hpp>
#include <vector>
#include <algorithm>
#include <iomanip>
#include <impl/Kokkos_Timer.hpp>
// This test will simulate global ids
namespace G2L {
// Id-count bounds (256 and 2^25) and a step factor of 2.
// NOTE(review): none of these three constants is referenced in the visible
// part of this header; presumably they parameterize a size sweep elsewhere -- confirm.
static const unsigned begin_id_size = 256u;
static const unsigned end_id_size = 1u << 25;
static const unsigned id_step = 2u;
// Helper used while generating global ids: overlays one 32-bit word with
// its four individual bytes so the bytes can be addressed separately.
// Which byte holds the least significant bits depends on the host byte order.
union helper
{
uint32_t word;
uint8_t byte[4];
};
// Generates a unique, semi-random 32-bit global id for every local id.
//
// The id for local index i is the bitwise complement of i with its upper
// and lower 16-bit halves exchanged.  Both steps are one-to-one on 32-bit
// words, so distinct local ids always produce distinct global ids.
template <typename Device>
struct generate_ids
{
  typedef Device execution_space;
  typedef typename execution_space::size_type size_type;
  typedef Kokkos::View<uint32_t*,execution_space> local_id_view;

  // Output: entry i receives the global id generated for local id i.
  local_id_view local_2_global;

  // Keeps a handle to 'ids' and immediately dispatches the fill over all
  // of its entries; constructing the functor is what runs the kernel.
  generate_ids( local_id_view & ids)
    : local_2_global(ids)
  {
    Kokkos::parallel_for(local_2_global.size(), *this);
  }

  // Computes and stores the global id for local id i.
  KOKKOS_INLINE_FUNCTION
  void operator()(size_type i) const
  {
    const uint32_t word = ~static_cast<uint32_t>(i);
    // Exchanging bytes 0<->2 and 1<->3 of a 32-bit word is a rotation by
    // 16 bits regardless of byte order.  Using shifts yields exactly the
    // ids the former union-based byte shuffle produced, without reading
    // an inactive union member (undefined behaviour in C++).
    local_2_global[i] = static_cast<uint32_t>((word << 16) | (word >> 16));
  }
};
// fill a map of global_id -> local_id
template <typename Device>
struct fill_map
{
typedef Device execution_space;
typedef typename execution_space::size_type size_type;
typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view;
typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;
global_id_view global_2_local;
local_id_view local_2_global;
fill_map( global_id_view gIds, local_id_view lIds)
: global_2_local(gIds) , local_2_global(lIds)
{
Kokkos::parallel_for(local_2_global.size(), *this);
}
KOKKOS_INLINE_FUNCTION
void operator()(size_type i) const
{
global_2_local.insert( local_2_global[i], i);
}
};
// check that the global id is found and that it maps to the local id
template <typename Device>
struct find_test
{
typedef Device execution_space;
typedef typename execution_space::size_type size_type;
typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view;
typedef Kokkos::UnorderedMap<const uint32_t, const size_type,execution_space> global_id_view;
global_id_view global_2_local;
local_id_view local_2_global;
typedef size_t value_type;
find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors)
: global_2_local(gIds) , local_2_global(lIds)
{
Kokkos::parallel_reduce(local_2_global.size(), *this, num_errors);
}
KOKKOS_INLINE_FUNCTION
void init(value_type & v) const
{ v = 0; }
KOKKOS_INLINE_FUNCTION
void join(volatile value_type & dst, volatile value_type const & src) const
{ dst += src; }
KOKKOS_INLINE_FUNCTION
void operator()(size_type i, value_type & num_errors) const
{
uint32_t index = global_2_local.find( local_2_global[i] );
if ( !global_2_local.valid_at(index)
|| global_2_local.key_at(index) != local_2_global[i]
|| global_2_local.value_at(index) != i)
++num_errors;
}
};
// Runs one allocate / generate / fill / lookup cycle and prints the time
// spent in each phase.
//
//   num_ids              number of local ids to generate and insert
//   capacity             capacity requested for the unordered map
//   num_find_iterations  how many times the full lookup pass is repeated
//
// Returns the number of failed inserts if the fill failed (lookups are then
// skipped); otherwise the mismatch count left in place by the lookup passes.
template <typename Device>
size_t test_global_to_local_ids(unsigned num_ids, unsigned capacity, unsigned num_find_iterations)
{
  typedef Device execution_space;
  typedef typename execution_space::size_type size_type;
  typedef Kokkos::View<uint32_t*,execution_space> local_id_view;
  typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;

  const int shiftw = 15;  // column width for the phase labels
  double elapsed_time = 0;
  Kokkos::Impl::Timer timer;

  local_id_view local_2_global("local_ids", num_ids);
  global_id_view global_2_local(capacity);

  // Kernels may be launched asynchronously; wait for the device before each
  // timer read so the reported time covers the work, not just the launch.
  execution_space::fence();
  elapsed_time = timer.seconds();
  std::cout << std::setw(shiftw) << "allocate: " << elapsed_time << std::endl;
  timer.reset();

  // generate unique ids (the functor's constructor dispatches the kernel)
  {
    generate_ids<Device> gen(local_2_global);
  }
  execution_space::fence();
  elapsed_time = timer.seconds();
  std::cout << std::setw(shiftw) << "generate: " << elapsed_time << std::endl;
  timer.reset();

  // fill the global -> local map
  {
    fill_map<Device> fill(global_2_local, local_2_global);
  }
  execution_space::fence();
  elapsed_time = timer.seconds();
  std::cout << std::setw(shiftw) << "fill: " << elapsed_time << std::endl;
  timer.reset();

  size_t num_errors = global_2_local.failed_insert();

  if (num_errors == 0u) {
    // Every pass re-checks all ids against the (unchanged) map.
    for (unsigned i=0; i<num_find_iterations; ++i)
    {
      find_test<Device> find(global_2_local, local_2_global,num_errors);
    }
    execution_space::fence();
    elapsed_time = timer.seconds();
    std::cout << std::setw(shiftw) << "lookup: " << elapsed_time << std::endl;
  }
  else {
    std::cout << " !!! Fill Failed !!!" << std::endl;
  }

  return num_errors;
}
// Exercises the map at three capacities relative to the number of ids and
// returns the combined error count of the two runs that are expected to
// succeed (100% and 150%).  The undersized 66% run is expected to fail, so
// its result is deliberately ignored.
template <typename Device>
size_t run_test(unsigned num_ids, unsigned num_find_iterations)
{
  const unsigned undersized = (num_ids*2u)/3u;
  std::cout << " 66% of needed capacity (should fail)" << std::endl;
  test_global_to_local_ids<Device>(num_ids, undersized, num_find_iterations);

  std::cout << " 100% of needed capacity" << std::endl;
  const size_t exact_fit_errors =
    test_global_to_local_ids<Device>(num_ids, num_ids, num_find_iterations);

  std::cout << " 150% of needed capacity" << std::endl;
  const unsigned oversized = (num_ids*3u)/2u;
  const size_t oversized_errors =
    test_global_to_local_ids<Device>(num_ids, oversized, num_find_iterations);

  return exact_fit_errors + oversized_errors;
}
} // namespace G2L
#endif //KOKKOS_GLOBAL_TO_LOCAL_IDS_HPP

View File

@ -1,149 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <G2L.hpp>
namespace G2L {
// Runs the id-mapping test on Kokkos::Serial and returns its error count.
// When the Serial backend is not compiled in, does nothing and reports 0.
size_t run_serial(unsigned num_ids, unsigned num_find_iterations)
{
  size_t error_total = 0;
#if defined(KOKKOS_HAVE_SERIAL)
  std::cout << "Serial" << std::endl;
  error_total = run_test<Kokkos::Serial>(num_ids,num_find_iterations);
#else
  (void) num_ids;
  (void) num_find_iterations;
#endif // KOKKOS_HAVE_SERIAL
  return error_total;
}
// Runs the id-mapping test on Kokkos::Threads and returns its error count.
// When the pthread backend is not compiled in, does nothing and reports 0.
size_t run_threads(unsigned num_ids, unsigned num_find_iterations)
{
  size_t error_total = 0;
#if defined(KOKKOS_HAVE_PTHREAD)
  std::cout << "Threads" << std::endl;
  error_total = run_test<Kokkos::Threads>(num_ids,num_find_iterations);
#else
  (void) num_ids;
  (void) num_find_iterations;
#endif
  return error_total;
}
// Runs the id-mapping test on Kokkos::OpenMP and returns its error count.
// When the OpenMP backend is not compiled in, does nothing and reports 0.
size_t run_openmp(unsigned num_ids, unsigned num_find_iterations)
{
  size_t error_total = 0;
#if defined(KOKKOS_HAVE_OPENMP)
  std::cout << "OpenMP" << std::endl;
  error_total = run_test<Kokkos::OpenMP>(num_ids,num_find_iterations);
#else
  (void) num_ids;
  (void) num_find_iterations;
#endif
  return error_total;
}
// Runs the id-mapping test on Kokkos::Cuda and returns its error count.
// When the Cuda backend is not compiled in, does nothing and reports 0.
size_t run_cuda(unsigned num_ids, unsigned num_find_iterations)
{
  size_t error_total = 0;
#if defined(KOKKOS_HAVE_CUDA)
  std::cout << "Cuda" << std::endl;
  error_total = run_test<Kokkos::Cuda>(num_ids,num_find_iterations);
#else
  (void) num_ids;
  (void) num_find_iterations;
#endif
  return error_total;
}
} // namespace G2L
// Driver for the global-to-local id example.
//
// Usage: <exe> [num_ids num_find_iterations]
//
// Runs the test on every backend that was compiled in and exits with
// status 0 when no errors were counted, 1 otherwise.
int main(int argc, char *argv[])
{
  unsigned num_ids = 100000;
  unsigned num_find_iterations = 1000;

  if (argc == 3) {
    const int ids_arg = atoi(argv[1]);
    const int iterations_arg = atoi(argv[2]);
    // A negative value would wrap around to a huge unsigned count and
    // request an enormous allocation; reject it instead.
    if (ids_arg < 0 || iterations_arg < 0) {
      std::cout << argv[0] << " num_ids num_find_iterations" << std::endl;
      return 1;
    }
    num_ids = static_cast<unsigned>(ids_arg);
    num_find_iterations = static_cast<unsigned>(iterations_arg);
  }
  else if (argc != 1) {
    std::cout << argv[0] << " num_ids num_find_iterations" << std::endl;
    return 0;
  }

  // query the topology of the host
  unsigned threads_count = 4 ;

  if (Kokkos::hwloc::available()) {
    threads_count = Kokkos::hwloc::get_available_numa_count() *
                    Kokkos::hwloc::get_available_cores_per_numa() *
                    Kokkos::hwloc::get_available_threads_per_core();
  }

  std::cout << "Threads: " << threads_count << std::endl;
  std::cout << "Number of ids: " << num_ids << std::endl;
  std::cout << "Number of find iterations: " << num_find_iterations << std::endl;

  size_t num_errors = 0;

  num_errors += G2L::run_serial(num_ids,num_find_iterations);

#ifdef KOKKOS_HAVE_CUDA
  // The Cuda backend needs the host execution space initialized alongside it.
  Kokkos::HostSpace::execution_space::initialize(threads_count);
  Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
  num_errors += G2L::run_cuda(num_ids,num_find_iterations);
  Kokkos::Cuda::finalize();
  Kokkos::HostSpace::execution_space::finalize();
#endif

#ifdef KOKKOS_HAVE_PTHREAD
  Kokkos::Threads::initialize( threads_count );
  num_errors += G2L::run_threads(num_ids,num_find_iterations);
  Kokkos::Threads::finalize();
#endif

#ifdef KOKKOS_HAVE_OPENMP
  Kokkos::OpenMP::initialize( threads_count );
  num_errors += G2L::run_openmp(num_ids,num_find_iterations);
  Kokkos::OpenMP::finalize();
#endif

  // Do not return the raw count: converting size_t to the int exit status
  // (of which the parent typically sees only the low 8 bits) can turn a
  // non-zero error count into an apparent success.
  return num_errors == 0 ? 0 : 1;
}

View File

@ -1,53 +0,0 @@
# Builds the example in this directory against a Kokkos tree.
# Override KOKKOS_PATH to point at the tree that provides Makefile.kokkos.
KOKKOS_PATH ?= ../..

# Locate the sources relative to this Makefile so that make can be invoked
# from another directory.  This must be evaluated before any 'include',
# because MAKEFILE_LIST grows with every included file.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
SRC_DIR := $(dir $(MAKEFILE_PATH))

SRC = $(wildcard $(SRC_DIR)/*.cpp)
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)

#SRC = $(wildcard *.cpp)
#OBJ = $(SRC:%.cpp=%.o)

# None of these targets produces a file of the same name; declaring them
# phony keeps a stray file called 'build' or 'clean' from masking the rule.
.PHONY: default build clean

default: build
	echo "Start Build"

# use installed Makefile.kokkos
include $(KOKKOS_PATH)/Makefile.kokkos

# Pick the compiler and the executable suffix from the enabled devices.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(NVCC_WRAPPER)
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "Cuda,OpenMP"
#KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "OpenMP"
#KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

LIB =

build: $(EXE)

$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

clean:
	rm -f *.a *.o *.cuda *.host

# Compilation rules
%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,14 +0,0 @@
# Example executable: grow_array, built from every .cpp file in this directory.
# Headers are searched in the build directory first, then the source directory.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
)

FILE(GLOB SOURCES *.cpp)

TRIBITS_ADD_EXECUTABLE(
  grow_array
  SOURCES ${SOURCES}
  COMM serial mpi
)

View File

@ -1,53 +0,0 @@
# Builds the example in this directory against a Kokkos tree.
# Override KOKKOS_PATH to point at the tree that provides Makefile.kokkos.
KOKKOS_PATH ?= ../..

# Locate the sources relative to this Makefile so that make can be invoked
# from another directory.  This must be evaluated before any 'include',
# because MAKEFILE_LIST grows with every included file.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
SRC_DIR := $(dir $(MAKEFILE_PATH))

SRC = $(wildcard $(SRC_DIR)/*.cpp)
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)

#SRC = $(wildcard *.cpp)
#OBJ = $(SRC:%.cpp=%.o)

# None of these targets produces a file of the same name; declaring them
# phony keeps a stray file called 'build' or 'clean' from masking the rule.
.PHONY: default build clean

default: build
	echo "Start Build"

# use installed Makefile.kokkos
include $(KOKKOS_PATH)/Makefile.kokkos

# Pick the compiler and the executable suffix from the enabled devices.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(NVCC_WRAPPER)
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "Cuda,OpenMP"
#KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "OpenMP"
#KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

LIB =

build: $(EXE)

$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

clean:
	rm -f *.a *.o *.cuda *.host

# Compilation rules
%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,257 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef EXAMPLE_GROW_ARRAY
#define EXAMPLE_GROW_ARRAY
#include <stdlib.h>
#include <Kokkos_Core.hpp>
#include <algorithm>
#if defined(KOKKOS_HAVE_CUDA)
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#endif
namespace Example {
//----------------------------------------------------------------------------
// Sorts the entries [begin, end) of a rank-1 view in place.
// Generic version: applies std::sort directly to the view's data pointer.
template< class ExecSpace >
struct SortView {

  template< typename ValueType >
  SortView( const Kokkos::View<ValueType*,ExecSpace> v , int begin , int end )
  {
    ValueType * const first = v.ptr_on_device() + begin ;
    ValueType * const last  = v.ptr_on_device() + end ;
    std::sort( first , last );
  }
};
#if defined(KOKKOS_HAVE_CUDA)

// Cuda specialization: sorts the entries [begin, end) of the view with
// thrust::sort, wrapping the view's data pointer in thrust device pointers.
template<>
struct SortView< Kokkos::Cuda > {

  template< typename ValueType >
  SortView( const Kokkos::View<ValueType*,Kokkos::Cuda> v , int begin , int end )
  {
    typedef thrust::device_ptr<ValueType> device_iterator ;

    const device_iterator first( v.ptr_on_device() + begin );
    const device_iterator last ( v.ptr_on_device() + end );

    thrust::sort( first , last );
  }
};

#endif
//----------------------------------------------------------------------------
// Team-parallel functor that scans a bit-flag array of 'search_length' bits
// and appends the index of every set bit to 'm_search_array'.  The append
// position is obtained through a team scan that also advances the shared
// counter 'm_search_count'.
template< class ExecSpace >
struct GrowArrayFunctor {
typedef ExecSpace execution_space ;
// SHIFT is log2 of the number of bits in an int (6 for 8-byte, 5 for 4-byte);
// MASK selects the bit position within one int of flags.
enum { SHIFT = sizeof(int) == 8 ? 6 : 5 }; // 8 or 4 byte int
enum { MASK = ( 1 << SHIFT ) - 1 };
const Kokkos::View<int*,ExecSpace> m_search_flags ; // bit flags for values to append
const Kokkos::View<int*,ExecSpace> m_search_array ; // array to append values
const Kokkos::View<int,ExecSpace> m_search_count ; // offset
const int m_search_total ; // number of flag bits to search
const int m_search_team_chunk ; // number of search indices assigned to each team
// Allocates the flag words (enough ints to hold 'search_length' bits), the
// output array of 'array_length' entries and the rank-0 counter.
// The 'print' argument is accepted but not used by this constructor.
GrowArrayFunctor( int array_length , int search_length , int print = 1 )
: m_search_flags( "flags" , ( search_length + MASK ) >> SHIFT ) // One bit per search entry
, m_search_array( "array" , array_length )
, m_search_count( "count" )
, m_search_total( search_length )
, m_search_team_chunk( 2048 )
{}
// Returns true when bit 'index' is set in the flag array.
// An index whose flag word lies beyond the end of the array reads as "not set".
KOKKOS_INLINE_FUNCTION
bool flag_is_set( const int index ) const
{
// 64 or 32 bit integer:
const int j = index >> SHIFT ; // which integer flag
const int k = 1 << ( index & MASK ); // which bit in that integer
// The bounds check comes first so m_search_flags(j) is only read when j is valid.
const int s = ( j < int(m_search_flags.dimension_0()) ) && ( 0 != ( m_search_flags(j) & k ) );
return s ;
}
typedef typename Kokkos::TeamPolicy<ExecSpace>::member_type team_member ;
// Team entry point: the team identified by 'member' searches its chunk of
// the flag range and appends the indices of the set bits it finds.
KOKKOS_INLINE_FUNCTION
void operator()( const team_member & member ) const
{
// Per-thread staging buffer; it is flushed at least every
// LOCAL_BUFFER_LENGTH iterations (see 'k' below), and each iteration adds
// at most one entry, so it cannot overflow.
enum { LOCAL_BUFFER_LENGTH = 16 };
int local_buffer[ LOCAL_BUFFER_LENGTH ] ;
int local_count = 0 ;
// Each team searches 'm_search_team_chunk' indices.
// The threads of a team must iterate together because all
// threads in the team must call 'team_scan' to prevent deadlock in the team.
int search_team_begin = member.league_rank() * m_search_team_chunk ;
const int search_team_end = search_team_begin + m_search_team_chunk ;
int k = 0 ;
while ( search_team_begin < search_team_end ) {
// This iteration searches [ search_team_begin .. search_team_begin + member.team_size() ]
const int thread_search_index = search_team_begin + member.team_rank();
// If this thread's search index is in the range
// and the flag is set, push into this thread's local buffer.
if ( thread_search_index < m_search_total && flag_is_set(thread_search_index) ) {
local_buffer[ local_count ] = thread_search_index ;
++local_count ;
}
// Move the team's search range forward
search_team_begin += member.team_size(); // Striding team by team size
// Count number of times a thread's buffer might have grown:
++k ;
// Write buffer if end of search or a thread might have filled its buffer.
// The condition depends only on values that are the same for every
// thread of the team, so all threads reach the team_scan together.
if ( k == LOCAL_BUFFER_LENGTH /* A thread in my team might have filled its buffer */ ||
! ( search_team_begin < search_team_end ) /* Team is at the end of its search */ ) {
// Team's exclusive scan of threads' contributions, with global offset.
// This thread writes its buffer into [ team_offset .. team_offset + local_count )
const int team_offset = member.team_scan( local_count , & *m_search_count );
// Copy locally buffered entries into global array:
for ( int i = 0 ; i < local_count ; ++i ) {
m_search_array( team_offset + i ) = local_buffer[i] ;
}
// Start a fresh buffering round.
k = 0 ;
local_count = 0 ;
}
}
}
};
// Runs the grow-array example on 'ExecSpace' and verifies the result.
//
//   array_length   number of random bits to set (an upper bound on the
//                  number of indices appended, since draws may repeat)
//   search_length  number of flag bits searched
//   print          when non-zero, each individual mismatch is written to
//                  std::cerr; the summary line is printed on any error
//                  regardless of this flag
//
// Steps: set random flag bits on the host, copy them to the device, run the
// team-parallel search, sort the appended indices, then check on the host
// that every reported index was flagged and that no flagged bit was missed.
template< class ExecSpace >
void grow_array( int array_length , int search_length , int print = 1 )
{
  typedef GrowArrayFunctor< ExecSpace > FunctorType ;

  FunctorType functor( array_length , search_length , print );

  typename Kokkos::View<int,ExecSpace>::HostMirror  count = Kokkos::create_mirror_view( functor.m_search_count );
  typename Kokkos::View<int*,ExecSpace>::HostMirror flags = Kokkos::create_mirror_view( functor.m_search_flags );

  // Set at most 'array_length' random bits over the search length.
  for ( int i = 0 ; i < array_length ; ++i ) {
    // 'lrand48()' returns a value in [0, 2^31); scale it into
    // [0, search_length):  index = ( lrand48() * search_length ) / 2^31
    // The product is formed in 64 bits because it overflows a 32-bit 'long'.
    const long long scaled = static_cast<long long>( lrand48() ) * search_length ;
    const long int  index  = static_cast<long int>( scaled >> 31 );
    // set the bit within the flags:
    flags( index >> FunctorType::SHIFT ) |= ( 1 << ( index & FunctorType::MASK ) );
  }

  Kokkos::deep_copy( functor.m_search_flags , flags );

  // Each team works on 'functor.m_search_team_chunk' span of the search_length
  Kokkos::TeamPolicy< ExecSpace >
    work( /* #teams */ ( search_length + functor.m_search_team_chunk - 1 ) / functor.m_search_team_chunk
        , /* threads/team */ Kokkos::TeamPolicy< ExecSpace >::team_size_max( functor ) );

  // Fill array:
  Kokkos::parallel_for( work , functor );

  // How much was filled:
  Kokkos::deep_copy( count , functor.m_search_count );

  // Sort array:
  SortView< ExecSpace >( functor.m_search_array , 0 , *count );

  // Mirror the results:
  typename Kokkos::View<int*,ExecSpace>::HostMirror results = Kokkos::create_mirror_view( functor.m_search_array );
  Kokkos::deep_copy( results , functor.m_search_array );

  // Verify results:
  int result_error_count = 0 ;
  int flags_error_count = 0 ;

  for ( int i = 0 ; i < *count ; ++i ) {
    const int index = results(i);
    const int entry = index >> FunctorType::SHIFT ;
    const int bit = 1 << ( index & FunctorType::MASK );
    const bool flag = 0 != ( flags( entry ) & bit );
    if ( ! flag ) {
      // Reported index was never flagged (or was reported more than once).
      if ( print ) std::cerr << "result( " << i << " : " << index << " )";
      ++result_error_count ;
    }
    flags( entry ) &= ~bit ; // Clear that verified bit
  }

  for ( int i = 0 ; i < int(flags.dimension_0()) ; ++i ) {
    // If any uncleared bits then an error
    if ( flags(i) ) {
      if ( print ) std::cerr << "flags( " << i << " : " << flags(i) << " )" ;
      ++flags_error_count ;
    }
  }

  if ( result_error_count || flags_error_count ) {
    std::cerr << std::endl << "Example::GrowArrayFunctor( " << array_length
              << " , " << search_length
              << " ) result_error_count( " << result_error_count << " )"
              << " flags_error_count( " << flags_error_count << " )"
              << std::endl ;
  }
}
} // namespace Example
//----------------------------------------------------------------------------
#endif /* #ifndef EXAMPLE_GROW_ARRAY */

View File

@ -1,110 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <iostream>
#include <sstream>
#include <Kokkos_Core.hpp>
#include <grow_array.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Runs the grow-array example once on every backend that was compiled in.
// The command-line arguments are not used.
int main( int argc , char ** argv )
{
  // Problem size: number of random bits to set and the span they fall in.
  const int length_array = 1000000 ;
  const int span_values  = 100000000 ;

  // Thread layout: defaults, replaced by the hwloc topology when available
  // (one core per NUMA region is left out of the count).
  int use_numa    = 1 ;
  int use_core    = 1 ;
  int num_threads = 4 ;

  if ( Kokkos::hwloc::available() ) {
    use_numa    = Kokkos::hwloc::get_available_numa_count();
    use_core    = Kokkos::hwloc::get_available_cores_per_numa() - 1 ;
    num_threads = use_numa * use_core * Kokkos::hwloc::get_available_threads_per_core();
  }

#ifdef KOKKOS_HAVE_SERIAL
  {
    std::cout << "Kokkos::Serial" << std::endl ;
    // The Serial device accepts these arguments, though it may ignore them.
    Kokkos::Serial::initialize( num_threads , use_numa , use_core );
    Example::grow_array< Kokkos::Serial >( length_array , span_values );
    Kokkos::Serial::finalize ();
  }
#endif // KOKKOS_HAVE_SERIAL

#ifdef KOKKOS_HAVE_PTHREAD
  {
    std::cout << "Kokkos::Threads" << std::endl ;
    Kokkos::Threads::initialize( num_threads , use_numa , use_core );
    Example::grow_array< Kokkos::Threads >( length_array , span_values );
    Kokkos::Threads::finalize();
  }
#endif

#ifdef KOKKOS_HAVE_OPENMP
  {
    std::cout << "Kokkos::OpenMP" << std::endl ;
    Kokkos::OpenMP::initialize( num_threads , use_numa , use_core );
    Example::grow_array< Kokkos::OpenMP >( length_array , span_values );
    Kokkos::OpenMP::finalize();
  }
#endif

#ifdef KOKKOS_HAVE_CUDA
  {
    std::cout << "Kokkos::Cuda" << std::endl ;
    // The host execution space is brought up around the Cuda device.
    Kokkos::HostSpace::execution_space::initialize(1);
    Kokkos::Cuda::initialize();
    Example::grow_array< Kokkos::Cuda >( length_array , span_values );
    Kokkos::Cuda::finalize();
    Kokkos::HostSpace::execution_space::finalize();
  }
#endif

  return 0 ;
}

View File

@ -1,16 +0,0 @@
# Example executable: md_skeleton, built from every .cpp file in this directory.
# Headers are searched in the build directory first, then the source directory.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
)

# No extra libraries are linked; the variable is kept as the hook for DEPLIBS.
SET(LIBRARIES "")

FILE(GLOB SOURCES *.cpp )

TRIBITS_ADD_EXECUTABLE(
  md_skeleton
  SOURCES ${SOURCES}
  COMM serial mpi
  DEPLIBS ${LIBRARIES}
)

View File

@ -1,53 +0,0 @@
# Builds the example in this directory against a Kokkos tree.
# Override KOKKOS_PATH to point at the tree that provides Makefile.kokkos.
KOKKOS_PATH ?= ../..

# Locate the sources relative to this Makefile so that make can be invoked
# from another directory.  This must be evaluated before any 'include',
# because MAKEFILE_LIST grows with every included file.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
SRC_DIR := $(dir $(MAKEFILE_PATH))

SRC = $(wildcard $(SRC_DIR)/*.cpp)
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)

#SRC = $(wildcard *.cpp)
#OBJ = $(SRC:%.cpp=%.o)

# None of these targets produces a file of the same name; declaring them
# phony keeps a stray file called 'build' or 'clean' from masking the rule.
.PHONY: default build clean

default: build
	echo "Start Build"

# use installed Makefile.kokkos
include $(KOKKOS_PATH)/Makefile.kokkos

# Pick the compiler and the executable suffix from the enabled devices.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(NVCC_WRAPPER)
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "Cuda,OpenMP"
#KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "OpenMP"
#KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

LIB =

build: $(EXE)

$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

clean:
	rm -f *.a *.o *.cuda *.host

# Compilation rules
%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,3 +0,0 @@
To build this example on a 2012-model MacBook Pro with an NVIDIA Kepler GPU, run:
./build.cuda_std g++_osx cuda_osx 30 opt

View File

@ -1,192 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/* Define the values that set the maximum number of registers used for the Force Kernel.
* The limit is 32 * 2048 / (KOKKOS_CUDA_MAX_THREADS * KOKKOS_CUDA_MIN_BLOCKS).
* They have to be defined before any Kokkos header file is included.
*/
#define KOKKOS_CUDA_MAX_THREADS 512
#define KOKKOS_CUDA_MIN_BLOCKS 3
#include <system.h>
#include <cstdio>
/* Simple Lennard-Jones force kernel using neighbor lists.
* Every pair of atoms (i,j) whose distance is smaller than r_cut interacts
* through the pair energy
* e_ij = 4*epsilon * ( (sigma/r_ij)^12 - (sigma/r_ij)^6 )
* where r_ij is the distance between atoms i and j; f_ij is the force
* derived from that energy.
* The force on atom i is the sum over f_ij:
* f_i = sum_j (f_ij)
* Neighbor lists are used to precalculate which atoms j are
* close enough to i to be able to contribute. By choosing a neighbor
* cutoff larger than the force cutoff, the neighbor list can be reused
* several times (typically 10 - 100).
*/
struct ForceFunctor {
typedef t_x_array::execution_space execution_space; //Device Type for running the kernel
typedef double2 value_type; // When energy calculation is requested return energy, and virial
t_x_array_randomread x; //atom positions
t_f_array f; //atom forces
t_int_1d_const numneigh; //number of neighbors per atom
t_neighbors_const neighbors; //neighborlist
double cutforcesq; //force cutoff
double epsilon; //Potential parameter
double sigma6; //Potential parameter
ForceFunctor(System s) {
x = s.d_x;
f = s.f;
numneigh = s.numneigh;
neighbors = s.neighbors;
cutforcesq = s.force_cutsq;
epsilon = 1.0;
sigma6 = 1.0;
}
/* Operator for not calculating energy and virial */
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
force<0>(i);
}
/* Operator for calculating energy and virial */
KOKKOS_INLINE_FUNCTION
void operator() (const int &i, double2 &energy_virial) const {
double2 ev = force<1>(i);
energy_virial.x += ev.x;
energy_virial.y += ev.y;
}
template<int EVFLAG>
KOKKOS_INLINE_FUNCTION
double2 force(const int &i) const
{
const int numneighs = numneigh[i];
const double xtmp = x(i, 0);
const double ytmp = x(i, 1);
const double ztmp = x(i, 2);
double fix = 0;
double fiy = 0;
double fiz = 0;
double energy = 0;
double virial = 0;
//pragma simd forces vectorization (ignoring the performance objections of the compiler)
//give hint to compiler that fix, fiy and fiz are used for reduction only
#ifdef USE_SIMD
#pragma simd reduction (+: fix,fiy,fiz,energy,virial)
#endif
for(int k = 0; k < numneighs; k++) {
const int j = neighbors(i, k);
const double delx = xtmp - x(j, 0);
const double dely = ytmp - x(j, 1);
const double delz = ztmp - x(j, 2);
const double rsq = delx * delx + dely * dely + delz * delz;
//if(i==0) printf("%i %i %lf %lf\n",i,j,rsq,cutforcesq);
if(rsq < cutforcesq) {
const double sr2 = 1.0 / rsq;
const double sr6 = sr2 * sr2 * sr2 * sigma6;
const double force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
fix += delx * force;
fiy += dely * force;
fiz += delz * force;
if(EVFLAG) {
energy += sr6 * (sr6 - 1.0) * epsilon;
virial += delx * delx * force + dely * dely * force + delz * delz * force;
}
}
}
f(i, 0) += fix;
f(i, 1) += fiy;
f(i, 2) += fiz;
double2 energy_virial ;
energy_virial.x = 4.0 * energy ;
energy_virial.y = 0.5 * virial ;
return energy_virial;
}
/* init and join functions when doing the reduction to obtain energy and virial */
/* init: reset both components (.x and .y) of a reduction value to zero.
 * value_type is declared outside this excerpt -- presumably double2, TODO confirm. */
KOKKOS_FUNCTION
static void init(volatile value_type &update) {
update.x = update.y = 0;
}
/* join: merge two partial reduction values by adding source into update,
 * component by component (.x and .y). */
KOKKOS_FUNCTION
static void join(volatile value_type &update ,
const volatile value_type &source) {
update.x += source.x ;
update.y += source.y ;
}
};
/* Host-side driver for the force kernel.
 * Runs ForceFunctor over the s.nlocal local atoms. With evflag set the
 * kernel runs as a reduction and the summed energy (.x) and virial (.y)
 * are returned; otherwise only forces are computed and the returned pair
 * is zero. Fences the execution space before returning. */
double2 force(System &s,int evflag) {
  double2 ev ;
  ev.x = 0 ;
  ev.y = 0 ;
  ForceFunctor kernel(s);
  if(evflag) {
    Kokkos::parallel_reduce(s.nlocal,kernel,ev);
  } else {
    Kokkos::parallel_for(s.nlocal,kernel);
  }
  execution_space::fence();
  return ev;
}

View File

@ -1,205 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include "system.h"
int create_system(System &system, int nx, int ny, int nz, double rho);
int neigh_setup(System &system);
int neigh_build(System &system);
double2 force(System &system,int evflag);
/* Return the value that follows the option at argv[i] and advance i onto it.
 * Prints an error and exits when the option is the last argument, instead of
 * reading past the end of argv. */
static const char* option_value(int argc, char** argv, int &i) {
  if(i + 1 >= argc) {
    fprintf(stderr, "Missing value for option %s\n", argv[i]);
    exit(EXIT_FAILURE);
  }
  return argv[++i];
}
/* simple MD Skeleton which
 *   - constructs a simple FCC lattice,
 *   - computes a neighborlist
 *   - compute LJ-Force kernel a number of times
 *
 * Command-line options (each takes one value):
 *   -t, --num_threads   threads per team for host backends
 *   --teams             number of teams (multiplied with the thread count)
 *   -d, --device        device id for GPU runs
 *   --delta             scaling factor for random offsets of atom positions
 *   -i, --iter          number of timed force calculations
 *   -rho                number density of the system
 *   -s, --size          default for nx, ny and nz
 *   -nx, -ny, -nz       lattice cells per direction
 *   -b, --neigh_bins    bin size for neighbor list construction
 * Returns 0 on success and 1 if the system could not be created.
 */
int main(int argc, char** argv) {
  printf("Running MD Skeleton\n");
  /* Thread numbers for Host */
  int num_threads = 1;
  int teams = 1;
  int device = 0; // Default device for GPU runs
  /* avoid unused variable warnings */
  (void)num_threads;
  (void)teams;
  (void)device;
  /* Default value for number of force calculations */
  int iter = 100;
  /* Default value for system size (4*nx*ny*nz atoms)
   * nx, ny and nz are set to system_size if not specified on commandline */
  int system_size = 20;
  int nx = -1;
  int ny = -1;
  int nz = -1;
  int neighbor_size = 1; // Default bin size for neighbor list construction
  double rho = 0.8442; // Number density of the system
  double delta = 0; // Scaling factor for random offsets of atom positions
  /* read in command-line arguments; argv[0] is the program name and is skipped */
  for(int i = 1; i < argc; i++) {
    const char* opt = argv[i];
    if((strcmp(opt, "-t") == 0) || (strcmp(opt, "--num_threads") == 0)) {
      num_threads = atoi(option_value(argc, argv, i));
    } else if(strcmp(opt, "--teams") == 0) {
      teams = atoi(option_value(argc, argv, i));
    } else if((strcmp(opt, "-d") == 0) || (strcmp(opt, "--device") == 0)) {
      device = atoi(option_value(argc, argv, i));
    } else if(strcmp(opt, "--delta") == 0) {
      delta = atof(option_value(argc, argv, i));
    } else if((strcmp(opt, "-i") == 0) || (strcmp(opt, "--iter") == 0)) {
      iter = atoi(option_value(argc, argv, i));
    } else if(strcmp(opt, "-rho") == 0) {
      /* rho is a real-valued density: parse with atof, atoi would truncate it */
      rho = atof(option_value(argc, argv, i));
    } else if((strcmp(opt, "-s") == 0) || (strcmp(opt, "--size") == 0)) {
      system_size = atoi(option_value(argc, argv, i));
    } else if(strcmp(opt, "-nx") == 0) {
      nx = atoi(option_value(argc, argv, i));
    } else if(strcmp(opt, "-ny") == 0) {
      ny = atoi(option_value(argc, argv, i));
    } else if(strcmp(opt, "-nz") == 0) {
      nz = atoi(option_value(argc, argv, i));
    } else if((strcmp(opt, "-b") == 0) || (strcmp(opt, "--neigh_bins") == 0)) {
      neighbor_size = atoi(option_value(argc, argv, i));
    }
  }
  if( nx < 0 ) nx = system_size;
  if( ny < 0 ) ny = system_size;
  if( nz < 0 ) nz = system_size;
  printf("-> Init Device\n");
#if defined( KOKKOS_HAVE_CUDA )
  Kokkos::HostSpace::execution_space::initialize(teams*num_threads);
  Kokkos::Cuda::SelectDevice select_device(device);
  Kokkos::Cuda::initialize(select_device);
#elif defined( KOKKOS_HAVE_OPENMP )
  Kokkos::OpenMP::initialize(teams*num_threads);
#elif defined( KOKKOS_HAVE_PTHREAD )
  Kokkos::Threads::initialize(teams*num_threads);
#endif
  System system;
  system.neigh_cut = 2.8;
  system.force_cut = 2.5;
  system.force_cutsq = system.force_cut*system.force_cut;
  system.delta = delta;
  printf("-> Build system\n");
  /* create_system returns non-zero without uploading positions when the
   * atom count is wrong; continuing would run the kernels on unset data */
  if(create_system(system,nx,ny,nz,rho) != 0) {
    printf("-> Failed to build system\n");
    execution_space::finalize();
    return 1;
  }
  printf("-> Created %i atoms and %i ghost atoms\n",system.nlocal,system.nghost);
  system.nbinx = system.box.xprd/neighbor_size+1;
  system.nbiny = system.box.yprd/neighbor_size+1;
  system.nbinz = system.box.zprd/neighbor_size+1;
  printf("-> Building Neighborlist\n");
  neigh_setup(system);
  neigh_build(system);
  /* one untimed pass that also reduces energy and virial */
  double2 ev = force(system,1);
  printf("-> Calculate Energy: %f Virial: %f\n",ev.x,ev.y);
  printf("-> Running %i force calculations\n",iter);
  Kokkos::Impl::Timer timer;
  for(int i=0;i<iter;i++) {
    force(system,0);
  }
  double time = timer.seconds();
  printf("Time: %e s for %i iterations with %i atoms\n",time,iter,system.nlocal);
  execution_space::finalize();
  return 0;
}

View File

@ -1,430 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <system.h>
#include <cstdio>
#include <Kokkos_Core.hpp>
#define SMALL 1.0e-6
#define FACTOR 0.999
/* BinningFunctor sorts atoms into the bins that tile the simulation box.
 * The neighbor list is later built by testing only atoms in nearby bins,
 * which makes neighbor list construction an O(N) operation.
 * If a bin receives more atoms than it has slots, the slot index that did
 * not fit is recorded in s.d_resize(0) so the caller can enlarge the bins
 * and run the binning again.
 */
struct BinningFunctor {
  typedef t_int_2d::execution_space execution_space;
  System s;
  int atoms_per_bin; // slots per bin (second extent of s.bins)

  BinningFunctor(System _s): s(_s), atoms_per_bin(s.bins.dimension_1()) {}

  /* Place atom i into its bin, claiming a slot with an atomic counter. */
  KOKKOS_INLINE_FUNCTION
  void operator() (const int &i) const
  {
    const int bin = coord2bin(s.d_x(i, 0), s.d_x(i, 1), s.d_x(i, 2));
    const int slot = Kokkos::atomic_fetch_add(&s.bincount[bin], 1);
    if(slot >= atoms_per_bin) {
      // bin is full: remember the largest slot index that was requested
      if(s.d_resize(0) < slot) s.d_resize(0) = slot;
      return;
    }
    s.bins(bin, slot) = i;
  }

  /* Bin index along one axis for coordinate c, given the box length prd,
   * the inverse bin size, the number of bins across the box and the index
   * of the lowest bin. */
  KOKKOS_INLINE_FUNCTION
  static int axis2bin(double c, double prd, double bininv, int nbin, int mbinlo)
  {
    if(c >= prd) return (int)((c - prd) * bininv) + nbin - mbinlo;
    if(c >= 0.0) return (int)(c * bininv) - mbinlo;
    return (int)(c * bininv) - mbinlo - 1;
  }

  /* Flattened bin index of the point (x,y,z). */
  KOKKOS_INLINE_FUNCTION
  int coord2bin(double x, double y, double z) const
  {
    const int ix = axis2bin(x, s.box.xprd, s.bininvx, s.nbinx, s.mbinxlo);
    const int iy = axis2bin(y, s.box.yprd, s.bininvy, s.nbiny, s.mbinylo);
    const int iz = axis2bin(z, s.box.zprd, s.bininvz, s.nbinz, s.mbinzlo);
    return (iz * s.mbiny * s.mbinx + iy * s.mbinx + ix + 1);
  }
};
/* BuildFunctor fills the neighbor list of one atom per call.
 * All atoms in the bins selected by the stencil around the atom's own bin
 * are tested against the squared neighbor cutoff. At most maxneighs
 * neighbors are stored; when the list is full the count is recorded in
 * s.d_resize(0) so the caller can enlarge the list and rebuild.
 */
struct BuildFunctor {
  typedef t_int_2d::execution_space execution_space;
  System s;
  int maxneighs; // slots per atom (second extent of s.neighbors)

  BuildFunctor(System _s): s(_s), maxneighs(s.neighbors.dimension_1()) {}

  /* Collect the neighbors of atom i. */
  KOKKOS_INLINE_FUNCTION
  void operator() (const int &i) const
  {
    const t_int_1d_const_um bincount_c = s.bincount;
    const double xi = s.d_x(i, 0);
    const double yi = s.d_x(i, 1);
    const double zi = s.d_x(i, 2);
    const int ibin = coord2bin(xi, yi, zi);
    int count = 0;
    // visit every bin of the stencil (this includes ibin itself)
    for(int k = 0; k < s.nstencil; k++) {
      const int jbin = ibin + s.d_stencil[k];
      // row of s.bins holding the atoms of jbin
      const t_int_1d_const_um loc_bin =
        Kokkos::subview(s.bins,jbin,Kokkos::ALL());
      // only the atom's own bin can contain the atom itself
      const bool own_bin = (ibin == jbin);
      for(int m = 0; m < bincount_c[jbin]; m++) {
        const int j = loc_bin[m];
        if(own_bin && j == i) continue;
        const double dx = xi - s.d_x(j, 0);
        const double dy = yi - s.d_x(j, 1);
        const double dz = zi - s.d_x(j, 2);
        const double rsq = dx * dx + dy * dy + dz * dz;
        if(rsq <= s.neigh_cutsq && count<maxneighs) s.neighbors(i,count++) = j;
      }
    }
    s.numneigh[i] = count;
    // a full list may have dropped neighbors: ask for a larger one
    if(count >= maxneighs && count >= s.d_resize(0)) s.d_resize(0) = count;
  }

  /* Bin index along one axis for coordinate c, given the box length prd,
   * the inverse bin size, the number of bins across the box and the index
   * of the lowest bin. */
  KOKKOS_INLINE_FUNCTION
  static int axis2bin(double c, double prd, double bininv, int nbin, int mbinlo)
  {
    if(c >= prd) return (int)((c - prd) * bininv) + nbin - mbinlo;
    if(c >= 0.0) return (int)(c * bininv) - mbinlo;
    return (int)(c * bininv) - mbinlo - 1;
  }

  /* Flattened bin index of the point (x,y,z). */
  KOKKOS_INLINE_FUNCTION
  int coord2bin(double x, double y, double z) const
  {
    const int ix = axis2bin(x, s.box.xprd, s.bininvx, s.nbinx, s.mbinxlo);
    const int iy = axis2bin(y, s.box.yprd, s.bininvy, s.nbiny, s.mbinylo);
    const int iz = axis2bin(z, s.box.zprd, s.bininvz, s.nbinz, s.mbinzlo);
    return (iz * s.mbiny * s.mbinx + iy * s.mbinx + ix + 1);
  }
};
/* Zero an int array element by element: call i writes 0 to element i of
 * the buffer that ptr points to (ptr is interpreted as int*). */
struct MemsetZeroFunctor {
  typedef t_x_array::execution_space execution_space ;
  void* ptr;
  KOKKOS_INLINE_FUNCTION void operator()(const int i) const {
    int* const words = static_cast<int*>(ptr);
    words[i] = 0;
  }
};
/* Squared distance between the closest faces of two bins that are
 * (i, j, k) bins apart. Along each axis the gap is zero for offset 0 and
 * one bin width less than the offset otherwise (sign is irrelevant once
 * squared). */
double bindist(System &s, int i, int j, int k)
{
  const double delx = (i > 0) ? (i - 1) * s.binsizex
                    : ((i == 0) ? 0.0 : (i + 1) * s.binsizex);
  const double dely = (j > 0) ? (j - 1) * s.binsizey
                    : ((j == 0) ? 0.0 : (j + 1) * s.binsizey);
  const double delz = (k > 0) ? (k - 1) * s.binsizez
                    : ((k == 0) ? 0.0 : (k + 1) * s.binsizez);
  return (delx * delx + dely * dely + delz * delz);
}
/* Prepare everything the neighbor list build needs.
 * Reads s.box, s.neigh_cut, s.natoms and the bin counts s.nbinx/y/z (which
 * the caller must have set), then derives bin sizes, the extended bin grid
 * that also covers one cutoff beyond the box, the stencil of bin offsets
 * to search, and allocates the bin, neighbor and resize-flag arrays.
 */
void neigh_setup(System &s) {
  s.neigh_cutsq = s.neigh_cut * s.neigh_cut;

  /* bin edge lengths and their inverses */
  s.binsizex = s.box.xprd / s.nbinx;
  s.bininvx = 1.0 / s.binsizex;
  s.binsizey = s.box.yprd / s.nbiny;
  s.bininvy = 1.0 / s.binsizey;
  s.binsizez = s.box.zprd / s.nbinz;
  s.bininvz = 1.0 / s.binsizez;

  /* Per axis: find the bins containing (lo - cutoff) and (hi + cutoff),
   * nudged outwards by SMALL box lengths, then extend the range by 1 in
   * each direction to insure stencil coverage. */
  {
    double edge = s.box.xlo - s.neigh_cut - SMALL * s.box.xprd;
    int lo = static_cast<int>(edge * s.bininvx);
    if(edge < 0.0) lo--; // the cast truncates towards zero
    edge = s.box.xhi + s.neigh_cut + SMALL * s.box.xprd;
    const int hi = static_cast<int>(edge * s.bininvx);
    s.mbinxlo = lo - 1;
    s.mbinx = (hi + 1) - s.mbinxlo + 1;
  }
  {
    double edge = s.box.ylo - s.neigh_cut - SMALL * s.box.yprd;
    int lo = static_cast<int>(edge * s.bininvy);
    if(edge < 0.0) lo--;
    edge = s.box.yhi + s.neigh_cut + SMALL * s.box.yprd;
    const int hi = static_cast<int>(edge * s.bininvy);
    s.mbinylo = lo - 1;
    s.mbiny = (hi + 1) - s.mbinylo + 1;
  }
  {
    double edge = s.box.zlo - s.neigh_cut - SMALL * s.box.zprd;
    int lo = static_cast<int>(edge * s.bininvz);
    if(edge < 0.0) lo--;
    edge = s.box.zhi + s.neigh_cut + SMALL * s.box.zprd;
    const int hi = static_cast<int>(edge * s.bininvz);
    s.mbinzlo = lo - 1;
    s.mbinz = (hi + 1) - s.mbinzlo + 1;
  }

  /* Bin stencil: every bin offset (central bin included) whose closest
   * corner to the central bin is within the neighbor cutoff.
   * next(xyz) = how far the stencil could possibly extend.
   * FACTOR < 1.0 handles the special case of the LJ benchmark so that a
   * correct-size stencil is created when there are 3 bins for every
   * 5 lattice spacings. */
  int nextx = static_cast<int>(s.neigh_cut * s.bininvx);
  if(nextx * s.binsizex < FACTOR * s.neigh_cut) nextx++;
  int nexty = static_cast<int>(s.neigh_cut * s.bininvy);
  if(nexty * s.binsizey < FACTOR * s.neigh_cut) nexty++;
  int nextz = static_cast<int>(s.neigh_cut * s.bininvz);
  if(nextz * s.binsizez < FACTOR * s.neigh_cut) nextz++;

  const int nmax = (2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1);
  s.d_stencil = t_int_1d("stencil", nmax);
  s.h_stencil = Kokkos::create_mirror_view(s.d_stencil);

  int nfound = 0;
  for(int k = -nextz; k <= nextz; k++) {
    for(int j = -nexty; j <= nexty; j++) {
      for(int i = -nextx; i <= nextx; i++) {
        if(bindist(s,i, j, k) < s.neigh_cutsq) {
          // offset of bin (i,j,k) in the flattened bin index
          s.h_stencil(nfound) = k * s.mbiny * s.mbinx + j * s.mbinx + i;
          nfound++;
        }
      }
    }
  }
  s.nstencil = nfound;
  Kokkos::deep_copy(s.d_stencil, s.h_stencil);

  /* Allocate bin and neighbor arrays with initial capacities of 8 atoms
   * per bin and 80 neighbors per atom; neigh_build grows them on demand. */
  s.mbins = s.mbinx * s.mbiny * s.mbinz;
  s.bincount = t_int_1d("bincount", s.mbins);
  s.bins = t_int_2d("bins", s.mbins, 8);
  s.neighbors = t_neighbors("neighbors",s.natoms,80);
  s.numneigh = t_int_1d("numneigh",s.natoms);
  s.d_resize = t_int_scalar("resize");
  s.h_resize = Kokkos::create_mirror_view(s.d_resize);
}
/* Build the neighborlist
 * This is a try-and-rerun algorithm for the case where the bins array or
 * the neighbors array is not big enough: each kernel reports an overflow
 * through the resize flag, and if one is reported the array is
 * reallocated and the binning or the neighborlist construction is rerun.
 */
void neigh_build(System &s) {
  /* Binning of atoms */
  do {
    s.h_resize(0) = 0;
    Kokkos::deep_copy(s.d_resize, s.h_resize);

    // clear the per-bin atom counters
    MemsetZeroFunctor zero_counts;
    zero_counts.ptr = (void*) s.bincount.ptr_on_device();
    Kokkos::parallel_for(s.mbins, zero_counts);
    execution_space::fence();

    BinningFunctor binning(s);
    Kokkos::parallel_for(s.natoms, binning);
    execution_space::fence();

    /* Check if bins was large enough; if not, reallocate and rerun */
    deep_copy(s.h_resize, s.d_resize);
    if(s.h_resize(0)) {
      const int atoms_per_bin = s.h_resize(0)+2;
      s.bins = t_int_2d("bins", s.mbins, atoms_per_bin);
    }
  } while(s.h_resize(0) > 0);

  /* Neighborlist construction */
  do {
    s.h_resize(0) = 0;
    Kokkos::deep_copy(s.d_resize, s.h_resize);

    BuildFunctor build(s);
    Kokkos::parallel_for(s.nlocal, build);
    execution_space::fence();

    /* Check if neighbors was large enough; if not, reallocate and rerun */
    deep_copy(s.h_resize, s.d_resize);
    if(s.h_resize(0)) {
      // grow by 20% over the count that was reported
      const int maxneighs = s.h_resize(0) * 1.2;
      s.neighbors = t_neighbors("neighbors", s.natoms, maxneighs);
    }
  } while(s.h_resize(0));
}

View File

@ -1,271 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <system.h>
#include <cmath>
#include <cstdio>
#include <cstdlib>
/* initialize atoms on fcc lattice in parallel fashion */
/* Two-argument maximum / minimum.
 * Every argument is parenthesized so that operands containing operators of
 * lower precedence than the comparison (e.g. a & b) expand correctly.
 * Note that each argument may be evaluated twice. */
#define MAX(a,b) ((a)>(b)?(a):(b))
#define MIN(a,b) ((a)<(b)?(a):(b))
/* Build the initial configuration.
 * Sizes the box for nx*ny*nz fcc unit cells at number density rho,
 * allocates the position and force arrays, fills the host positions with
 * the local atoms followed by the ghost atoms (a shell of ghost_dist unit
 * cells around the box) and copies the positions to the device.
 * Each coordinate is displaced by system.delta/1000 * (rand()%1000 - 500).
 * Reads system.neigh_cut and system.delta, which the caller must have set.
 * Returns 0 on success and 1 if the number of local atoms created differs
 * from 4*nx*ny*nz (positions are then NOT copied to the device).
 */
int create_system(System &system, int nx, int ny, int nz, double rho)
{
  /* Box Setup: an fcc unit cell holds 4 atoms, hence the 4/rho */
  const double lattice = pow((4.0 / rho), (1.0 / 3.0));
  system.box.xprd = nx * lattice;
  system.box.yprd = ny * lattice;
  system.box.zprd = nz * lattice;
  system.box.xlo = 0;
  system.box.ylo = 0;
  system.box.zlo = 0;
  system.box.xhi = system.box.xprd;
  system.box.yhi = system.box.yprd;
  system.box.zhi = system.box.zprd;
  const int ghost_dist = int(system.neigh_cut/lattice) + 1;

  /* total # of atoms */
  system.nlocal = 4 * nx * ny * nz;
  system.nghost = 4 * (nx + 2 * ghost_dist) *
                  (ny + 2 * ghost_dist) *
                  (nz + 2 * ghost_dist) -
                  system.nlocal;
  system.natoms = system.nlocal + system.nghost;
  system.d_x = t_x_array("X",system.natoms);
  system.h_x = Kokkos::create_mirror_view(system.d_x);
  system.f = t_f_array("F",system.natoms);

  /* Lattice sites live on a grid with spacing lattice/2. Determine the
     range of grid indices that overlaps the box and insure it does not
     exceed nx,ny,nz cells. */
  const double half = 0.5 * lattice;
  int ilo = static_cast<int>(system.box.xlo / half - 1);
  int ihi = static_cast<int>(system.box.xhi / half + 1);
  int jlo = static_cast<int>(system.box.ylo / half - 1);
  int jhi = static_cast<int>(system.box.yhi / half + 1);
  int klo = static_cast<int>(system.box.zlo / half - 1);
  int khi = static_cast<int>(system.box.zhi / half + 1);
  ilo = MAX(ilo, 0);
  ihi = MIN(ihi, 2 * nx - 1);
  jlo = MAX(jlo, 0);
  jhi = MIN(jhi, 2 * ny - 1);
  klo = MAX(klo, 0);
  khi = MIN(khi, 2 * nz - 1);

  /* fixed seed: the random offsets are reproducible from run to run */
  srand(3718273);

  /* The grid is walked in cubic blocks of subboxdim^3 indices:
     (ox,oy,oz) selects the block, (sx,sy,sz) the index inside it, with x
     running fastest. Grid points with an even index sum are the sites of
     the fcc lattice. */
  const int subboxdim = 8;

  /* create non-ghost atoms */
  {
    int n = 0;
    for(int oz = 0; oz * subboxdim <= khi; oz++)
    for(int oy = 0; oy * subboxdim <= jhi; oy++)
    for(int ox = 0; ox * subboxdim <= ihi; ox++)
    for(int sz = 0; sz < subboxdim; sz++)
    for(int sy = 0; sy < subboxdim; sy++)
    for(int sx = 0; sx < subboxdim; sx++) {
      const int k = oz * subboxdim + sz;
      const int j = oy * subboxdim + sy;
      const int i = ox * subboxdim + sx;
      if(((i + j + k) % 2 == 0) &&
         (i >= ilo) && (i <= ihi) &&
         (j >= jlo) && (j <= jhi) &&
         (k >= klo) && (k <= khi)) {
        /* redraw the random offsets until the atom lies inside the box */
        const int nold = n;
        while(nold == n) {
          const double xtmp = half * i + system.delta/1000*(rand()%1000-500);
          const double ytmp = half * j + system.delta/1000*(rand()%1000-500);
          const double ztmp = half * k + system.delta/1000*(rand()%1000-500);
          if(xtmp >= system.box.xlo && xtmp < system.box.xhi &&
             ytmp >= system.box.ylo && ytmp < system.box.yhi &&
             ztmp >= system.box.zlo && ztmp < system.box.zhi) {
            system.h_x(n,0) = xtmp;
            system.h_x(n,1) = ytmp;
            system.h_x(n,2) = ztmp;
            n++;
          }
        }
      }
    }
    /* check that correct # of atoms were created */
    if(system.nlocal != n) {
      printf("Created incorrect # of atoms\n");
      return 1;
    }
  }

  /* create ghost atoms: same walk over the index range widened by
     2*ghost_dist on every side, keeping only sites outside the local range */
  {
    const int ilo_g = ilo - 2 * ghost_dist;
    const int jlo_g = jlo - 2 * ghost_dist;
    const int klo_g = klo - 2 * ghost_dist;
    const int ihi_g = ihi + 2 * ghost_dist;
    const int jhi_g = jhi + 2 * ghost_dist;
    const int khi_g = khi + 2 * ghost_dist;
    /* NOTE(review): the first block index is subboxdim*lo_g, so the walk
       starts far below lo_g and scans many empty blocks. Kept unchanged
       here; starting at floor(lo_g/subboxdim) looks sufficient -- confirm. */
    const int ox0 = subboxdim * ilo_g;
    const int oy0 = subboxdim * jlo_g;
    const int oz0 = subboxdim * klo_g;
    int n = system.nlocal;
    for(int oz = oz0; oz * subboxdim <= khi_g; oz++)
    for(int oy = oy0; oy * subboxdim <= jhi_g; oy++)
    for(int ox = ox0; ox * subboxdim <= ihi_g; ox++)
    for(int sz = 0; sz < subboxdim; sz++)
    for(int sy = 0; sy < subboxdim; sy++)
    for(int sx = 0; sx < subboxdim; sx++) {
      const int k = oz * subboxdim + sz;
      const int j = oy * subboxdim + sy;
      const int i = ox * subboxdim + sx;
      if(((i + j + k) % 2 == 0) &&
         (i >= ilo_g) && (i <= ihi_g) &&
         (j >= jlo_g) && (j <= jhi_g) &&
         (k >= klo_g) && (k <= khi_g) &&
         ((i < ilo) || (i > ihi) ||
          (j < jlo) || (j > jhi) ||
          (k < klo) || (k > khi))
        ) {
        const double xtmp = half * i;
        const double ytmp = half * j;
        const double ztmp = half * k;
        system.h_x(n,0) = xtmp + system.delta/1000*(rand()%1000-500);
        system.h_x(n,1) = ytmp + system.delta/1000*(rand()%1000-500);
        system.h_x(n,2) = ztmp + system.delta/1000*(rand()%1000-500);
        n++;
      }
    }
  }
  Kokkos::deep_copy(system.d_x,system.h_x);
  return 0;
}

View File

@ -1,92 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef SYSTEM_H_
#define SYSTEM_H_
#include <types.h>
/* Axis-aligned simulation box: the extent along each axis followed by the
 * lower and upper bound of each axis. */
struct Box {
  // extent of the box along x, y and z
  double xprd;
  double yprd;
  double zprd;
  // lower / upper bound along x
  double xlo;
  double xhi;
  // lower / upper bound along y
  double ylo;
  double yhi;
  // lower / upper bound along z
  double zlo;
  double zhi;
};
/* Complete state of the MD skeleton: box, atoms, neighbor list and the
 * binning data used to build the neighbor list. Functors hold a copy of
 * this struct and reach the data through the views it contains. */
struct System {
  Box box;                  // simulation box

  /* atoms */
  int natoms;               // nlocal + nghost
  int nlocal;               // atoms inside the box
  int nghost;               // ghost atoms surrounding the box
  t_x_array d_x;            // positions, execution-space copy
  t_x_array_host h_x;       // positions, host mirror of d_x
  t_f_array f;              // forces

  /* neighbor list */
  t_neighbors neighbors;    // neighbors(i,k): index of the k-th neighbor of atom i
  t_int_1d numneigh;        // numneigh[i]: number of neighbors stored for atom i
  double delta;             // scaling factor for random offsets of atom positions
  double neigh_cut;         // neighbor cutoff
  double neigh_cutsq;       // neigh_cut squared

  /* binning */
  int mbins;                // total number of bins (mbinx*mbiny*mbinz)
  int nbinx;                // bins across the box, per axis
  int nbiny;
  int nbinz;
  int mbinx;                // bins in the extended grid, per axis
  int mbiny;
  int mbinz;
  int mbinxlo;              // index of the lowest bin of the extended grid, per axis
  int mbinylo;
  int mbinzlo;
  double binsizex;          // bin edge length, per axis
  double binsizey;
  double binsizez;
  double bininvx;           // 1 / bin edge length, per axis
  double bininvy;
  double bininvz;
  t_int_1d bincount;        // bincount[b]: number of atoms placed in bin b
  t_int_2d bins;            // bins(b,n): index of the n-th atom in bin b
  t_int_scalar d_resize;    // overflow flag written by the kernels
  t_int_scalar_host h_resize; // host mirror of d_resize
  t_int_1d d_stencil;       // bin-index offsets of the bins to search
  t_int_1d_host h_stencil;  // host mirror of d_stencil
  int nstencil;             // number of valid entries in the stencil

  /* force */
  double force_cut;         // force cutoff
  double force_cutsq;       // force_cut squared
};
#endif

View File

@ -1,118 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef TYPES_H_
#define TYPES_H_
/* Determine default device type and necessary includes */
#include <Kokkos_Core.hpp>
/* Execution space used by every view and kernel of this example:
 * an alias of Kokkos::DefaultExecutionSpace. */
typedef Kokkos::DefaultExecutionSpace execution_space ;
#if ! defined( KOKKOS_HAVE_CUDA )
/* Pair of doubles with component-wise +=, used as the energy/virial
 * reduction value. Only defined when Kokkos is built without CUDA --
 * presumably because the CUDA headers already provide a double2 type
 * (TODO confirm). */
struct double2 {
  double x, y;

  /* zero-initialized pair */
  KOKKOS_INLINE_FUNCTION
  double2() : x(0.0), y(0.0) {}

  /* pair with the given components */
  KOKKOS_INLINE_FUNCTION
  double2(double xinit, double yinit) : x(xinit), y(yinit) {}

  /* component-wise accumulation */
  KOKKOS_INLINE_FUNCTION
  double2& operator += (const double2& src) {
    x+=src.x;
    y+=src.y;
    return *this;
  }

  /* component-wise accumulation for volatile-qualified operands */
  KOKKOS_INLINE_FUNCTION
  volatile double2& operator += (const volatile double2& src) volatile {
    x+=src.x;
    y+=src.y;
    return *this;
  }
};
#endif
#include <impl/Kokkos_Timer.hpp>
/* Define types used throughout the code.
 * All views live in execution_space; the *_host types are the matching
 * HostMirror views, and the *_const variants have a const element type. */
//Position arrays: one row of 3 doubles per atom, LayoutRight
typedef Kokkos::View<double*[3], Kokkos::LayoutRight, execution_space> t_x_array ;
typedef t_x_array::HostMirror t_x_array_host ;
typedef Kokkos::View<const double*[3], Kokkos::LayoutRight, execution_space> t_x_array_const ;
typedef Kokkos::View<const double*[3], Kokkos::LayoutRight, execution_space, Kokkos::MemoryRandomAccess > t_x_array_randomread ;
//Force array: one row of 3 doubles per atom, no explicit layout given
typedef Kokkos::View<double*[3], execution_space> t_f_array ;
//Neighborlist: 2d int array, plus unmanaged 1d types (the _sub variants)
typedef Kokkos::View<int**, execution_space > t_neighbors ;
typedef Kokkos::View<const int**, execution_space > t_neighbors_const ;
typedef Kokkos::View<int*, execution_space, Kokkos::MemoryUnmanaged > t_neighbors_sub ;
typedef Kokkos::View<const int*, execution_space, Kokkos::MemoryUnmanaged > t_neighbors_const_sub ;
//1d int array; the _um variants carry the MemoryUnmanaged trait
typedef Kokkos::View<int*, execution_space > t_int_1d ;
typedef t_int_1d::HostMirror t_int_1d_host ;
typedef Kokkos::View<const int*, execution_space > t_int_1d_const ;
typedef Kokkos::View<int*, execution_space , Kokkos::MemoryUnmanaged> t_int_1d_um ;
typedef Kokkos::View<const int* , execution_space , Kokkos::MemoryUnmanaged> t_int_1d_const_um ;
//2d int array, LayoutRight
typedef Kokkos::View<int**, Kokkos::LayoutRight, execution_space > t_int_2d ;
typedef t_int_2d::HostMirror t_int_2d_host ;
//Scalar ints: a view holding a single int, accessed as view(0)
typedef Kokkos::View<int[1], Kokkos::LayoutLeft, execution_space> t_int_scalar ;
typedef t_int_scalar::HostMirror t_int_scalar_host ;
#endif /* TYPES_H_ */

View File

@ -1,610 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_BOXMESHFIXTURE_HPP
#define KOKKOS_BOXMESHFIXTURE_HPP
#include <cmath>
#include <stdexcept>
#include <sstream>
#include <Kokkos_Core.hpp>
#include <BoxMeshPartition.hpp>
#include <FEMesh.hpp>
#include <HexElement.hpp>
//----------------------------------------------------------------------------
/** \brief Fixture policy for an 8-node hexahedral element.
 *
 *  Node boxes are identical to vertex boxes, and a node's grid
 *  coordinate is the element's lower-corner coordinate plus the
 *  node's offset taken from elem_data.eval_map.
 */
struct FixtureElementHex8 {

  static const unsigned element_node_count = 8 ;

  HybridFEM::HexElement_TensorData< element_node_count > elem_data ;
  BoxBoundsLinear box_bounds ;

  FixtureElementHex8() : elem_data(), box_bounds() {}

  /** \brief Copy the vertex boxes to the node boxes unchanged. */
  static void create_node_boxes_from_vertex_boxes(
    const BoxType                & vertex_box_global ,
    const std::vector< BoxType > & vertex_box_parts ,
    BoxType                      & node_box_global ,
    std::vector< BoxType >       & node_box_parts )
  {
    node_box_parts  = vertex_box_parts ;
    node_box_global = vertex_box_global ;
  }

  /** \brief Convert, in place, an element grid coordinate into the
   *         grid coordinate of the element's node 'node_local'.
   */
  void elem_to_node( const unsigned node_local , unsigned coord[] ) const
  {
    for ( unsigned axis = 0 ; axis < 3 ; ++axis ) {
      coord[ axis ] += elem_data.eval_map[ node_local ][ axis ] ;
    }
  }
};
struct FixtureElementHex27 {
static const unsigned element_node_count = 27 ;
HybridFEM::HexElement_TensorData< element_node_count > elem_data ;
BoxBoundsQuadratic box_bounds ;
FixtureElementHex27() : elem_data(), box_bounds() {}
static void create_node_boxes_from_vertex_boxes(
const BoxType & vertex_box_global ,
const std::vector< BoxType > & vertex_box_parts ,
BoxType & node_box_global ,
std::vector< BoxType > & node_box_parts )
{
node_box_global = vertex_box_global ;
node_box_parts = vertex_box_parts ;
node_box_global[0][1] = 2 * node_box_global[0][1] - 1 ;
node_box_global[1][1] = 2 * node_box_global[1][1] - 1 ;
node_box_global[2][1] = 2 * node_box_global[2][1] - 1 ;
for ( unsigned i = 0 ; i < vertex_box_parts.size() ; ++i ) {
node_box_parts[i][0][0] = 2 * node_box_parts[i][0][0] ;
node_box_parts[i][1][0] = 2 * node_box_parts[i][1][0] ;
node_box_parts[i][2][0] = 2 * node_box_parts[i][2][0] ;
node_box_parts[i][0][1] =
std::min( node_box_global[0][1] , 2 * node_box_parts[i][0][1] );
node_box_parts[i][1][1] =
std::min( node_box_global[1][1] , 2 * node_box_parts[i][1][1] );
node_box_parts[i][2][1] =
std::min( node_box_global[2][1] , 2 * node_box_parts[i][2][1] );
}
}
void elem_to_node( const unsigned node_local , unsigned coord[] ) const
{
coord[0] = 2 * coord[0] + elem_data.eval_map[ node_local ][0] ;
coord[1] = 2 * coord[1] + elem_data.eval_map[ node_local ][1] ;
coord[2] = 2 * coord[2] + elem_data.eval_map[ node_local ][2] ;
}
};
//----------------------------------------------------------------------------
template< typename Scalar , class Device , class ElementSpec >
struct BoxMeshFixture {
typedef Scalar coordinate_scalar_type ;
typedef Device execution_space ;
static const unsigned element_node_count = ElementSpec::element_node_count ;
typedef HybridFEM::FEMesh< coordinate_scalar_type ,
element_node_count ,
execution_space > FEMeshType ;
typedef typename FEMeshType::node_coords_type node_coords_type ;
typedef typename FEMeshType::elem_node_ids_type elem_node_ids_type ;
typedef typename FEMeshType::node_elem_ids_type node_elem_ids_type ;
/** \brief Consistency check of a freshly built mesh (host mirrors).
 *
 *  1. Every (element, local node) entry stored for a node in
 *     node_elem_ids must lead back to that node through elem_node_ids.
 *  2. For every element, each node's coordinate must equal the
 *     coordinate of the element's node 0 plus that node's offset
 *     from ElementSpec::elem_data.eval_map.
 *
 *  Throws std::runtime_error describing the first inconsistency found.
 */
static void verify(
  const typename FEMeshType::node_coords_type::HostMirror   & node_coords ,
  const typename FEMeshType::elem_node_ids_type::HostMirror & elem_node_ids ,
  const typename FEMeshType::node_elem_ids_type::HostMirror & node_elem_ids )
{
  typedef typename FEMeshType::size_type size_type ;

  const size_type num_nodes = node_coords.dimension_0();
  const size_type num_elems = elem_node_ids.dimension_0();

  const ElementSpec element ;

  // Check 1: node -> element entries agree with element -> node ids.
  for ( size_type node = 0 ; node < num_nodes ; ++node ) {
    for ( size_type entry = node_elem_ids.row_map[ node ] ;
                    entry < node_elem_ids.row_map[ node + 1 ] ; ++entry ) {

      const size_type elem       = node_elem_ids.entries( entry , 0 );
      const size_type local_node = node_elem_ids.entries( entry , 1 );
      const size_type round_trip = elem_node_ids( elem , local_node );

      if ( node == round_trip ) { continue ; }

      std::ostringstream msg ;
      msg << "BoxMeshFixture node_elem_ids error"
          << " : node_index(" << node
          << ") entry(" << entry
          << ") elem_index(" << elem
          << ") node_local(" << local_node
          << ") elem_node_id(" << round_trip
          << ")" ;
      throw std::runtime_error( msg.str() );
    }
  }

  // Check 2: node coordinates follow the element's tensor-product map.
  for ( size_type elem = 0 ; elem < num_elems ; ++elem ) {

    coordinate_scalar_type xyz[ element_node_count ][3] ;

    // Gather the coordinates of all nodes of this element.
    for ( size_type nn = 0 ; nn < element_node_count ; ++nn ) {
      const size_type node = elem_node_ids( elem , nn );
      for ( size_type axis = 0 ; axis < 3 ; ++axis ) {
        xyz[nn][axis] = node_coords( node , axis );
      }
    }

    for ( size_type nn = 0 ; nn < element_node_count ; ++nn ) {

      const unsigned ix = element.elem_data.eval_map[nn][0] ;
      const unsigned iy = element.elem_data.eval_map[nn][1] ;
      const unsigned iz = element.elem_data.eval_map[nn][2] ;

      const bool matches = xyz[nn][0] == xyz[0][0] + ix &&
                           xyz[nn][1] == xyz[0][1] + iy &&
                           xyz[nn][2] == xyz[0][2] + iz ;

      if ( ! matches ) {
        std::ostringstream msg ;
        msg << "BoxMeshFixture elem_node_coord mapping failure { "
            << xyz[nn][0] << " "
            << xyz[nn][1] << " "
            << xyz[nn][2] << " } != { "
            << xyz[ 0][0] + ix << " "
            << xyz[ 0][1] + iy << " "
            << xyz[ 0][2] + iz
            << " }" ;
        throw std::runtime_error( msg.str() );
      }
    }
  }
}
//------------------------------------
// Initialize element-node connectivity:
// Order elements that only depend on owned nodes first.
// These elements could be computed while waiting for
// received node data.
static void layout_elements_interior_exterior(
const BoxType vertex_box_local_used ,
const BoxType vertex_box_local_owned ,
const BoxType node_box_local_used ,
const std::vector<size_t> & node_used_id_map ,
const ElementSpec element_fixture ,
const size_t elem_count_interior ,
const typename elem_node_ids_type::HostMirror elem_node_ids )
{
size_t elem_index_interior = 0 ;
size_t elem_index_boundary = elem_count_interior ;
for ( size_t iz = vertex_box_local_used[2][0] ;
iz < vertex_box_local_used[2][1] - 1 ; ++iz ) {
for ( size_t iy = vertex_box_local_used[1][0] ;
iy < vertex_box_local_used[1][1] - 1 ; ++iy ) {
for ( size_t ix = vertex_box_local_used[0][0] ;
ix < vertex_box_local_used[0][1] - 1 ; ++ix ) {
size_t elem_index ;
// If lower and upper vertices are owned then element is interior
if ( contain( vertex_box_local_owned, ix, iy, iz ) &&
contain( vertex_box_local_owned, ix+1, iy+1, iz+1 ) ) {
elem_index = elem_index_interior++ ;
}
else {
elem_index = elem_index_boundary++ ;
}
for ( size_t nn = 0 ; nn < element_node_count ; ++nn ) {
unsigned coord[3] = { static_cast<unsigned>(ix) , static_cast<unsigned>(iy) , static_cast<unsigned>(iz) };
element_fixture.elem_to_node( nn , coord );
const size_t node_local_id =
box_map_id( node_box_local_used ,
node_used_id_map ,
coord[0] , coord[1] , coord[2] );
elem_node_ids( elem_index , nn ) = node_local_id ;
}
}}}
}
//------------------------------------
// Nested partitioning of elements by number of thread 'gangs'
/** \brief Fill element->node connectivity with the elements grouped
 *         by thread 'gang'.
 *
 *  The local element box (used vertex box with each upper bound
 *  reduced by one) is split into thread_gang_count sub-boxes with
 *  box_partition_rcb().  Elements are numbered consecutively, gang by
 *  gang, iterating x fastest within each gang's sub-box.
 *
 *  \param vertex_box_local_used  vertices used by this part
 *  \param node_box_local_used    nodes used by this part
 *  \param node_used_id_map       map passed to box_map_id to get local node ids
 *  \param element_fixture        supplies elem_to_node()
 *  \param thread_gang_count      number of sub-boxes to create
 *  \param elem_node_ids          [out] host connectivity (element, local node)
 *
 *  The second parameter (owned vertex box) is unused.
 */
static void layout_elements_partitioned(
  const BoxType vertex_box_local_used ,
  const BoxType /*vertex_box_local_owned*/ ,
  const BoxType node_box_local_used ,
  const std::vector<size_t> & node_used_id_map ,
  const ElementSpec element_fixture ,
  const size_t thread_gang_count ,
  const typename elem_node_ids_type::HostMirror elem_node_ids )
{
  std::vector< BoxType > element_box_gangs( thread_gang_count );

  BoxType element_box_local_used = vertex_box_local_used ;

  // One fewer element than vertices along each axis.  The bounds are
  // unsigned: only shrink a non-empty range so that an empty vertex
  // range stays empty instead of wrapping around or inverting.
  for ( unsigned axis = 0 ; axis < 3 ; ++axis ) {
    if ( element_box_local_used[ axis ][0] < element_box_local_used[ axis ][1] ) {
      element_box_local_used[ axis ][1] -= 1 ;
    }
  }

  box_partition_rcb( element_box_local_used , element_box_gangs );

  size_t elem_index = 0 ;

  for ( size_t ig = 0 ; ig < thread_gang_count ; ++ig ) {

    const BoxType & box = element_box_gangs[ig] ;

    for ( size_t iz = box[2][0] ; iz < box[2][1] ; ++iz ) {
    for ( size_t iy = box[1][0] ; iy < box[1][1] ; ++iy ) {
    for ( size_t ix = box[0][0] ; ix < box[0][1] ; ++ix , ++elem_index ) {

      for ( size_t nn = 0 ; nn < element_node_count ; ++nn ) {
        // elem_to_node() rewrites 'coord' from element to node coordinates.
        unsigned coord[3] = { static_cast<unsigned>(ix) ,
                              static_cast<unsigned>(iy) ,
                              static_cast<unsigned>(iz) };

        element_fixture.elem_to_node( nn , coord );

        const size_t node_local_id =
          box_map_id( node_box_local_used ,
                      node_used_id_map ,
                      coord[0] , coord[1] , coord[2] );

        elem_node_ids( elem_index , nn ) = node_local_id ;
      }
    }}}
  }
}
//------------------------------------
/** \brief Build the local part of a box-shaped finite element mesh.
 *
 *  The global vertex grid (elems + 1 vertices per axis) is split into
 *  proc_count parts with box_partition_rcb(); the mesh for part
 *  proc_local is generated, verified with verify() and returned.
 *
 *  \param proc_count     number of parts the global box is split into
 *  \param proc_local     index of the part to build
 *  \param gang_count     when greater than 1 elements are ordered with
 *                        layout_elements_partitioned(), otherwise with
 *                        layout_elements_interior_exterior()
 *  \param elems_x        global element count along x
 *  \param elems_y        global element count along y
 *  \param elems_z        global element count along z
 *  \param x_coord_curve  exponent applied to the unit x coordinate
 *  \param y_coord_curve  exponent applied to the unit y coordinate
 *  \param z_coord_curve  exponent applied to the unit z coordinate
 *
 *  \return mesh with node coordinates, element->node ids,
 *          node->element ids and the parallel data map filled in.
 *
 *  The helpers called here (box_partition_rcb, box_partition_maps,
 *  box_map_id, verify) throw std::runtime_error on inconsistent input.
 */
static FEMeshType create( const size_t proc_count ,
                          const size_t proc_local ,
                          const size_t gang_count ,
                          const size_t elems_x ,
                          const size_t elems_y ,
                          const size_t elems_z ,
                          const double x_coord_curve = 1 ,
                          const double y_coord_curve = 1 ,
                          const double z_coord_curve = 1 )
{
  const size_t vertices_x = elems_x + 1 ;
  const size_t vertices_y = elems_y + 1 ;
  const size_t vertices_z = elems_z + 1 ;

  const BoxBoundsLinear vertex_box_bounds ;
  const ElementSpec element ;

  // Partition based upon vertices:

  BoxType vertex_box_global ;
  std::vector< BoxType > vertex_box_parts( proc_count );

  vertex_box_global[0][0] = 0 ; vertex_box_global[0][1] = vertices_x ;
  vertex_box_global[1][0] = 0 ; vertex_box_global[1][1] = vertices_y ;
  vertex_box_global[2][0] = 0 ; vertex_box_global[2][1] = vertices_z ;

  box_partition_rcb( vertex_box_global , vertex_box_parts );

  const BoxType vertex_box_local_owned = vertex_box_parts[ proc_local ];

  // Determine interior and used vertices:

  BoxType vertex_box_local_interior ;
  BoxType vertex_box_local_used ;

  vertex_box_bounds.apply( vertex_box_global ,
                           vertex_box_local_owned ,
                           vertex_box_local_interior ,
                           vertex_box_local_used );

  // Element counts:
  // One fewer element than vertices per axis; computed as signed values
  // and clamped at zero so an empty vertex range yields no elements.

  const long local_elems_x =
    ( vertex_box_local_used[0][1] - vertex_box_local_used[0][0] ) - 1 ;
  const long local_elems_y =
    ( vertex_box_local_used[1][1] - vertex_box_local_used[1][0] ) - 1 ;
  const long local_elems_z =
    ( vertex_box_local_used[2][1] - vertex_box_local_used[2][0] ) - 1 ;

  const size_t elem_count_total = std::max( long(0) , local_elems_x ) *
                                  std::max( long(0) , local_elems_y ) *
                                  std::max( long(0) , local_elems_z );

  const long interior_elems_x =
    ( vertex_box_local_owned[0][1] - vertex_box_local_owned[0][0] ) - 1 ;
  const long interior_elems_y =
    ( vertex_box_local_owned[1][1] - vertex_box_local_owned[1][0] ) - 1 ;
  const long interior_elems_z =
    ( vertex_box_local_owned[2][1] - vertex_box_local_owned[2][0] ) - 1 ;

  const size_t elem_count_interior = std::max( long(0) , interior_elems_x ) *
                                     std::max( long(0) , interior_elems_y ) *
                                     std::max( long(0) , interior_elems_z );

  // Expand vertex boxes to node boxes:

  BoxType node_box_global ;
  BoxType node_box_local_used ;
  std::vector< BoxType > node_box_parts ;

  element.create_node_boxes_from_vertex_boxes(
    vertex_box_global , vertex_box_parts ,
    node_box_global , node_box_parts );

  // Node communication maps:

  size_t node_count_interior = 0 ;
  size_t node_count_owned    = 0 ;
  size_t node_count_total    = 0 ;
  std::vector<size_t>                node_used_id_map ;
  std::vector<size_t>                node_part_counts ;
  std::vector< std::vector<size_t> > node_send_map ;

  box_partition_maps( node_box_global ,
                      node_box_parts ,
                      element.box_bounds ,
                      proc_local ,
                      node_box_local_used ,
                      node_used_id_map ,
                      node_count_interior ,
                      node_count_owned ,
                      node_count_total ,
                      node_part_counts ,
                      node_send_map );

  // Count the messages and sent items so the parallel data map can be
  // sized.  Index 0 is skipped: box_partition_maps stores this part's
  // own count there, not a message.

  size_t recv_msg_count = 0 ;
  size_t send_msg_count = 0 ;
  size_t send_count = 0 ;

  for ( size_t i = 1 ; i < proc_count ; ++i ) {
    if ( node_part_counts[i] ) ++recv_msg_count ;
    if ( node_send_map[i].size() ) {
      ++send_msg_count ;
      send_count += node_send_map[i].size();
    }
  }

  // Finite element mesh:

  FEMeshType mesh ;

  if ( node_count_total ) {
    mesh.node_coords = node_coords_type( "node_coords", node_count_total );
  }

  if ( elem_count_total ) {
    mesh.elem_node_ids =
      elem_node_ids_type( "elem_node_ids", elem_count_total );
  }

  mesh.parallel_data_map.assign( node_count_interior ,
                                 node_count_owned ,
                                 node_count_total ,
                                 recv_msg_count ,
                                 send_msg_count ,
                                 send_count );

  typename node_coords_type::HostMirror node_coords =
    Kokkos::create_mirror( mesh.node_coords );

  typename elem_node_ids_type::HostMirror elem_node_ids =
    Kokkos::create_mirror( mesh.elem_node_ids );

  //------------------------------------
  // set node coordinates to grid location for subsequent verification

  for ( size_t iz = node_box_local_used[2][0] ;
               iz < node_box_local_used[2][1] ; ++iz ) {

  for ( size_t iy = node_box_local_used[1][0] ;
               iy < node_box_local_used[1][1] ; ++iy ) {

  for ( size_t ix = node_box_local_used[0][0] ;
               ix < node_box_local_used[0][1] ; ++ix ) {

    const size_t node_local_id =
      box_map_id( node_box_local_used , node_used_id_map , ix , iy , iz );

    node_coords( node_local_id , 0 ) = ix ;
    node_coords( node_local_id , 1 ) = iy ;
    node_coords( node_local_id , 2 ) = iz ;
  }}}

  //------------------------------------
  // Initialize element-node connectivity:

  if ( 1 < gang_count ) {
    layout_elements_partitioned( vertex_box_local_used ,
                                 vertex_box_local_owned ,
                                 node_box_local_used ,
                                 node_used_id_map ,
                                 element ,
                                 gang_count ,
                                 elem_node_ids );
  }
  else {
    layout_elements_interior_exterior( vertex_box_local_used ,
                                       vertex_box_local_owned ,
                                       node_box_local_used ,
                                       node_used_id_map ,
                                       element ,
                                       elem_count_interior ,
                                       elem_node_ids );
  }

  //------------------------------------
  // Populate node->element connectivity:

  // First pass: number of elements touching each node (row lengths).
  std::vector<size_t> node_elem_work( node_count_total , (size_t) 0 );

  for ( size_t i = 0 ; i < elem_count_total ; ++i ) {
    for ( size_t n = 0 ; n < element_node_count ; ++n ) {
      ++node_elem_work[ elem_node_ids(i,n) ];
    }
  }

  mesh.node_elem_ids =
    Kokkos::create_staticcrsgraph< node_elem_ids_type >( "node_elem_ids" , node_elem_work );

  typename node_elem_ids_type::HostMirror
    node_elem_ids = Kokkos::create_mirror( mesh.node_elem_ids );

  // Reuse the work array as the next free entry position of each row.
  for ( size_t i = 0 ; i < node_count_total ; ++i ) {
    node_elem_work[i] = node_elem_ids.row_map[i];
  }

  // Looping in element order ensures the list of elements
  // is sorted by element index.

  for ( size_t i = 0 ; i < elem_count_total ; ++i ) {
    for ( size_t n = 0 ; n < element_node_count ; ++n ) {
      // Keep full-width indices: no narrowing of node ids or offsets.
      const size_t nid = elem_node_ids(i, n);
      const size_t j = node_elem_work[nid] ; ++node_elem_work[nid] ;

      node_elem_ids.entries( j , 0 ) = i ;
      node_elem_ids.entries( j , 1 ) = n ;
    }
  }

  //------------------------------------
  // Verify setup with node coordinates matching grid indices.

  verify( node_coords , elem_node_ids , node_elem_ids );

  //------------------------------------
  // Scale node coordinates to problem extent with
  // nonlinear mapping.
  {
    const double problem_extent[3] =
      { static_cast<double>( vertex_box_global[0][1] - 1 ) ,
        static_cast<double>( vertex_box_global[1][1] - 1 ) ,
        static_cast<double>( vertex_box_global[2][1] - 1 ) };

    const double grid_extent[3] =
      { static_cast<double>( node_box_global[0][1] - 1 ) ,
        static_cast<double>( node_box_global[1][1] - 1 ) ,
        static_cast<double>( node_box_global[2][1] - 1 ) };

    for ( size_t i = 0 ; i < node_count_total ; ++i ) {
      // Grid index -> [0,1] -> curved -> problem extent.
      const double x_unit = node_coords(i,0) / grid_extent[0] ;
      const double y_unit = node_coords(i,1) / grid_extent[1] ;
      const double z_unit = node_coords(i,2) / grid_extent[2] ;

      node_coords(i,0) = coordinate_scalar_type( problem_extent[0] * std::pow( x_unit , x_coord_curve ) );
      node_coords(i,1) = coordinate_scalar_type( problem_extent[1] * std::pow( y_unit , y_coord_curve ) );
      node_coords(i,2) = coordinate_scalar_type( problem_extent[2] * std::pow( z_unit , z_coord_curve ) );
    }
  }

  Kokkos::deep_copy( mesh.node_coords ,   node_coords );
  Kokkos::deep_copy( mesh.elem_node_ids , elem_node_ids );
  Kokkos::deep_copy( mesh.node_elem_ids.entries , node_elem_ids.entries );

  //------------------------------------
  // Communication lists:
  {
    recv_msg_count = 0 ;
    send_msg_count = 0 ;
    send_count = 0 ;

    for ( size_t i = 1 ; i < proc_count ; ++i ) {

      // Order sending starting with the local processor rank
      // to try to smooth out the amount of messages simultaneously
      // sent to a particular processor.

      const int proc = ( proc_local + i ) % proc_count ;

      if ( node_part_counts[i] ) {
        mesh.parallel_data_map.host_recv(recv_msg_count,0) = proc ;
        mesh.parallel_data_map.host_recv(recv_msg_count,1) = node_part_counts[i] ;
        ++recv_msg_count ;
      }

      if ( node_send_map[i].size() ) {
        mesh.parallel_data_map.host_send(send_msg_count,0) = proc ;
        mesh.parallel_data_map.host_send(send_msg_count,1) = node_send_map[i].size() ;

        // Sent ids are stored relative to the first non-interior node.
        for ( size_t j = 0 ; j < node_send_map[i].size() ; ++j , ++send_count ) {
          mesh.parallel_data_map.host_send_item(send_count) = node_send_map[i][j] - node_count_interior ;
        }
        ++send_msg_count ;
      }
    }
  }

  return mesh ;
}
};
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_BOXMESHFIXTURE_HPP */

View File

@ -1,381 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <limits>
#include <BoxMeshPartition.hpp>
//----------------------------------------------------------------------------
namespace {
/* Recursively split 'box' among the parts [ip,up) and store each
 * part's sub-box in p_box[part].
 *
 * The box is always cut along its longest axis.  When the number of
 * parts is divisible by three the box is cut into thirds, otherwise
 * into two pieces sized in proportion to the number of parts placed
 * on each side.  An empty part range is a no-op.
 */
void box_partition( size_t ip , size_t up ,
                    const BoxType & box ,
                    BoxType * const p_box )
{
  // Guard the recursion: with no parts, 0 % 3 == 0 would otherwise
  // select the three-way split with zero-sized ranges and never end.
  if ( up <= ip ) { return ; }

  const size_t np = up - ip ;

  if ( 1 == np ) {
    p_box[ip] = box ;
    return ;
  }

  // Choose axis with largest count:

  const size_t n0 = box[0][1] - box[0][0] ;
  const size_t n1 = box[1][1] - box[1][0] ;
  const size_t n2 = box[2][1] - box[2][0] ;

  const size_t axis = n2 > n1 ? ( n2 > n0 ? 2 : ( n1 > n0 ? 1 : 0 ) ) :
                                ( n1 > n0 ? 1 : 0 );

  const size_t n = box[ axis ][1] - box[ axis ][0] ;

  if ( 0 == np % 3 ) {
    const size_t np_part = np / 3 ; // exact

    const size_t nbox_low =
      static_cast<size_t>( static_cast<double>( n ) * ( 1.0 / 3.0 ) );
    const size_t nbox_mid =
      static_cast<size_t>( static_cast<double>( n ) * ( 2.0 / 3.0 ) );

    BoxType dbox_low = box ; // P = [ip,ip+np/3)
    BoxType dbox_mid = box ; // P = [ip+np/3,ip+2*np/3)
    BoxType dbox_upp = box ; // P = [ip+2*np/3,ip+np)

    dbox_low[ axis ][1] = box[ axis ][0] + nbox_low ;
    dbox_mid[ axis ][1] = box[ axis ][0] + nbox_mid ;

    dbox_mid[ axis ][0] = dbox_low[ axis ][1];
    dbox_upp[ axis ][0] = dbox_mid[ axis ][1];

    box_partition( ip,           ip +   np_part, dbox_low , p_box );
    box_partition( ip+  np_part, ip + 2*np_part, dbox_mid , p_box );
    box_partition( ip+2*np_part, up,             dbox_upp , p_box );
  }
  else {
    const size_t np_low = np / 2 ; /* Rounded down */

    // Lower piece gets the share of cells matching its share of parts.
    const size_t nbox_low = static_cast<size_t>(
      static_cast<double>( n ) *
      ( static_cast<double>( np_low ) / static_cast<double>( np ) ) );

    BoxType dbox_low = box ;
    BoxType dbox_upp = box ;

    dbox_low[ axis ][1] = dbox_low[ axis ][0] + nbox_low ;
    dbox_upp[ axis ][0] = dbox_low[ axis ][1];

    box_partition( ip,          ip + np_low, dbox_low , p_box );
    box_partition( ip + np_low, up,          dbox_upp , p_box );
  }
}
size_t box_map_offset( const BoxType & local_use ,
const size_t global_i ,
const size_t global_j ,
const size_t global_k )
{
const size_t max = std::numeric_limits<size_t>::max();
const size_t n[3] =
{ local_use[0][1] - local_use[0][0] ,
local_use[1][1] - local_use[1][0] ,
local_use[2][1] - local_use[2][0] };
const size_t use[3] = {
( global_i >= local_use[0][0] ? global_i - local_use[0][0] : max ) ,
( global_j >= local_use[1][0] ? global_j - local_use[1][0] : max ) ,
( global_k >= local_use[2][0] ? global_k - local_use[2][0] : max ) };
const size_t offset =
( use[0] < n[0] && use[1] < n[1] && use[2] < n[2] ) ?
( use[0] + n[0] * ( use[1] + n[1] * use[2] ) ) : max ;
if ( offset == max ) {
std::ostringstream msg ;
msg << "box_map_offset ERROR: "
<< " use " << local_use
<< " ( " << global_i
<< " , " << global_j
<< " , " << global_k
<< " )" ;
throw std::runtime_error( msg.str() );
}
return offset ;
}
} // namespace
//----------------------------------------------------------------------------
/* One layer of ghosting.
 *
 * box_interior: box_part shrunk by one layer on every side that does
 *               not lie on the global boundary (collapsed to an empty
 *               range when the part is too thin).
 * box_use:      box_part grown by one layer on every side, clamped to
 *               box_global.
 *
 * An empty box_part is returned unchanged in both outputs.
 */
void BoxBoundsLinear::apply( const BoxType & box_global ,
                             const BoxType & box_part ,
                             BoxType & box_interior ,
                             BoxType & box_use ) const
{
  const unsigned ghost = 1 ;

  if ( 0 == count( box_part ) ) {
    box_interior = box_part ;
    box_use      = box_part ;
    return ;
  }

  for ( size_t axis = 0 ; axis < 3 ; ++axis ) {
    const size_t part_lo = box_part[ axis ][0] ;
    const size_t part_hi = box_part[ axis ][1] ;
    const size_t glob_lo = box_global[ axis ][0] ;
    const size_t glob_hi = box_global[ axis ][1] ;

    // Is the part wider than the ghost layer along this axis?
    const bool wide = part_lo + ghost < part_hi ;

    if ( part_lo == glob_lo ) { box_interior[ axis ][0] = part_lo ; }
    else if ( wide )          { box_interior[ axis ][0] = part_lo + ghost ; }
    else                      { box_interior[ axis ][0] = part_hi ; }

    if ( part_hi == glob_hi ) { box_interior[ axis ][1] = part_hi ; }
    else if ( wide )          { box_interior[ axis ][1] = part_hi - ghost ; }
    else                      { box_interior[ axis ][1] = part_lo ; }

    if ( part_lo > ghost + glob_lo ) { box_use[ axis ][0] = part_lo - ghost ; }
    else                             { box_use[ axis ][0] = glob_lo ; }

    if ( part_hi + ghost < glob_hi ) { box_use[ axis ][1] = part_hi + ghost ; }
    else                             { box_use[ axis ][1] = glob_hi ; }
  }
}
/* Ghosting for the quadratic mesh.
 *
 * Same rules as the linear bounds except that the ghost width is
 * chosen per axis: one layer when the part's lower bound is at an odd
 * distance from the global lower bound, two layers otherwise.
 *
 * An empty box_part is returned unchanged in both outputs.
 */
void BoxBoundsQuadratic::apply( const BoxType & box_global ,
                                const BoxType & box_part ,
                                BoxType & box_interior ,
                                BoxType & box_use ) const
{
  if ( 0 == count( box_part ) ) {
    box_interior = box_part ;
    box_use      = box_part ;
    return ;
  }

  for ( size_t axis = 0 ; axis < 3 ; ++axis ) {
    const size_t part_lo = box_part[ axis ][0] ;
    const size_t part_hi = box_part[ axis ][1] ;
    const size_t glob_lo = box_global[ axis ][0] ;
    const size_t glob_hi = box_global[ axis ][1] ;

    const bool     odd   = 0 != ( ( part_lo - glob_lo ) % 2 );
    const unsigned ghost = odd ? 1 : 2 ;

    // Is the part wider than the ghost layer along this axis?
    const bool wide = part_lo + ghost < part_hi ;

    if ( part_lo == glob_lo ) { box_interior[ axis ][0] = part_lo ; }
    else if ( wide )          { box_interior[ axis ][0] = part_lo + ghost ; }
    else                      { box_interior[ axis ][0] = part_hi ; }

    if ( part_hi == glob_hi ) { box_interior[ axis ][1] = part_hi ; }
    else if ( wide )          { box_interior[ axis ][1] = part_hi - ghost ; }
    else                      { box_interior[ axis ][1] = part_lo ; }

    if ( part_lo > ghost + glob_lo ) { box_use[ axis ][0] = part_lo - ghost ; }
    else                             { box_use[ axis ][0] = glob_lo ; }

    if ( part_hi + ghost < glob_hi ) { box_use[ axis ][1] = part_hi + ghost ; }
    else                             { box_use[ axis ][1] = glob_hi ; }
  }
}
//----------------------------------------------------------------------------
/* Partition root_box into part_boxes.size() sub-boxes and verify
 * the result.
 *
 * Verification (std::runtime_error on failure):
 *  - each part, with one layer of linear ghosting applied, satisfies
 *    interior <= part <= use both in cell count and by intersection;
 *  - no two parts overlap;
 *  - the parts' cell counts add up to the cell count of root_box.
 *
 * With an empty part_boxes nothing is partitioned; the final count
 * check then throws unless root_box itself is empty.
 */
void box_partition_rcb( const BoxType & root_box ,
                        std::vector<BoxType> & part_boxes )
{
  const BoxBoundsLinear use_boxes ;

  const size_t part_count = part_boxes.size();

  // part_boxes[0] must not be touched when the vector is empty.
  if ( 0 < part_count ) {
    box_partition( 0 , part_count , root_box , & part_boxes[0] );
  }

  // Verify partitioning

  size_t total_cell = 0 ;

  for ( size_t i = 0 ; i < part_count ; ++i ) {

    total_cell += count( part_boxes[i] );

    BoxType box_interior , box_use ;

    use_boxes.apply( root_box , part_boxes[i] , box_interior , box_use );

    if ( count( box_use ) < count( part_boxes[i] ) ||
         count( part_boxes[i] ) < count( box_interior ) ||
         part_boxes[i] != intersect( part_boxes[i] , box_use ) ||
         box_interior  != intersect( part_boxes[i] , box_interior )) {

      std::ostringstream msg ;

      msg << "box_partition_rcb ERROR : "
          << "part_boxes[" << i << "] = "
          << part_boxes[i]
          << " use " << box_use
          << " interior " << box_interior
          << std::endl
          << " part ^ use " << intersect( part_boxes[i] , box_use )
          << " part ^ interior " << intersect( part_boxes[i] , box_interior );

      throw std::runtime_error( msg.str() );
    }

    // Parts must be pairwise disjoint.
    for ( size_t j = i + 1 ; j < part_count ; ++j ) {
      const BoxType tmp = intersect( part_boxes[i] , part_boxes[j] );

      if ( count( tmp ) ) {
        throw std::runtime_error( std::string("box partition intersection") );
      }
    }
  }

  if ( total_cell != count( root_box ) ) {
    throw std::runtime_error( std::string("box partition count") );
  }
}
//----------------------------------------------------------------------------
/* Local id of the global grid point (global_i,global_j,global_k):
 * the point's offset within 'local_use' is looked up in
 * 'local_use_id_map'.  box_map_offset() throws std::runtime_error
 * when the point is outside 'local_use'.
 */
size_t box_map_id( const BoxType & local_use ,
                   const std::vector<size_t> & local_use_id_map ,
                   const size_t global_i ,
                   const size_t global_j ,
                   const size_t global_k )
{
  return local_use_id_map[
    box_map_offset( local_use , global_i , global_j , global_k ) ];
}
//----------------------------------------------------------------------------
void box_partition_maps( const BoxType & root_box ,
const std::vector<BoxType> & part_boxes ,
const BoxBounds & use_boxes ,
const size_t my_part ,
BoxType & my_use_box ,
std::vector<size_t> & my_use_id_map ,
size_t & my_count_interior ,
size_t & my_count_owned ,
size_t & my_count_uses ,
std::vector<size_t> & my_part_counts ,
std::vector<std::vector<size_t> > & my_send_map )
{
const size_t np = part_boxes.size();
if ( np <= my_part ) {
std::ostringstream msg ;
msg << "box_partition_maps ERROR : "
<< " np(" << np << ") <= my_part(" << my_part << ")" ;
throw std::runtime_error( msg.str() );
}
const BoxType my_owned_box = part_boxes[my_part];
BoxType my_interior_box ;
use_boxes.apply( root_box, my_owned_box, my_interior_box, my_use_box );
my_count_interior = count( my_interior_box );
my_count_owned = count( my_owned_box );
my_count_uses = count( my_use_box );
my_use_id_map.assign( my_count_uses , std::numeric_limits<size_t>::max() );
// Order ids as { owned-interior , owned-parallel , received_{(p+i)%np} }
size_t offset_interior = 0 ;
size_t offset_parallel = my_count_interior ;
for ( size_t iz = my_owned_box[2][0] ; iz < my_owned_box[2][1] ; ++iz ) {
for ( size_t iy = my_owned_box[1][0] ; iy < my_owned_box[1][1] ; ++iy ) {
for ( size_t ix = my_owned_box[0][0] ; ix < my_owned_box[0][1] ; ++ix ) {
const size_t offset = box_map_offset( my_use_box , ix , iy , iz );
if ( contain( my_interior_box , ix , iy , iz ) ) {
my_use_id_map[ offset ] = offset_interior++ ;
}
else {
my_use_id_map[ offset ] = offset_parallel++ ;
}
}}}
my_part_counts.assign( np , (size_t) 0 );
my_send_map.assign( np , std::vector<size_t>() );
my_part_counts[0] = my_count_owned ;
for ( size_t i = 1 ; i < np ; ++i ) {
const size_t ip = ( my_part + i ) % np ;
const BoxType p_owned_box = part_boxes[ip];
BoxType p_use_box , p_interior_box ;
use_boxes.apply( root_box, p_owned_box, p_interior_box, p_use_box );
const BoxType recv_box = intersect( my_use_box , p_owned_box );
const BoxType send_box = intersect( my_owned_box , p_use_box );
if ( 0 != ( my_part_counts[i] = count( recv_box ) ) ) {
for ( size_t iz = recv_box[2][0] ; iz < recv_box[2][1] ; ++iz ) {
for ( size_t iy = recv_box[1][0] ; iy < recv_box[1][1] ; ++iy ) {
for ( size_t ix = recv_box[0][0] ; ix < recv_box[0][1] ; ++ix ) {
const size_t offset = box_map_offset( my_use_box , ix , iy , iz );
my_use_id_map[ offset ] = offset_parallel++ ;
}}}
}
if ( 0 != count( send_box ) ) {
for ( size_t iz = send_box[2][0] ; iz < send_box[2][1] ; ++iz ) {
for ( size_t iy = send_box[1][0] ; iy < send_box[1][1] ; ++iy ) {
for ( size_t ix = send_box[0][0] ; ix < send_box[0][1] ; ++ix ) {
const size_t offset = box_map_offset( my_use_box , ix , iy , iz );
my_send_map[ i ].push_back( my_use_id_map[ offset ] );
}}}
}
}
}

View File

@ -1,210 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef BOXMESHPARTITION_HPP
#define BOXMESHPARTITION_HPP
#include <cstddef>
#include <utility>
#include <vector>
#include <iostream>
//----------------------------------------------------------------------------
/** \brief Axis-aligned index box in three dimensions.
 *
 *  data[axis][0] is the lower bound and data[axis][1] the upper bound
 *  of the box along 'axis'.  The free functions count() and contain()
 *  treat the range as half-open: [ lower , upper ).
 *
 *  BoxType is a plain aggregate: it has no constructors, so a default
 *  constructed box has indeterminate bounds.
 */
struct BoxType {
  size_t data[3][2] ;

  typedef size_t range_type[2] ;

  /** \brief { lower , upper } bounds along axis i (read only). */
  inline
  const range_type & operator[]( size_t i ) const { return data[i]; }

  /** \brief { lower , upper } bounds along axis i. */
  inline
  range_type & operator[]( size_t i ) { return data[i]; }

  /** \brief True when all six bounds are equal. */
  inline
  bool operator == ( const BoxType & rhs ) const
  {
    for ( size_t axis = 0 ; axis < 3 ; ++axis ) {
      if ( data[axis][0] != rhs.data[axis][0] ||
           data[axis][1] != rhs.data[axis][1] ) {
        return false ;
      }
    }
    return true ;
  }

  /** \brief True when any bound differs; always the negation of ==. */
  inline
  bool operator != ( const BoxType & rhs ) const
  {
    return ! ( *this == rhs );
  }
};
/** \brief Number of cells in the box: the product of the three axis
 *         lengths, where an axis with upper <= lower has length zero.
 */
inline
size_t count( const BoxType & b )
{
  size_t cells = 1 ;

  for ( size_t axis = 0 ; axis < 3 ; ++axis ) {
    const size_t lower = b[ axis ][0] ;
    const size_t upper = b[ axis ][1] ;

    if ( upper > lower ) {
      cells *= upper - lower ;
    }
    else {
      cells = 0 ; // empty along this axis
    }
  }

  return cells ;
}
inline
bool contain( const BoxType & b , size_t i , size_t j , size_t k )
{
return b[0][0] <= i && i < b[0][1] &&
b[1][0] <= j && j < b[1][1] &&
b[2][0] <= k && k < b[2][1] ;
}
/** \brief Intersection of two boxes: per axis, the larger of the two
 *         lower bounds and the smaller of the two upper bounds.
 *
 *  For disjoint boxes the result has upper <= lower on some axis,
 *  which count() reports as zero cells.
 */
inline
BoxType intersect( const BoxType & x , const BoxType & y )
{
  BoxType z ;

  for ( size_t axis = 0 ; axis < 3 ; ++axis ) {
    const size_t x_lo = x[ axis ][0] , y_lo = y[ axis ][0] ;
    const size_t x_hi = x[ axis ][1] , y_hi = y[ axis ][1] ;

    z[ axis ][0] = x_lo < y_lo ? y_lo : x_lo ; // larger lower bound
    z[ axis ][1] = y_hi < x_hi ? y_hi : x_hi ; // smaller upper bound
  }

  return z ;
}
inline
std::ostream & operator << ( std::ostream & s , const BoxType & box )
{
s << "{ "
<< box[0][0] << " " << box[0][1] << " , "
<< box[1][0] << " " << box[1][1] << " , "
<< box[2][0] << " " << box[2][1] << " }" ;
return s ;
}
//----------------------------------------------------------------------------
/** \brief Interface for ghosting policies of a partitioned box. */
class BoxBounds {
public:

  BoxBounds() {}

  virtual ~BoxBounds() {}

  /** \brief Given the global box and one part of it, compute the
   *         part's interior box and the box of everything it uses.
   *
   *  \param box_global    [in]  the whole box
   *  \param box_part      [in]  one part of the whole box
   *  \param box_interior  [out] interior of the part
   *  \param box_use       [out] region used by the part
   */
  virtual
  void apply( const BoxType & box_global ,
              const BoxType & box_part ,
              BoxType & box_interior ,
              BoxType & box_use ) const = 0 ;
};
/** \brief  BoxBounds strategy for linear meshes (apply() is defined
 *          outside this header).
 */
class BoxBoundsLinear : public BoxBounds
{
public:
/** \brief Default bounds to one layer of ghosting
 *
 *  Overrides BoxBounds::apply(); box_interior and box_use are outputs.
 */
virtual
void apply( const BoxType & box_global ,
const BoxType & box_part ,
BoxType & box_interior ,
BoxType & box_use ) const ;
virtual ~BoxBoundsLinear() {}
BoxBoundsLinear() {}
};
/** \brief  BoxBounds strategy for quadratic meshes (apply() is defined
 *          outside this header).
 */
class BoxBoundsQuadratic : public BoxBounds {
public:
/** \brief Quadratic mesh: even ordinates have two layers,
 * odd ordinates have one layer.
 *
 *  Overrides BoxBounds::apply(); box_interior and box_use are outputs.
 */
virtual
void apply( const BoxType & box_global ,
const BoxType & box_part ,
BoxType & box_interior ,
BoxType & box_use ) const ;
virtual ~BoxBoundsQuadratic() {}
BoxBoundsQuadratic() {}
};
//----------------------------------------------------------------------------
/* Partition root_box into part_boxes.size() sub-boxes.
 *
 * The caller sizes part_boxes beforehand; its elements are overwritten with
 * the resulting sub-boxes.
 * NOTE(review): "rcb" presumably stands for recursive coordinate bisection --
 * confirm against the implementation.
 */
void box_partition_rcb( const BoxType & root_box ,
std::vector<BoxType> & part_boxes );
//----------------------------------------------------------------------------
/* Determine local id layout and communication maps for partitioned boxes.
 *
 * Local ids are laid out as follows:
 * { [ owned-interior ids not sent ] ,
 * [ owned-boundary ids to be sent to other processes ] ,
 * [ received ids from processor ( my_part + 1 ) % part_count ]
 * [ received ids from processor ( my_part + 2 ) % part_count ]
 * [ received ids from processor ( my_part + 3 ) % part_count ]
 * ... };
 *
 * This layout allows
 * (1) received data to be copied into a contiguous block of memory
 * (2) send data to be extracted from a contiguous block of memory.
 */
void box_partition_maps(
const BoxType & root_box , // [in] Global box
const std::vector<BoxType> & part_boxes , // [in] Partitioned boxes
const BoxBounds & use_boxes , // [in] Ghost boundaries
const size_t my_part , // [in] My local part
BoxType & my_use_box , // [out] My used box with ghost
std::vector<size_t> & my_use_id_map , // [out] Local ordering map
size_t & my_count_interior , // [out] How many interior
size_t & my_count_owned , // [out] How many owned
size_t & my_count_uses , // [out] How many used
std::vector<size_t> & my_part_counts , // [out] Partitioning of my_use_id_map
std::vector<std::vector<size_t> > & my_send_map ); // [out] Send id map
/* Mapping of cartesian coordinate to local id.
 *
 * my_use_box and my_use_id_map are the corresponding outputs of
 * box_partition_maps(); (global_i, global_j, global_k) is the global
 * cartesian index whose local id is returned.
 */
size_t box_map_id( const BoxType & my_use_box ,
const std::vector<size_t> & my_use_id_map ,
const size_t global_i ,
const size_t global_j ,
const size_t global_k );
//----------------------------------------------------------------------------
#endif /* #ifndef BOXMESHPARTITION_HPP */

View File

@ -1,16 +0,0 @@
# Add this example's build and source directories to the include path.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# Collect every .cpp file in this directory as the executable's sources.
SET(SOURCES "")
FILE(GLOB SOURCES *.cpp)
# NOTE(review): LIBRARIES is set here but is not referenced by the
# TRIBITS_ADD_EXECUTABLE call below -- confirm whether it is still needed.
SET(LIBRARIES kokkoscore)
# Build the multi_fem example executable from the globbed sources.
TRIBITS_ADD_EXECUTABLE(
multi_fem
SOURCES ${SOURCES}
COMM serial mpi
)

View File

@ -1,452 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef EXPLICIT_DRIVER_HPP
#define EXPLICIT_DRIVER_HPP
#include <sys/time.h>
#include <iostream>
#include <iomanip>
#include <cstdlib>
#include <cmath>
#include <impl/Kokkos_Timer.hpp>
#include <ExplicitFunctors.hpp>
//----------------------------------------------------------------------------
namespace Explicit {
/** \brief  Timing record for one benchmark run.
 *
 *  All time fields start at zero.  best() folds another record into this
 *  one by keeping, field by field, the smaller of the two times.
 */
struct PerformanceData {
  double mesh_time ;
  double init_time ;
  double internal_force_time ;
  double central_diff ;
  double comm_time ;
  size_t number_of_steps ;

  /** \brief  Zero every field. */
  PerformanceData()
    : mesh_time( 0 )
    , init_time( 0 )
    , internal_force_time( 0 )
    , central_diff( 0 )
    , comm_time( 0 )
    , number_of_steps( 0 )
  {}

  /** \brief  Keep the minimum of each time field between *this and rhs.
   *
   *  number_of_steps is intentionally left untouched.
   */
  void best( const PerformanceData & rhs )
  {
    mesh_time =
      ( rhs.mesh_time < mesh_time ) ? rhs.mesh_time : mesh_time ;
    init_time =
      ( rhs.init_time < init_time ) ? rhs.init_time : init_time ;
    internal_force_time =
      ( rhs.internal_force_time < internal_force_time )
        ? rhs.internal_force_time : internal_force_time ;
    central_diff =
      ( rhs.central_diff < central_diff ) ? rhs.central_diff : central_diff ;
    comm_time =
      ( rhs.comm_time < comm_time ) ? rhs.comm_time : comm_time ;
  }
};
/** \brief  Run the explicit dynamics time-stepping loop on one mesh and
 *          return the accumulated timings.
 *
 *  \tparam Scalar       Floating point type of the field data.
 *  \tparam FixtureType  Must provide FEMeshType, execution_space and
 *                       element_node_count.
 *
 *  \param mesh          Mesh to run on; mesh.parallel_data_map.machine
 *                       supplies the communicator used for timing maxima.
 *  \param global_max_x  Global extent in X; also passed to nodal_step as
 *                       the boundary coordinate x_bc.
 *  \param global_max_y  Global extent in Y (used only for sample output).
 *  \param global_max_z  Global extent in Z (used only for sample output).
 *  \param steps         Number of time steps to execute.
 *  \param print_sample  0: no output.  1: every 100th step print the X
 *                       displacement of owned nodes on the (y,z)=(0,0) line
 *                       and on the (global_max_y,global_max_z) line.
 *                       2: every 100th step print the displacement of the
 *                       owned node at half the global extents.
 *
 *  \return  init_time, comm_time, internal_force_time, central_diff and
 *           number_of_steps are filled in; mesh_time is not set here and
 *           keeps its default of zero.  Each timing sample is passed
 *           through comm::max( machine , ... ).
 */
template< typename Scalar , class FixtureType >
PerformanceData run( const typename FixtureType::FEMeshType & mesh ,
const int global_max_x ,
const int global_max_y ,
const int global_max_z ,
const int steps ,
const int print_sample )
{
typedef Scalar scalar_type ;
typedef FixtureType fixture_type ;
typedef typename fixture_type::execution_space execution_space ;
//typedef typename fixture_type::FEMeshType mesh_type ; // unused
enum { ElementNodeCount = fixture_type::element_node_count };
// Number of state slots that next_state cycles through.
const int NumStates = 2;
const int total_num_steps = steps ;
const Scalar user_dt = 5.0e-6;
//const Scalar end_time = 0.0050;
// element block parameters
const Scalar lin_bulk_visc = 0.0;
const Scalar quad_bulk_visc = 0.0;
// const Scalar lin_bulk_visc = 0.06;
// const Scalar quad_bulk_visc = 1.2;
// const Scalar hg_stiffness = 0.0;
// const Scalar hg_viscosity = 0.0;
// const Scalar hg_stiffness = 0.03;
// const Scalar hg_viscosity = 0.001;
// material properties
const Scalar youngs_modulus=1.0e6;
const Scalar poissons_ratio=0.0;
const Scalar density = 8.0e-4;
const comm::Machine machine = mesh.parallel_data_map.machine ;
PerformanceData perf_data ;
// The timer starts here; the first reading below becomes init_time.
Kokkos::Impl::Timer wall_clock ;
//------------------------------------
// Generate fields
typedef Fields< scalar_type , execution_space > fields_type ;
fields_type mesh_fields( mesh ,
lin_bulk_visc ,
quad_bulk_visc ,
youngs_modulus ,
poissons_ratio ,
density );
// Host mirrors used for setting the initial condition and for sample output.
typename fields_type::node_coords_type::HostMirror
model_coords_h = Kokkos::create_mirror( mesh_fields.model_coords );
typename fields_type::geom_state_array_type::HostMirror
displacement_h = Kokkos::create_mirror( mesh_fields.displacement );
typename fields_type::geom_state_array_type::HostMirror
velocity_h = Kokkos::create_mirror( mesh_fields.velocity );
Kokkos::deep_copy( model_coords_h , mesh_fields.model_coords );
//------------------------------------
// Initialization
initialize_element<Scalar,execution_space>::apply( mesh_fields );
initialize_node< Scalar,execution_space>::apply( mesh_fields );
const Scalar x_bc = global_max_x ;
// Initial condition on velocity to initiate a pulse along the X axis
{
const unsigned X = 0;
for (int inode = 0; inode< mesh_fields.num_nodes; ++inode) {
// Only nodes whose X coordinate is exactly zero are given a velocity,
// and it is written to both state slots (0 and 1).
if ( model_coords_h(inode,X) == 0) {
velocity_h(inode,X,0) = 1.0e3;
velocity_h(inode,X,1) = 1.0e3;
}
}
}
Kokkos::deep_copy( mesh_fields.velocity , velocity_h );
//--------------------------------------------------------------------------
// We will call a sequence of functions. These functions have been
// grouped into several functors to balance the number of global memory
// accesses versus requiring too many registers or too much L1 cache.
// Global memory accesses have read/write cost and memory subsystem contention cost.
//--------------------------------------------------------------------------
perf_data.init_time = comm::max( machine , wall_clock.seconds() );
// Parameters required for the internal force computations.
int current_state = 0;
int previous_state = 0;
int next_state = 0;
perf_data.number_of_steps = total_num_steps ;
#if defined( KOKKOS_HAVE_MPI )
typedef typename
fields_type::geom_state_array_type::value_type comm_value_type ;
// Six values per node: see the buffer layout comment inside the loop.
const unsigned comm_value_count = 6 ;
Kokkos::AsyncExchange< comm_value_type , execution_space ,
Kokkos::ParallelDataMap >
comm_exchange( mesh.parallel_data_map , comm_value_count );
#endif
for (int step = 0; step < total_num_steps; ++step) {
wall_clock.reset();
//------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_MPI )
{
// Communicate "send" nodes' displacement and velocity next_state
// to the ghosted nodes.
// buffer packages: { { dx , dy , dz , vx , vy , vz }_node }
pack_state< Scalar , execution_space >
::apply( comm_exchange.buffer() ,
mesh.parallel_data_map.count_interior ,
mesh.parallel_data_map.count_send ,
mesh_fields , next_state );
comm_exchange.setup();
comm_exchange.send_receive();
unpack_state< Scalar , execution_space >
::apply( mesh_fields , next_state ,
comm_exchange.buffer() ,
mesh.parallel_data_map.count_owned ,
mesh.parallel_data_map.count_receive );
execution_space::fence();
}
#endif
// Accumulated on every step, also when KOKKOS_HAVE_MPI is not defined.
perf_data.comm_time += comm::max( machine , wall_clock.seconds() );
//------------------------------------------------------------------------
// rotate the states
previous_state = current_state;
current_state = next_state;
++next_state;
next_state %= NumStates;
wall_clock.reset();
// First kernel 'grad_hgop' combines two functions:
// gradient, velocity gradient
grad< Scalar , execution_space >::apply( mesh_fields ,
current_state ,
previous_state );
// Combine tensor decomposition and rotation functions.
decomp_rotate< Scalar , execution_space >::apply( mesh_fields ,
current_state ,
previous_state );
internal_force< Scalar , execution_space >::apply( mesh_fields ,
user_dt ,
current_state );
// Fence before reading the timer so the kernels above are included.
execution_space::fence();
perf_data.internal_force_time +=
comm::max( machine , wall_clock.seconds() );
wall_clock.reset();
// Assembly of elements' contributions to nodal force into
// a nodal force vector. Update the accelerations, velocities,
// displacements.
// The same pattern can be used for matrix-free residual computations.
nodal_step< Scalar , execution_space >::apply( mesh_fields ,
x_bc ,
current_state,
next_state );
execution_space::fence();
perf_data.central_diff +=
comm::max( machine , wall_clock.seconds() );
// Optional diagnostic output on every 100th step.
if ( print_sample && 0 == step % 100 ) {
Kokkos::deep_copy( displacement_h , mesh_fields.displacement );
Kokkos::deep_copy( velocity_h , mesh_fields.velocity );
if ( 1 == print_sample ) {
std::cout << "step " << step
<< " : displacement(*,0,0) =" ;
for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
if ( model_coords_h(i,1) == 0 && model_coords_h(i,2) == 0 ) {
std::cout << " " << displacement_h(i,0,next_state);
}
}
std::cout << std::endl ;
const float tol = 1.0e-6 ;
const int yb = global_max_y ;
const int zb = global_max_z ;
std::cout << "step " << step
<< " : displacement(*," << yb << "," << zb << ") =" ;
for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
if ( fabs( model_coords_h(i,1) - yb ) < tol &&
fabs( model_coords_h(i,2) - zb ) < tol ) {
std::cout << " " << displacement_h(i,0,next_state);
}
}
std::cout << std::endl ;
}
else if ( 2 == print_sample ) {
const float tol = 1.0e-6 ;
// Integer division: the sampled node sits at half of each global extent.
const int xb = global_max_x / 2 ;
const int yb = global_max_y / 2 ;
const int zb = global_max_z / 2 ;
for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
if ( fabs( model_coords_h(i,0) - xb ) < tol &&
fabs( model_coords_h(i,1) - yb ) < tol &&
fabs( model_coords_h(i,2) - zb ) < tol ) {
std::cout << "step " << step
<< " : displacement("
<< xb << "," << yb << "," << zb << ") = {"
<< std::setprecision(6)
<< " " << displacement_h(i,0,next_state)
<< std::setprecision(2)
<< " " << displacement_h(i,1,next_state)
<< std::setprecision(2)
<< " " << displacement_h(i,2,next_state)
<< " }" << std::endl ;
}
}
}
}
}
return perf_data ;
}
template <typename Scalar, typename Device>
static void driver( const char * const label ,
comm::Machine machine ,
const int gang_count ,
const int elem_count_beg ,
const int elem_count_end ,
const int runs )
{
typedef Scalar scalar_type ;
typedef Device execution_space ;
typedef double coordinate_scalar_type ;
typedef FixtureElementHex8 fixture_element_type ;
typedef BoxMeshFixture< coordinate_scalar_type ,
execution_space ,
fixture_element_type > fixture_type ;
typedef typename fixture_type::FEMeshType mesh_type ;
const size_t proc_count = comm::size( machine );
const size_t proc_rank = comm::rank( machine );
const int space = 15 ;
const int steps = 1000 ;
const int print_sample = 0 ;
if ( comm::rank( machine ) == 0 ) {
std::cout << std::endl ;
std::cout << "\"MiniExplicitDynamics with Kokkos " << label
<< " time_steps(" << steps << ")"
<< "\"" << std::endl;
std::cout << std::left << std::setw(space) << "\"Element\" , ";
std::cout << std::left << std::setw(space) << "\"Node\" , ";
std::cout << std::left << std::setw(space) << "\"Initialize\" , ";
std::cout << std::left << std::setw(space) << "\"ElemForce\" , ";
std::cout << std::left << std::setw(space) << "\"NodeUpdate\" , ";
std::cout << std::left << std::setw(space) << "\"NodeComm\" , ";
std::cout << std::left << std::setw(space) << "\"Time/Elem\" , ";
std::cout << std::left << std::setw(space) << "\"Time/Node\"";
std::cout << std::endl;
std::cout << std::left << std::setw(space) << "\"count\" , ";
std::cout << std::left << std::setw(space) << "\"count\" , ";
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
std::cout << std::left << std::setw(space) << "\"microsec\"";
std::cout << std::endl;
}
for(int i = elem_count_beg ; i < elem_count_end ; i *= 2 )
{
const int iz = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) );
const int iy = iz + 1 ;
const int ix = 2 * iy ;
const int nelem = ix * iy * iz ;
const int nnode = ( ix + 1 ) * ( iy + 1 ) * ( iz + 1 );
mesh_type mesh =
fixture_type::create( proc_count , proc_rank , gang_count ,
ix , iy , iz );
mesh.parallel_data_map.machine = machine ;
PerformanceData perf , best ;
for(int j = 0; j < runs; j++){
perf = run<scalar_type,fixture_type>(mesh,ix,iy,iz,steps,print_sample);
if( j == 0 ) {
best = perf ;
}
else {
best.best( perf );
}
}
if ( comm::rank( machine ) == 0 ) {
double time_per_element =
( best.internal_force_time ) / ( nelem * perf.number_of_steps );
double time_per_node =
( best.comm_time + best.central_diff ) / ( nnode * perf.number_of_steps );
std::cout << std::setw(space-3) << nelem << " , "
<< std::setw(space-3) << nnode << " , "
<< std::setw(space-3) << best.number_of_steps << " , "
<< std::setw(space-3) << best.init_time * 1000000 << " , "
<< std::setw(space-3)
<< ( best.internal_force_time * 1000000 ) / best.number_of_steps << " , "
<< std::setw(space-3)
<< ( best.central_diff * 1000000 ) / best.number_of_steps << " , "
<< std::setw(space-3)
<< ( best.comm_time * 1000000 ) / best.number_of_steps << " , "
<< std::setw(space-3) << time_per_element * 1000000 << " , "
<< std::setw(space-3) << time_per_node * 1000000
<< std::endl ;
}
}
}
} // namespace Explicit
#endif /* #ifndef EXPLICIT_DRIVER_HPP */

File diff suppressed because it is too large Load Diff

View File

@ -1,86 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_FEMESH_HPP
#define KOKKOS_FEMESH_HPP
#include <utility>
#include <limits>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <Kokkos_Core.hpp>
#include <Kokkos_StaticCrsGraph.hpp>
#include <ParallelComm.hpp>
#include <ParallelDataMap.hpp>
namespace HybridFEM {
//----------------------------------------------------------------------------
/** \brief Finite element mesh fixture for hybrid parallel performance tests.
 *
 *  \tparam CoordScalarType  Scalar type of the nodal coordinates.
 *  \tparam ElemNodeCount    Number of nodes per element.
 *  \tparam Device           Kokkos device; supplies size_type and is the
 *                           device argument of every view in the mesh.
 */
template< typename CoordScalarType , unsigned ElemNodeCount , class Device >
struct FEMesh {
typedef typename Device::size_type size_type ;
static const size_type element_node_count = ElemNodeCount ;
// Three coordinates per node.
typedef Kokkos::View< CoordScalarType*[3] , Device > node_coords_type ;
// ElemNodeCount node ids per element.
typedef Kokkos::View< size_type*[ElemNodeCount], Device > elem_node_ids_type ;
// Graph whose entries are size_type pairs.
// NOTE(review): presumably each pair is (element id, local node index) for
// an element attached to the node -- confirm against the mesh fixture.
typedef Kokkos::StaticCrsGraph< size_type[2] , Device > node_elem_ids_type ;
node_coords_type node_coords ;
elem_node_ids_type elem_node_ids ;
node_elem_ids_type node_elem_ids ;
// Parallel decomposition data for this mesh.
Kokkos::ParallelDataMap parallel_data_map ;
};
//----------------------------------------------------------------------------
} /* namespace HybridFEM */
#endif /* #ifndef KOKKOS_FEMESH_HPP */

View File

@ -1,268 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef ELEMENTHEX_HPP
#define ELEMENTHEX_HPP
namespace HybridFEM {

/** \brief  1D tensor-product basis data for a hex element with
 *          NodeCount nodes; specialized below for 8 and 27 nodes.
 */
template< unsigned NodeCount >
class HexElement_TensorData ;

template< unsigned NodeCount , class Device >
class HexElement_TensorEval ;

//----------------------------------------------------------------------------
/** \brief Evaluate Hex element on interval [-1,1]^3
 *
 *  8-node element: two linear basis functions per direction evaluated at
 *  two integration points per direction.
 */
template<>
class HexElement_TensorData< 8 > {
public:

  static const unsigned element_node_count    = 8 ;
  static const unsigned spatial_dimension     = 3 ;
  static const unsigned integration_count_1d  = 2 ;
  static const unsigned function_count_1d     = 2 ;

  // Both tables are indexed [ integration point ][ basis function ],
  // which is how the constructor fills them and how HexElement_Data
  // reads them.
  float values_1d [ integration_count_1d ][ function_count_1d ];
  float derivs_1d [ integration_count_1d ][ function_count_1d ];
  float weights_1d[ integration_count_1d ];

  // Per node: 1D index along x, y, z in columns 0..2.
  // Column 3 is unused padding and is kept at zero.
  unsigned char eval_map[ element_node_count ][4] ;

  /** \brief  Value of 1D basis function \c jf at coordinate \c x. */
  static float eval_value_1d( const unsigned jf , const float x )
  {
    return 0 == jf ? 0.5 * ( 1.0 - x ) : (
           1 == jf ? 0.5 * ( 1.0 + x ) : 0 );
  }

  /** \brief  Derivative of 1D basis function \c jf (constant in x). */
  static float eval_deriv_1d( const unsigned jf , const float )
  {
    return 0 == jf ? -0.5 : (
           1 == jf ?  0.5 : 0 );
  }

  HexElement_TensorData()
  {
    const unsigned char tmp_map[ element_node_count ][ spatial_dimension ] =
      { { 0 , 0 , 0 },
        { 1 , 0 , 0 },
        { 1 , 1 , 0 },
        { 0 , 1 , 0 },
        { 0 , 0 , 1 },
        { 1 , 0 , 1 },
        { 1 , 1 , 1 },
        { 0 , 1 , 1 } };

    weights_1d[0] = 1 ;
    weights_1d[1] = 1 ;

    const float points_1d[ integration_count_1d ] =
      { -0.577350269 , 0.577350269 };

    for ( unsigned i = 0 ; i < element_node_count ; ++i ) {
      eval_map[i][0] = tmp_map[i][0];
      eval_map[i][1] = tmp_map[i][1];
      eval_map[i][2] = tmp_map[i][2];
      eval_map[i][3] = 0 ; // padding column: never left indeterminate
    }

    for ( unsigned xp = 0 ; xp < integration_count_1d ; ++xp ) {
    for ( unsigned xf = 0 ; xf < function_count_1d ; ++xf ) {
      values_1d[xp][xf] = eval_value_1d( xf , points_1d[xp] );
      derivs_1d[xp][xf] = eval_deriv_1d( xf , points_1d[xp] );
    }}
  }
};

//----------------------------------------------------------------------------
/** \brief  27-node element on [-1,1]^3: three quadratic basis functions
 *          per direction evaluated at three integration points per
 *          direction.
 */
template<>
class HexElement_TensorData< 27 > {
public:

  static const unsigned element_node_count    = 27 ;
  static const unsigned spatial_dimension     = 3 ;
  static const unsigned integration_count_1d  = 3 ;
  static const unsigned function_count_1d     = 3 ;

  // Both tables are indexed [ integration point ][ basis function ].
  float values_1d [ integration_count_1d ][ function_count_1d ];
  float derivs_1d [ integration_count_1d ][ function_count_1d ];
  float weights_1d[ integration_count_1d ];

  // Per node: 1D index along x, y, z in columns 0..2.
  // Column 3 is unused padding and is kept at zero.
  unsigned char eval_map[ element_node_count ][4] ;

  // Data members:
  //   values_1d  : 9 floats
  //   derivs_1d  : 9 floats
  //   weights_1d : 3 floats
  //   eval_map   : 27 * 4 unsigned chars

  /** \brief  Value of 1D quadratic basis function \c jf at \c p. */
  static float eval_value_1d( const unsigned jf , const float p )
  {
    return 0 == jf ? 0.5 * p * ( p - 1 ) : (
           1 == jf ? 1.0 - p * p : (
           2 == jf ? 0.5 * p * ( p + 1 ) : 0 ));
  }

  /** \brief  Derivative of 1D quadratic basis function \c jf at \c p. */
  static float eval_deriv_1d( const unsigned jf , const float p )
  {
    return 0 == jf ? p - 0.5 : (
           1 == jf ? -2.0 * p : (
           2 == jf ? p + 0.5 : 0 ));
  }

  HexElement_TensorData()
  {
    const unsigned char tmp_map[ element_node_count ][ spatial_dimension ] =
      { { 0 , 0 , 0 },
        { 2 , 0 , 0 },
        { 2 , 2 , 0 },
        { 0 , 2 , 0 },
        { 0 , 0 , 2 },
        { 2 , 0 , 2 },
        { 2 , 2 , 2 },
        { 0 , 2 , 2 },
        { 1 , 0 , 0 },
        { 2 , 1 , 0 },
        { 1 , 2 , 0 },
        { 0 , 1 , 0 },
        { 0 , 0 , 1 },
        { 2 , 0 , 1 },
        { 2 , 2 , 1 },
        { 0 , 2 , 1 },
        { 1 , 0 , 2 },
        { 2 , 1 , 2 },
        { 1 , 2 , 2 },
        { 0 , 1 , 2 },
        { 1 , 1 , 1 },
        { 1 , 1 , 0 },
        { 1 , 1 , 2 },
        { 0 , 1 , 1 },
        { 2 , 1 , 1 },
        { 1 , 0 , 1 },
        { 1 , 2 , 1 } };

    // Interval [-1,1]
    weights_1d[0] = 0.555555556 ;
    weights_1d[1] = 0.888888889 ;
    weights_1d[2] = 0.555555556 ;

    const float points_1d[3] = { -0.774596669 ,
                                  0.000000000 ,
                                  0.774596669 };

    for ( unsigned i = 0 ; i < element_node_count ; ++i ) {
      eval_map[i][0] = tmp_map[i][0];
      eval_map[i][1] = tmp_map[i][1];
      eval_map[i][2] = tmp_map[i][2];
      eval_map[i][3] = 0 ; // padding column: never left indeterminate
    }

    for ( unsigned xp = 0 ; xp < integration_count_1d ; ++xp ) {
    for ( unsigned xf = 0 ; xf < function_count_1d ; ++xf ) {
      values_1d[xp][xf] = eval_value_1d( xf , points_1d[xp] );
      derivs_1d[xp][xf] = eval_deriv_1d( xf , points_1d[xp] );
    }}
  }
};

//----------------------------------------------------------------------------
/** \brief  Full 3D integration weights, basis values and basis gradients
 *          of a hex element, expanded from the 1D tensor data.
 *
 *  The same eval_map is used to decompose both the integration point
 *  index and the basis function index into their (x,y,z) 1D indices, so
 *  there are NodeCount integration points and NodeCount functions.
 */
template< unsigned NodeCount >
class HexElement_Data {
public:

  static const unsigned spatial_dimension   = 3 ;
  static const unsigned element_node_count  = NodeCount ;
  static const unsigned integration_count   = NodeCount ;
  static const unsigned function_count      = NodeCount ;

  float weights[   integration_count ] ;
  float values[    integration_count ][ function_count ];
  float gradients[ integration_count ][ spatial_dimension ][ function_count ];

  HexElement_Data()
  {
    HexElement_TensorData< NodeCount > tensor_data ;

    for ( unsigned ip = 0 ; ip < integration_count ; ++ip ) {

      const unsigned ipx = tensor_data.eval_map[ip][0] ;
      const unsigned ipy = tensor_data.eval_map[ip][1] ;
      const unsigned ipz = tensor_data.eval_map[ip][2] ;

      weights[ip] = tensor_data.weights_1d[ ipx ] *
                    tensor_data.weights_1d[ ipy ] *
                    tensor_data.weights_1d[ ipz ] ;

      for ( unsigned jf = 0 ; jf < function_count ; ++jf ) {

        const unsigned jfx = tensor_data.eval_map[jf][0] ;
        const unsigned jfy = tensor_data.eval_map[jf][1] ;
        const unsigned jfz = tensor_data.eval_map[jf][2] ;

        values[ip][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                         tensor_data.values_1d[ ipy ][ jfy ] *
                         tensor_data.values_1d[ ipz ][ jfz ] ;

        // Gradient component d: the 1D derivative along direction d
        // multiplied by the 1D values along the other two directions.
        gradients[ip][0][jf] = tensor_data.derivs_1d[ ipx ][ jfx ] *
                               tensor_data.values_1d[ ipy ][ jfy ] *
                               tensor_data.values_1d[ ipz ][ jfz ] ;

        gradients[ip][1][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                               tensor_data.derivs_1d[ ipy ][ jfy ] *
                               tensor_data.values_1d[ ipz ][ jfz ] ;

        gradients[ip][2][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                               tensor_data.values_1d[ ipy ][ jfy ] *
                               tensor_data.derivs_1d[ ipz ][ jfz ] ;
      }
    }
  }
};

//----------------------------------------------------------------------------

} /* namespace HybridFEM */
#endif /* #ifndef ELEMENTHEX_HPP */

View File

@ -1,443 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_HEXEXPLICITFUNCTIONS_HPP
#define KOKKOS_HEXEXPLICITFUNCTIONS_HPP
#include <math.h>
namespace Explicit {
struct Hex8Functions
{
// Element geometry: three spatial dimensions, eight nodes per element.
static const unsigned SpatialDim = 3 ;
static const unsigned ElemNodeCount = 8 ;
// Indices for full 3x3 tensor:
// (every one of the nine components has its own slot)
static const unsigned K_F_XX = 0 ;
static const unsigned K_F_YY = 1 ;
static const unsigned K_F_ZZ = 2 ;
static const unsigned K_F_XY = 3 ;
static const unsigned K_F_YZ = 4 ;
static const unsigned K_F_ZX = 5 ;
static const unsigned K_F_YX = 6 ;
static const unsigned K_F_ZY = 7 ;
static const unsigned K_F_XZ = 8 ;
static const unsigned K_F_SIZE = 9 ;
// Indexes into a 3 by 3 symmetric tensor stored as a length 6 vector
// (each transposed pair, e.g. XY and YX, shares one slot)
static const unsigned K_S_XX = 0 ;
static const unsigned K_S_YY = 1 ;
static const unsigned K_S_ZZ = 2 ;
static const unsigned K_S_XY = 3 ;
static const unsigned K_S_YZ = 4 ;
static const unsigned K_S_ZX = 5 ;
static const unsigned K_S_YX = 3 ;
static const unsigned K_S_ZY = 4 ;
static const unsigned K_S_XZ = 5 ;
static const unsigned K_S_SIZE = 6 ;
// Indexes into a 3 by 3 skew symmetric tensor stored as a length 3 vector
// (only the three independent off-diagonal components are stored)
static const unsigned K_V_XY = 0 ;
static const unsigned K_V_YZ = 1 ;
static const unsigned K_V_ZX = 2 ;
static const unsigned K_V_SIZE = 3 ;
//--------------------------------------------------------------------------
/** \brief  Dot product of two arrays of eight values each.
 *
 *  The eight products are summed left to right in the arithmetic type of
 *  ScalarA * ScalarB; the sum is then returned as double.
 *
 *  \param a  First array; elements 0..7 are read.
 *  \param b  Second array; elements 0..7 are read.
 */
template< typename ScalarA , typename ScalarB >
KOKKOS_INLINE_FUNCTION static
double dot8( const ScalarA * const a , const ScalarB * const b )
{ return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3] +
a[4] * b[4] + a[5] * b[5] + a[6] * b[6] + a[7] * b[7] ; }
//--------------------------------------------------------------------------
/** \brief  Compute one component of the eight-entry element gradient from
 *          the nodal coordinates of the other two directions.
 *
 *  \param x       [in]  Eight nodal values of the first coordinate.
 *  \param z       [in]  Eight nodal values of the second coordinate.
 *  \param grad_y  [out] Eight gradient entries, one per node.
 *
 *  The parameter names correspond to the Y-component case; the
 *  three-component overload reuses this routine for the other two
 *  components by permuting its arguments.
 */
template< class ScalarPrecise ,
class ScalarCompact >
KOKKOS_INLINE_FUNCTION static
void grad( const ScalarPrecise x[] ,
const ScalarPrecise z[] ,
ScalarCompact grad_y[] )
{
// Nodal differences of x.  The digits in each name are one-based node
// numbers: Rab = x[a-1] - x[b-1].  Each difference is formed in
// ScalarPrecise and then stored as ScalarCompact.
const ScalarCompact R42=(x[3] - x[1]);
const ScalarCompact R52=(x[4] - x[1]);
const ScalarCompact R54=(x[4] - x[3]);
const ScalarCompact R63=(x[5] - x[2]);
const ScalarCompact R83=(x[7] - x[2]);
const ScalarCompact R86=(x[7] - x[5]);
const ScalarCompact R31=(x[2] - x[0]);
const ScalarCompact R61=(x[5] - x[0]);
const ScalarCompact R74=(x[6] - x[3]);
const ScalarCompact R72=(x[6] - x[1]);
const ScalarCompact R75=(x[6] - x[4]);
const ScalarCompact R81=(x[7] - x[0]);
// Sums of difference pairs that are reused by several entries below.
const ScalarCompact t1=(R63 + R54);
const ScalarCompact t2=(R61 + R74);
const ScalarCompact t3=(R72 + R81);
const ScalarCompact t4 =(R86 + R42);
const ScalarCompact t5 =(R83 + R52);
const ScalarCompact t6 =(R75 + R31);
// Calculate Y gradient from X and Z data
grad_y[0] = (z[1] * t1) - (z[2] * R42) - (z[3] * t5) + (z[4] * t4) + (z[5] * R52) - (z[7] * R54);
grad_y[1] = (z[2] * t2) + (z[3] * R31) - (z[0] * t1) - (z[5] * t6) + (z[6] * R63) - (z[4] * R61);
grad_y[2] = (z[3] * t3) + (z[0] * R42) - (z[1] * t2) - (z[6] * t4) + (z[7] * R74) - (z[5] * R72);
grad_y[3] = (z[0] * t5) - (z[1] * R31) - (z[2] * t3) + (z[7] * t6) + (z[4] * R81) - (z[6] * R83);
grad_y[4] = (z[5] * t3) + (z[6] * R86) - (z[7] * t2) - (z[0] * t4) - (z[3] * R81) + (z[1] * R61);
grad_y[5] = (z[6] * t5) - (z[4] * t3) - (z[7] * R75) + (z[1] * t6) - (z[0] * R52) + (z[2] * R72);
grad_y[6] = (z[7] * t1) - (z[5] * t5) - (z[4] * R86) + (z[2] * t4) - (z[1] * R63) + (z[3] * R83);
grad_y[7] = (z[4] * t2) - (z[6] * t1) + (z[5] * R75) - (z[3] * t6) - (z[2] * R74) + (z[0] * R54);
}
// Three-component gradient: fills grad_x, grad_y and grad_z (eight entries
// each) by calling the two-input grad() kernel once per component with the
// coordinate arrays cyclically permuted:
//   grad_y from (x , z),  grad_x from (z , y),  grad_z from (y , x).
template< class ScalarPrecise ,
class ScalarCompact >
static KOKKOS_INLINE_FUNCTION
void grad( const ScalarPrecise x[] ,
const ScalarPrecise y[] ,
const ScalarPrecise z[] ,
ScalarCompact grad_x[] ,
ScalarCompact grad_y[] ,
ScalarCompact grad_z[] )
{
grad( x , z , grad_y );
grad( z , y , grad_x );
grad( y , x , grad_z );
}
//--------------------------------------------------------------------------
// One time step of the stretch / rotation update driven by a velocity
// gradient.
//
// dt       : time step (float; dt_half below is also held as float).
// v_gr     : input tensor indexed with the K_F_* constants.
// stretch  : symmetric tensor indexed with K_S_*; read, then updated in place.
// str_ten  : output, symmetric part of v_gr, indexed with K_S_*.
// rot      : tensor indexed with K_F_*; read, then updated in place.
//
// Steps, in order:
//   1. split v_gr into its symmetric part (str_ten) and skew part (vort);
//   2. solve a 3x3 system built from str_ten and stretch for z1..z3, then
//      add the vort components;
//   3. advance rot by solving a second 3x3 system whose coefficients are
//      +/- dt_half * z#;
//   4. advance stretch using the values it held on entry.
// None of the divisions is guarded against a zero denominator.
template< class ScalarPrecise ,
class ScalarCompact >
KOKKOS_INLINE_FUNCTION static
void polar_decomp( const float dt ,
const ScalarCompact v_gr[] ,
ScalarPrecise stretch[] /* INOUT */ ,
ScalarCompact str_ten[] /* OUT */ ,
ScalarCompact rot[] /* OUT */ )
{
const float dt_half = 0.5 * dt;
ScalarCompact vort[ K_V_SIZE ]; // Vorticity
// Symmetric part
str_ten[K_S_XX] = v_gr[K_F_XX];
str_ten[K_S_YY] = v_gr[K_F_YY];
str_ten[K_S_ZZ] = v_gr[K_F_ZZ];
str_ten[K_S_XY] = 0.5 * ( v_gr[K_F_XY] + v_gr[K_F_YX] );
str_ten[K_S_YZ] = 0.5 * ( v_gr[K_F_YZ] + v_gr[K_F_ZY] );
str_ten[K_S_ZX] = 0.5 * ( v_gr[K_F_ZX] + v_gr[K_F_XZ] );
// Skew Symmetric part
vort[K_V_XY] = 0.5 * ( v_gr[K_F_XY] - v_gr[K_F_YX] );
vort[K_V_YZ] = 0.5 * ( v_gr[K_F_YZ] - v_gr[K_F_ZY] );
vort[K_V_ZX] = 0.5 * ( v_gr[K_F_ZX] - v_gr[K_F_XZ] );
// calculate the rates of rotation via gauss elimination.
// z1..z3 start as the right-hand side and are overwritten with the solution.
ScalarCompact z1 = str_ten[K_S_XY] * stretch[K_S_ZX] -
str_ten[K_S_ZX] * stretch[K_S_XY] +
str_ten[K_S_YY] * stretch[K_S_YZ] -
str_ten[K_S_YZ] * stretch[K_S_YY] +
str_ten[K_S_YZ] * stretch[K_S_ZZ] -
str_ten[K_S_ZZ] * stretch[K_S_YZ];
ScalarCompact z2 = str_ten[K_S_ZX] * stretch[K_S_XX] -
str_ten[K_S_XX] * stretch[K_S_ZX] +
str_ten[K_S_YZ] * stretch[K_S_XY] -
str_ten[K_S_XY] * stretch[K_S_YZ] +
str_ten[K_S_ZZ] * stretch[K_S_ZX] -
str_ten[K_S_ZX] * stretch[K_S_ZZ];
ScalarCompact z3 = str_ten[K_S_XX] * stretch[K_S_XY] -
str_ten[K_S_XY] * stretch[K_S_XX] +
str_ten[K_S_XY] * stretch[K_S_YY] -
str_ten[K_S_YY] * stretch[K_S_XY] +
str_ten[K_S_ZX] * stretch[K_S_YZ] -
str_ten[K_S_YZ] * stretch[K_S_ZX];
{
// forward elimination
// The pivots are sums of stretch diagonal entries; their reciprocals are
// taken without a zero check.
const ScalarCompact a1inv = 1.0 / (stretch[K_S_YY] + stretch[K_S_ZZ]);
const ScalarCompact a4BYa1 = -1 * stretch[K_S_XY] * a1inv;
const ScalarCompact a2inv = 1.0 / (stretch[K_S_ZZ] + stretch[K_S_XX] + stretch[K_S_XY] * a4BYa1);
const ScalarCompact a5 = -stretch[K_S_YZ] + stretch[K_S_ZX] * a4BYa1;
z2 -= z1 * a4BYa1;
const ScalarCompact a6BYa1 = -1 * stretch[K_S_ZX] * a1inv;
const ScalarCompact a5BYa2 = a5 * a2inv;
z3 -= z1 * a6BYa1 - z2 * a5BYa2;
// backward substitution -
z3 /= (stretch[K_S_XX] + stretch[K_S_YY] + stretch[K_S_ZX] * a6BYa1 + a5 * a5BYa2);
z2 = (z2 - a5 * z3) * a2inv;
z1 = (z1*a1inv - a6BYa1 * z3 -a4BYa1 * z2);
}
// calculate rotation rates - recall that spin_rate is an asymmetric tensor,
// so compute spin rate vector as dual of spin rate tensor,
// i.e w_i = e_ijk * spin_rate_jk
z1 += vort[K_V_YZ];
z2 += vort[K_V_ZX];
z3 += vort[K_V_XY];
{
// update rotation tensor:
// 1) premultiply old rotation tensor to get right-hand side.
// All nine r_* values are formed from the incoming rot before any entry of
// rot is overwritten further down.
ScalarCompact r_XX = rot[K_F_XX] + dt_half*( z3 * rot[K_F_YX] - z2 * rot[K_F_ZX] );
ScalarCompact r_YX = rot[K_F_YX] + dt_half*( z1 * rot[K_F_ZX] - z3 * rot[K_F_XX] );
ScalarCompact r_ZX = rot[K_F_ZX] + dt_half*( z2 * rot[K_F_XX] - z1 * rot[K_F_YX] );
ScalarCompact r_XY = rot[K_F_XY] + dt_half*( z3 * rot[K_F_YY] - z2 * rot[K_F_ZY] );
ScalarCompact r_YY = rot[K_F_YY] + dt_half*( z1 * rot[K_F_ZY] - z3 * rot[K_F_XY] );
ScalarCompact r_ZY = rot[K_F_ZY] + dt_half*( z2 * rot[K_F_XY] - z1 * rot[K_F_YY] );
ScalarCompact r_XZ = rot[K_F_XZ] + dt_half*( z3 * rot[K_F_YZ] - z2 * rot[K_F_ZZ] );
ScalarCompact r_YZ = rot[K_F_YZ] + dt_half*( z1 * rot[K_F_ZZ] - z3 * rot[K_F_XZ] );
ScalarCompact r_ZZ = rot[K_F_ZZ] + dt_half*( z2 * rot[K_F_XZ] - z1 * rot[K_F_YZ] );
// 2) solve for new rotation tensor via gauss elimination.
// forward elimination -
const ScalarCompact a12 = - dt_half * z3;
const ScalarCompact a13 = dt_half * z2;
ScalarCompact b32 = - dt_half * z1;
const ScalarCompact a22inv = 1.0 / (1.0 + a12 * a12);
const ScalarCompact a13a12 = a13*a12;
const ScalarCompact a23 = b32 + a13a12;
r_YX += r_XX * a12;
r_YY += r_XY * a12;
r_YZ += r_XZ * a12;
b32 = (b32 - a13a12) * a22inv;
r_ZX += r_XX * a13 + r_YX * b32;
r_ZY += r_XY * a13 + r_YY * b32;
r_ZZ += r_XZ * a13 + r_YZ * b32;
// backward substitution -
// The Z row is written first because the Y and X rows read the new Z (and
// Y) entries of rot.
const ScalarCompact a33inv = 1.0 / (1.0 + a13 * a13 + a23 * b32);
rot[K_F_ZX] = r_ZX * a33inv;
rot[K_F_ZY] = r_ZY * a33inv;
rot[K_F_ZZ] = r_ZZ * a33inv;
rot[K_F_YX] = ( r_YX - rot[K_F_ZX] * a23 ) * a22inv;
rot[K_F_YY] = ( r_YY - rot[K_F_ZY] * a23 ) * a22inv;
rot[K_F_YZ] = ( r_YZ - rot[K_F_ZZ] * a23 ) * a22inv;
rot[K_F_XX] = r_XX - rot[K_F_ZX] * a13 - rot[K_F_YX] * a12;
rot[K_F_XY] = r_XY - rot[K_F_ZY] * a13 - rot[K_F_YY] * a12;
rot[K_F_XZ] = r_XZ - rot[K_F_ZZ] * a13 - rot[K_F_YZ] * a12;
}
// update stretch tensor in the new configuration -
const ScalarCompact a1 = str_ten[K_S_XY] + vort[K_V_XY];
const ScalarCompact a2 = str_ten[K_S_YZ] + vort[K_V_YZ];
const ScalarCompact a3 = str_ten[K_S_ZX] + vort[K_V_ZX];
const ScalarCompact b1 = str_ten[K_S_ZX] - vort[K_V_ZX];
const ScalarCompact b2 = str_ten[K_S_XY] - vort[K_V_XY];
const ScalarCompact b3 = str_ten[K_S_YZ] - vort[K_V_YZ];
// Snapshot of stretch (rounded to ScalarCompact) so that every update below
// uses the values from before this step.
const ScalarCompact s_XX = stretch[K_S_XX];
const ScalarCompact s_YY = stretch[K_S_YY];
const ScalarCompact s_ZZ = stretch[K_S_ZZ];
const ScalarCompact s_XY = stretch[K_S_XY];
const ScalarCompact s_YZ = stretch[K_S_YZ];
const ScalarCompact s_ZX = stretch[K_S_ZX];
stretch[K_S_XX] += dt * (str_ten[K_S_XX] * s_XX + ( a1 + z3 ) * s_XY + ( b1 - z2 ) * s_ZX);
stretch[K_S_YY] += dt * (str_ten[K_S_YY] * s_YY + ( a2 + z1 ) * s_YZ + ( b2 - z3 ) * s_XY);
stretch[K_S_ZZ] += dt * (str_ten[K_S_ZZ] * s_ZZ + ( a3 + z2 ) * s_ZX + ( b3 - z1 ) * s_YZ);
stretch[K_S_XY] += dt * (str_ten[K_S_XX] * s_XY + ( a1 ) * s_YY + ( b1 ) * s_YZ - z3 * s_XX + z1 * s_ZX);
stretch[K_S_YZ] += dt * (str_ten[K_S_YY] * s_YZ + ( a2 ) * s_ZZ + ( b2 ) * s_ZX - z1 * s_YY + z2 * s_XY);
stretch[K_S_ZX] += dt * (str_ten[K_S_ZZ] * s_ZX + ( a3 ) * s_XX + ( b3 ) * s_XY - z2 * s_ZZ + z3 * s_YZ);
}
//--------------------------------------------------------------------------
// Forms the triple product  rot^T * str_ten * rot  and stores its six
// K_S_XX .. K_S_ZX components in rot_str.
//
// str_ten : input tensor, read through all nine K_S_* index names
//           (presumably K_S_YX / K_S_ZY / K_S_XZ alias the symmetric
//           entries - confirm against the enum definition).
// rot     : input tensor indexed with K_F_*.
// rot_str : output, six entries.
//
// The intermediate product str_ten * rot is fully evaluated before the
// first write to rot_str.
template< typename ScalarCompact >
static KOKKOS_INLINE_FUNCTION
void rotate_tensor( const ScalarCompact str_ten[] ,
                    const ScalarCompact rot[] ,
                    ScalarCompact rot_str[] )
{
  // Entries of (str_ten * rot); the suffix is <row><column>.
  const ScalarCompact sr_xx = str_ten[K_S_XX]*rot[K_F_XX] + str_ten[K_S_XY]*rot[K_F_YX] + str_ten[K_S_XZ]*rot[K_F_ZX];
  const ScalarCompact sr_yx = str_ten[K_S_YX]*rot[K_F_XX] + str_ten[K_S_YY]*rot[K_F_YX] + str_ten[K_S_YZ]*rot[K_F_ZX];
  const ScalarCompact sr_zx = str_ten[K_S_ZX]*rot[K_F_XX] + str_ten[K_S_ZY]*rot[K_F_YX] + str_ten[K_S_ZZ]*rot[K_F_ZX];

  const ScalarCompact sr_xy = str_ten[K_S_XX]*rot[K_F_XY] + str_ten[K_S_XY]*rot[K_F_YY] + str_ten[K_S_XZ]*rot[K_F_ZY];
  const ScalarCompact sr_yy = str_ten[K_S_YX]*rot[K_F_XY] + str_ten[K_S_YY]*rot[K_F_YY] + str_ten[K_S_YZ]*rot[K_F_ZY];
  const ScalarCompact sr_zy = str_ten[K_S_ZX]*rot[K_F_XY] + str_ten[K_S_ZY]*rot[K_F_YY] + str_ten[K_S_ZZ]*rot[K_F_ZY];

  const ScalarCompact sr_xz = str_ten[K_S_XX]*rot[K_F_XZ] + str_ten[K_S_XY]*rot[K_F_YZ] + str_ten[K_S_XZ]*rot[K_F_ZZ];
  const ScalarCompact sr_yz = str_ten[K_S_YX]*rot[K_F_XZ] + str_ten[K_S_YY]*rot[K_F_YZ] + str_ten[K_S_YZ]*rot[K_F_ZZ];
  const ScalarCompact sr_zz = str_ten[K_S_ZX]*rot[K_F_XZ] + str_ten[K_S_ZY]*rot[K_F_YZ] + str_ten[K_S_ZZ]*rot[K_F_ZZ];

  // Left-multiply by rot^T, keeping only the six stored components.
  rot_str[ K_S_XX ] = rot[K_F_XX] * sr_xx + rot[K_F_YX] * sr_yx + rot[K_F_ZX] * sr_zx;
  rot_str[ K_S_YY ] = rot[K_F_XY] * sr_xy + rot[K_F_YY] * sr_yy + rot[K_F_ZY] * sr_zy;
  rot_str[ K_S_ZZ ] = rot[K_F_XZ] * sr_xz + rot[K_F_YZ] * sr_yz + rot[K_F_ZZ] * sr_zz;
  rot_str[ K_S_XY ] = rot[K_F_XX] * sr_xy + rot[K_F_YX] * sr_yy + rot[K_F_ZX] * sr_zy;
  rot_str[ K_S_YZ ] = rot[K_F_XY] * sr_xz + rot[K_F_YY] * sr_yz + rot[K_F_ZY] * sr_zz;
  rot_str[ K_S_ZX ] = rot[K_F_XZ] * sr_xx + rot[K_F_YZ] * sr_yx + rot[K_F_ZZ] * sr_zx;
}
//--------------------------------------------------------------------------
// Forms the triple product  rot * stress * rot^T  and stores its six
// K_S_XX .. K_S_ZX components in rot_stress.
//
// stress     : input tensor, read through all nine K_S_* index names
//              (presumably the off-diagonal names alias the symmetric
//              entries - confirm against the enum definition).
// rot        : input tensor indexed with K_F_*.
// rot_stress : output, six entries.
//
// Each entry of the intermediate product stress * rot^T is rounded to
// ScalarCompact, and all nine are evaluated before the first output write.
template< class ScalarPrecise ,
          class ScalarCompact >
static KOKKOS_INLINE_FUNCTION
void rotate_tensor_backward( const ScalarPrecise stress[] ,
                             const ScalarCompact rot[] ,
                             ScalarCompact rot_stress[] )
{
  // Entries of (stress * rot^T); the suffix is <row><column>.
  const ScalarCompact srt_xx = stress[K_S_XX]*rot[K_F_XX]+ stress[K_S_XY]*rot[K_F_XY]+ stress[K_S_XZ]*rot[K_F_XZ];
  const ScalarCompact srt_yx = stress[K_S_YX]*rot[K_F_XX]+ stress[K_S_YY]*rot[K_F_XY]+ stress[K_S_YZ]*rot[K_F_XZ];
  const ScalarCompact srt_zx = stress[K_S_ZX]*rot[K_F_XX]+ stress[K_S_ZY]*rot[K_F_XY]+ stress[K_S_ZZ]*rot[K_F_XZ];

  const ScalarCompact srt_xy = stress[K_S_XX]*rot[K_F_YX]+ stress[K_S_XY]*rot[K_F_YY]+ stress[K_S_XZ]*rot[K_F_YZ];
  const ScalarCompact srt_yy = stress[K_S_YX]*rot[K_F_YX]+ stress[K_S_YY]*rot[K_F_YY]+ stress[K_S_YZ]*rot[K_F_YZ];
  const ScalarCompact srt_zy = stress[K_S_ZX]*rot[K_F_YX]+ stress[K_S_ZY]*rot[K_F_YY]+ stress[K_S_ZZ]*rot[K_F_YZ];

  const ScalarCompact srt_xz = stress[K_S_XX]*rot[K_F_ZX]+ stress[K_S_XY]*rot[K_F_ZY]+ stress[K_S_XZ]*rot[K_F_ZZ];
  const ScalarCompact srt_yz = stress[K_S_YX]*rot[K_F_ZX]+ stress[K_S_YY]*rot[K_F_ZY]+ stress[K_S_YZ]*rot[K_F_ZZ];
  const ScalarCompact srt_zz = stress[K_S_ZX]*rot[K_F_ZX]+ stress[K_S_ZY]*rot[K_F_ZY]+ stress[K_S_ZZ]*rot[K_F_ZZ];

  // Left-multiply by rot, keeping only the six stored components.
  rot_stress[ K_S_XX ] = rot[K_F_XX]*srt_xx + rot[K_F_XY]*srt_yx + rot[K_F_XZ]*srt_zx;
  rot_stress[ K_S_YY ] = rot[K_F_YX]*srt_xy + rot[K_F_YY]*srt_yy + rot[K_F_YZ]*srt_zy;
  rot_stress[ K_S_ZZ ] = rot[K_F_ZX]*srt_xz + rot[K_F_ZY]*srt_yz + rot[K_F_ZZ]*srt_zz;
  rot_stress[ K_S_XY ] = rot[K_F_XX]*srt_xy + rot[K_F_XY]*srt_yy + rot[K_F_XZ]*srt_zy;
  rot_stress[ K_S_YZ ] = rot[K_F_YX]*srt_xz + rot[K_F_YY]*srt_yz + rot[K_F_YZ]*srt_zz;
  rot_stress[ K_S_ZX ] = rot[K_F_ZX]*srt_xx + rot[K_F_ZY]*srt_yx + rot[K_F_ZZ]*srt_zx;
}
//--------------------------------------------------------------------------
// Adds one time step of stress increment to 'stress' (in place).
//
// dt           : time step.
// two_mu       : multiplier applied to the trace-free part of rot_str.
// bulk_modulus : multiplier applied to the trace of rot_str.
// rot_str      : input, six entries indexed with K_S_*.
// stress       : six entries indexed with K_S_*, incremented.
//
// Diagonal entries receive  dt * ( two_mu * (rot_str_ii - trace/3) + trace * bulk_modulus );
// off-diagonal entries receive  dt * two_mu * rot_str_ij.
template< class ScalarPrecise ,
          class ScalarCompact >
KOKKOS_INLINE_FUNCTION static
void update_stress( const float dt ,
                    const float two_mu ,
                    const float bulk_modulus ,
                    const ScalarCompact rot_str[] ,
                    ScalarPrecise stress[] )
{
  // Trace of rot_str and the two quantities derived from it.
  const ScalarCompact trace       = rot_str[ K_S_XX ] + rot_str[ K_S_YY ] + rot_str[ K_S_ZZ ] ;
  const ScalarCompact trace_bulk  = trace * bulk_modulus ;
  const ScalarCompact trace_third = trace / 3.0 ;

  // Diagonal components.
  stress[K_S_XX] += dt * ( two_mu * ( rot_str[K_S_XX] - trace_third ) + trace_bulk );
  stress[K_S_YY] += dt * ( two_mu * ( rot_str[K_S_YY] - trace_third ) + trace_bulk );
  stress[K_S_ZZ] += dt * ( two_mu * ( rot_str[K_S_ZZ] - trace_third ) + trace_bulk );

  // Off-diagonal components.
  stress[K_S_XY] += dt * two_mu * rot_str[K_S_XY];
  stress[K_S_YZ] += dt * two_mu * rot_str[K_S_YZ];
  stress[K_S_ZX] += dt * two_mu * rot_str[K_S_ZX];
}
//--------------------------------------------------------------------------
// Computes a force vector for each of the ElemNodeCount element nodes and
// the accumulated product of those forces with the nodal velocities.
//
// vx, vy, vz            : nodal velocity components.
// grad_x, grad_y, grad_z: nodal gradient components.
// total_stress12th      : tensor read through all nine K_S_* index names.
// force                 : output, force[node][0..2].
// energy                : output, sum over nodes of force . velocity; the sum
//                         is accumulated in ScalarPrecise and converted to
//                         ScalarCompact on assignment.
template< class ScalarPrecise ,
          class ScalarCompact >
static KOKKOS_INLINE_FUNCTION
void comp_force( const ScalarPrecise vx[] ,
                 const ScalarPrecise vy[] ,
                 const ScalarPrecise vz[] ,
                 const ScalarCompact grad_x[] ,
                 const ScalarCompact grad_y[] ,
                 const ScalarCompact grad_z[] ,
                 const ScalarCompact total_stress12th[] ,
                 ScalarCompact force[][ SpatialDim ] ,
                 ScalarCompact & energy )
{
  ScalarPrecise energy_sum = 0 ;

  for ( unsigned node = 0 ; node < ElemNodeCount ; ++node ) {
    ScalarCompact * const f = force[node] ;

    // One row of the stress tensor per force component.
    f[0] = total_stress12th[K_S_XX] * grad_x[node] +
           total_stress12th[K_S_XY] * grad_y[node] +
           total_stress12th[K_S_XZ] * grad_z[node] ;

    f[1] = total_stress12th[K_S_YX] * grad_x[node] +
           total_stress12th[K_S_YY] * grad_y[node] +
           total_stress12th[K_S_YZ] * grad_z[node] ;

    f[2] = total_stress12th[K_S_ZX] * grad_x[node] +
           total_stress12th[K_S_ZY] * grad_y[node] +
           total_stress12th[K_S_ZZ] * grad_z[node] ;

    // Uses the values just stored, i.e. already rounded to ScalarCompact.
    energy_sum += f[0] * vx[node] +
                  f[1] * vy[node] +
                  f[2] * vz[node] ;
  }

  energy = energy_sum ;
}
//--------------------------------------------------------------------------
};
} // namespace Explicit
#endif /* #ifndef KOKKOS_HEXEXPLICITFUNCTIONS_HPP */

View File

@ -1,341 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef HYBRIDFEM_IMPLICIT_HPP
#define HYBRIDFEM_IMPLICIT_HPP
#include <utility>
#include <iostream>
#include <iomanip>
#include <Kokkos_Core.hpp>
#include <SparseLinearSystem.hpp>
#include <SparseLinearSystemFill.hpp>
#include <ImplicitFunctors.hpp>
#include <FEMesh.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace HybridFEM {
namespace Implicit {
// Wall-clock timings (in seconds, as produced by the timers in run()) for
// the phases of one implicit solve.  All fields start at zero.
struct PerformanceData {
  double mesh_time ;
  double graph_time ;
  double elem_time ;
  double matrix_gather_fill_time ;
  double matrix_boundary_condition_time ;
  double cg_iteration_time ;

  PerformanceData()
    : mesh_time(0)
    , graph_time(0)
    , elem_time(0)
    , matrix_gather_fill_time(0)
    , matrix_boundary_condition_time(0)
    , cg_iteration_time(0)
  {}

  // Field-by-field minimum of *this and rhs, stored into *this.
  // Note that a freshly constructed object holds zeros, so it should be
  // seeded with a real measurement before best() is used to track minima.
  void best( const PerformanceData & rhs )
  {
    keep_smaller( mesh_time                      , rhs.mesh_time );
    keep_smaller( graph_time                     , rhs.graph_time );
    keep_smaller( elem_time                      , rhs.elem_time );
    keep_smaller( matrix_gather_fill_time        , rhs.matrix_gather_fill_time );
    keep_smaller( matrix_boundary_condition_time , rhs.matrix_boundary_condition_time );
    keep_smaller( cg_iteration_time              , rhs.cg_iteration_time );
  }

private:

  // Overwrite 'current' only when 'candidate' is strictly smaller.
  static void keep_smaller( double & current , const double candidate )
  {
    if ( candidate < current ) { current = candidate ; }
  }
};
//----------------------------------------------------------------------------
// Assemble and solve one implicit linear system on 'mesh' and return the
// time spent in each phase.
//
// mesh          : finite-element mesh; its parallel_data_map.machine is the
//                 communicator used for the comm::max reductions.
// (two unnamed ints) : accepted for interface symmetry but not used here.
// global_max_z  : forwarded to BoundaryFunctor::apply (passed twice).
// print_sample  : when true, prints the solution at owned nodes whose x and y
//                 coordinates are both <= 0.
//
// Returns a PerformanceData whose graph, element, gather-fill, boundary and
// CG fields are filled in; mesh_time is left at its constructed value.
// Each timed phase is followed by execution_space::fence() before the clock
// is read, and the reading is reduced with comm::max over 'machine'.
template< typename Scalar , class FixtureType >
PerformanceData run( const typename FixtureType::FEMeshType & mesh ,
const int , // global_max_x ,
const int , // global_max_y ,
const int global_max_z ,
const bool print_sample )
{
typedef Scalar scalar_type ;
typedef FixtureType fixture_type ;
typedef typename fixture_type::execution_space execution_space;
//typedef typename execution_space::size_type size_type ; // unused
typedef typename fixture_type::FEMeshType mesh_type ;
typedef typename fixture_type::coordinate_scalar_type coordinate_scalar_type ;
enum { ElementNodeCount = fixture_type::element_node_count };
const comm::Machine machine = mesh.parallel_data_map.machine ;
const size_t element_count = mesh.elem_node_ids.dimension_0();
// Stopping criteria handed to cgsolve below.
const size_t iteration_limit = 200 ;
const double residual_tolerance = 1e-14 ;
size_t iteration_count = 0 ;
double residual_norm = 0 ;
PerformanceData perf_data ;
//------------------------------------
// Sparse linear system types:
typedef Kokkos::View< scalar_type* , execution_space > vector_type ;
typedef Kokkos::CrsMatrix< scalar_type , execution_space > matrix_type ;
typedef typename matrix_type::graph_type matrix_graph_type ;
typedef typename matrix_type::coefficients_type matrix_coefficients_type ;
typedef GraphFactory< matrix_graph_type , mesh_type > graph_factory ;
//------------------------------------
// Problem setup types:
typedef ElementComputation< scalar_type , scalar_type , execution_space > ElementFunctor ;
typedef DirichletBoundary< scalar_type , scalar_type , execution_space > BoundaryFunctor ;
typedef typename ElementFunctor::elem_matrices_type elem_matrices_type ;
typedef typename ElementFunctor::elem_vectors_type elem_vectors_type ;
typedef GatherFill< matrix_type ,
mesh_type ,
elem_matrices_type ,
elem_vectors_type > GatherFillFunctor ;
//------------------------------------
// Coefficients passed to ElementFunctor::apply.
const scalar_type elem_coeff_K = 2 ;
const scalar_type elem_load_Q = 1 ;
matrix_type linsys_matrix ;
vector_type linsys_rhs ;
vector_type linsys_solution ;
typename graph_factory::element_map_type element_map ;
// The timer is constructed here and first read after the graph is built,
// without an intervening reset().
Kokkos::Impl::Timer wall_clock ;
//------------------------------------
// Generate sparse matrix graph and element->graph map.
graph_factory::create( mesh , linsys_matrix.graph , element_map );
execution_space::fence();
perf_data.graph_time = comm::max( machine , wall_clock.seconds() );
//------------------------------------
// Allocate linear system coefficients and rhs:
// row_map has one more entry than there are rows, hence the "- 1".
const size_t local_owned_length =
linsys_matrix.graph.row_map.dimension_0() - 1 ;
linsys_matrix.coefficients =
matrix_coefficients_type( "coeff" , linsys_matrix.graph.entries.dimension_0() );
linsys_rhs = vector_type( "rhs" , local_owned_length );
linsys_solution = vector_type( "solution" , local_owned_length );
//------------------------------------
// Fill linear system
{
// The element arrays live only inside this scope.
elem_matrices_type elem_matrices ;
elem_vectors_type elem_vectors ;
// Allocation is skipped when the mesh has no elements.
if ( element_count ) {
elem_matrices = elem_matrices_type( std::string("elem_matrices"), element_count );
elem_vectors = elem_vectors_type ( std::string("elem_vectors"), element_count );
}
//------------------------------------
// Compute element matrices and vectors:
wall_clock.reset();
ElementFunctor::apply( mesh ,
elem_matrices , elem_vectors ,
elem_coeff_K , elem_load_Q );
execution_space::fence();
perf_data.elem_time = comm::max( machine , wall_clock.seconds() );
//------------------------------------
// Fill linear system coefficients:
wall_clock.reset();
GatherFillFunctor::apply( linsys_matrix , linsys_rhs ,
mesh , element_map , elem_matrices , elem_vectors );
execution_space::fence();
perf_data.matrix_gather_fill_time = comm::max( machine , wall_clock.seconds() );
// Apply boundary conditions:
// NOTE(review): the meaning of the four trailing arguments is defined by
// DirichletBoundary, which is not visible here - confirm before changing.
wall_clock.reset();
BoundaryFunctor::apply( linsys_matrix , linsys_rhs , mesh ,
0 , global_max_z , 0 , global_max_z );
execution_space::fence();
perf_data.matrix_boundary_condition_time = comm::max( machine , wall_clock.seconds() );
}
//------------------------------------
// Solve linear system
// cgsolve receives perf_data.cg_iteration_time directly as an argument,
// alongside the iteration count and residual norm.
cgsolve( mesh.parallel_data_map ,
linsys_matrix , linsys_rhs , linsys_solution ,
iteration_count , residual_norm ,
perf_data.cg_iteration_time ,
iteration_limit , residual_tolerance );
//------------------------------------
if ( print_sample ) {
// Copy coordinates and solution to host mirrors before reading them.
typename mesh_type::node_coords_type::HostMirror coords_h =
Kokkos::create_mirror( mesh.node_coords );
typename vector_type::HostMirror X_h =
Kokkos::create_mirror( linsys_solution );
Kokkos::deep_copy( coords_h , mesh.node_coords );
Kokkos::deep_copy( X_h , linsys_solution );
for ( size_t i = 0 ; i < mesh.parallel_data_map.count_owned ; ++i ) {
const coordinate_scalar_type x = coords_h(i,0);
const coordinate_scalar_type y = coords_h(i,1);
const coordinate_scalar_type z = coords_h(i,2);
// Only nodes with x <= 0 and y <= 0 are printed.
if ( x <= 0 && y <= 0 ) {
std::cout << " node( " << x << " " << y << " " << z << " ) = "
<< X_h(i) << std::endl ;
}
}
}
return perf_data ;
}
//----------------------------------------------------------------------------
// Benchmark driver: runs the implicit solve on a sequence of box meshes of
// increasing size and prints the best (minimum) phase timings per size.
//
// label          : text appended to the table title.
// machine        : communicator; only rank 0 prints.
// gang_count     : forwarded to the mesh fixture.
// elem_count_beg : first requested element count (must be > 0).
// elem_count_end : exclusive upper bound on the requested element count
//                  (must be > 0); the request doubles every iteration.
// runs           : number of repetitions per mesh size (must be > 0).
//
// For a requested count i the mesh dimensions are
//   ix = max( 1 , (int) cbrt( i / 2 ) ) , iy = ix + 1 , iz = 2 * iy
// and the "Size" column reports ix * iy * iz.  Times are multiplied by 1000
// before printing, matching the "millisec" column headers.
//
// Non-positive elem_count_beg, elem_count_end or runs make the function
// return without printing anything.  A negative elem_count_beg would
// otherwise keep doubling away from elem_count_end until the signed counter
// overflows, and a negative runs would print timings that were never
// measured.
template< typename Scalar , class Device >
void driver( const char * const label ,
             comm::Machine machine ,
             const int gang_count ,
             const int elem_count_beg ,
             const int elem_count_end ,
             const int runs )
{
  typedef Scalar scalar_type ;
  typedef Device execution_space ;
  typedef double coordinate_scalar_type ;
  typedef FixtureElementHex8 fixture_element_type ;

  typedef BoxMeshFixture< coordinate_scalar_type ,
                          execution_space ,
                          fixture_element_type > fixture_type ;

  typedef typename fixture_type::FEMeshType mesh_type ;

  const size_t proc_count = comm::size( machine );
  const size_t proc_rank  = comm::rank( machine );

  // Reject empty and negative requests alike (see header comment).
  if ( elem_count_beg <= 0 || elem_count_end <= 0 || runs <= 0 ) return ;

  if ( comm::rank( machine ) == 0 ) {
    std::cout << std::endl ;
    std::cout << "\"Kokkos::HybridFE::Implicit " << label << "\"" << std::endl;
    std::cout << "\"Size\" , \"Graphing\" , \"Element\" , \"Fill\" , \"Boundary\" , \"CG-Iter\"" << std::endl
              << "\"elems\" , \"millisec\" , \"millisec\" , \"millisec\" , \"millisec\" , \"millisec\"" << std::endl ;
  }

  for(int i = elem_count_beg ; i < elem_count_end ; i *= 2 )
  {
    // Box dimensions derived from the requested element count.
    const int ix = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) );
    const int iy = ix + 1 ;
    const int iz = 2 * iy ;
    const int n  = ix * iy * iz ;

    mesh_type mesh =
      fixture_type::create( proc_count , proc_rank , gang_count ,
                            ix , iy , iz );

    mesh.parallel_data_map.machine = machine ;

    PerformanceData perf_data , perf_best ;

    for(int j = 0; j < runs; j++){

      perf_data = run<scalar_type,fixture_type>(mesh,ix,iy,iz, false );

      // The first run seeds the minimum; a default-constructed
      // PerformanceData is all zeros and would win every comparison.
      if( j == 0 ) {
        perf_best = perf_data ;
      }
      else {
        perf_best.best( perf_data );
      }
    }

    if ( comm::rank( machine ) == 0 ) {
      std::cout << std::setw(8) << n << " , "
                << std::setw(10) << perf_best.graph_time * 1000 << " , "
                << std::setw(10) << perf_best.elem_time * 1000 << " , "
                << std::setw(10) << perf_best.matrix_gather_fill_time * 1000 << " , "
                << std::setw(10) << perf_best.matrix_boundary_condition_time * 1000 << " , "
                << std::setw(10) << perf_best.cg_iteration_time * 1000
                << std::endl ;
    }
  }
}
//----------------------------------------------------------------------------
} /* namespace Implicit */
} /* namespace HybridFEM */
#endif /* #ifndef HYBRIDFEM_IMPLICIT_HPP */

View File

@ -1,585 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <iostream>
#include <fstream>
#include <iomanip>
#include <cstdlib>
#include <cmath>
namespace HybridFEM {
namespace Implicit {
//----------------------------------------------------------------------------
// Gauss-Legendre quadrature rules on [-1,1] (Dim == 1) and their tensor
// product on [-1,1]^3 (Dim == 3).
//
//   TensorIntegration<Scalar,1,N> : N points 'pts' and weights 'wts', N = 1,2,3.
//   TensorIntegration<Scalar,3,Order> : Order^3 points pts[n][0..2] and
//       weights wts[n]; the first coordinate varies fastest with n.
//
// The 2- and 3-point constants are written to full double precision so that
// the rules integrate polynomials up to their nominal degree to round-off
// when Scalar is double; shorter literals limit the achievable accuracy to
// the number of digits written.
template< typename Scalar , unsigned Dim , unsigned N >
struct TensorIntegration ;

// One-point rule: midpoint with weight 2.
template<typename Scalar >
struct TensorIntegration<Scalar,1,1> {
  Scalar pts[1] ;
  Scalar wts[1] ;

  TensorIntegration() { pts[0] = 0 ; wts[0] = 2 ; }
};

// Two-point rule: points at +/- 1/sqrt(3), unit weights.
template<typename Scalar >
struct TensorIntegration<Scalar,1,2>
{
  Scalar pts[2] ;
  Scalar wts[2] ;

  TensorIntegration()
  {
    const Scalar x2 = 0.57735026918962576451 ; // 1 / sqrt(3)
    pts[0] = -x2; wts[0] = 1.0;
    pts[1] =  x2; wts[1] = 1.0;
  }
};

// Three-point rule: points at 0 and +/- sqrt(3/5), weights 8/9 and 5/9.
template<typename Scalar >
struct TensorIntegration<Scalar,1,3>
{
  Scalar pts[3] ;
  Scalar wts[3] ;

  TensorIntegration()
  {
    const Scalar x3 = 0.77459666924148337704 ; // sqrt(3/5)
    const Scalar w1 = 5.0 / 9.0 ;
    const Scalar w2 = 8.0 / 9.0 ;
    pts[0] = -x3 ;  wts[0] = w1 ;
    pts[1] =  0 ;   wts[1] = w2 ;
    pts[2] =  x3 ;  wts[2] = w1 ;
  }
};

// Three-dimensional tensor product of the one-dimensional rule of the
// given order.
template< typename Scalar , unsigned Order >
struct TensorIntegration<Scalar,3,Order>
{
  static const unsigned N = Order * Order * Order ;

  Scalar pts[N][3] ;
  Scalar wts[N];

  TensorIntegration()
  {
    TensorIntegration<Scalar,1,Order> oneD ;

    // n enumerates (i,j,k) with i fastest, then j, then k.
    unsigned n = 0 ;
    for ( unsigned k = 0 ; k < Order ; ++k ) {
    for ( unsigned j = 0 ; j < Order ; ++j ) {
    for ( unsigned i = 0 ; i < Order ; ++i , ++n ) {
      pts[n][0] = oneD.pts[i] ;
      pts[n][1] = oneD.pts[j] ;
      pts[n][2] = oneD.pts[k] ;
      wts[n] = oneD.wts[i] * oneD.wts[j] * oneD.wts[k] ;
    }}}
  }
};
//----------------------------------------------------------------------------
template< typename Scalar >
struct ShapeFunctionEvaluation {
static const unsigned FunctionCount = 8 ;
static const unsigned SpatialDimension = 3 ;
static const unsigned IntegrationOrder = 2 ;
typedef TensorIntegration< Scalar , SpatialDimension , IntegrationOrder >
TensorIntegrationType ;
static const unsigned PointCount = TensorIntegrationType::N ;
Scalar value [ PointCount ][ FunctionCount ] ;
Scalar gradient[ PointCount ][ FunctionCount * SpatialDimension ];
Scalar weight [ PointCount ];
ShapeFunctionEvaluation()
{
const TensorIntegration< Scalar , SpatialDimension , IntegrationOrder >
integration ;
const Scalar ONE8TH = 0.125 ;
for ( unsigned i = 0 ; i < PointCount ; ++i ) {
const Scalar u = 1.0 - integration.pts[i][0];
const Scalar v = 1.0 - integration.pts[i][1];
const Scalar w = 1.0 - integration.pts[i][2];
const Scalar up1 = 1.0 + integration.pts[i][0];
const Scalar vp1 = 1.0 + integration.pts[i][1];
const Scalar wp1 = 1.0 + integration.pts[i][2];
weight[i] = integration.wts[i] ;
// Vaues:
value[i][0] = ONE8TH * u * v * w ;
value[i][1] = ONE8TH * up1 * v * w ;
value[i][2] = ONE8TH * up1 * vp1 * w ;
value[i][3] = ONE8TH * u * vp1 * w ;
value[i][4] = ONE8TH * u * v * wp1 ;
value[i][5] = ONE8TH * up1 * v * wp1 ;
value[i][6] = ONE8TH * up1 * vp1 * wp1 ;
value[i][7] = ONE8TH * u * vp1 * wp1 ;
//fn 0 = u * v * w
gradient[i][ 0] = ONE8TH * -1 * v * w ;
gradient[i][ 1] = ONE8TH * u * -1 * w ;
gradient[i][ 2] = ONE8TH * u * v * -1 ;
//fn 1 = up1 * v * w
gradient[i][ 3] = ONE8TH * 1 * v * w ;
gradient[i][ 4] = ONE8TH * up1 * -1 * w ;
gradient[i][ 5] = ONE8TH * up1 * v * -1 ;
//fn 2 = up1 * vp1 * w
gradient[i][ 6] = ONE8TH * 1 * vp1 * w ;
gradient[i][ 7] = ONE8TH * up1 * 1 * w ;
gradient[i][ 8] = ONE8TH * up1 * vp1 * -1 ;
//fn 3 = u * vp1 * w
gradient[i][ 9] = ONE8TH * -1 * vp1 * w ;
gradient[i][10] = ONE8TH * u * 1 * w ;
gradient[i][11] = ONE8TH * u * vp1 * -1 ;
//fn 4 = u * v * wp1
gradient[i][12] = ONE8TH * -1 * v * wp1 ;
gradient[i][13] = ONE8TH * u * -1 * wp1 ;
gradient[i][14] = ONE8TH * u * v * 1 ;
//fn 5 = up1 * v * wp1
gradient[i][15] = ONE8TH * 1 * v * wp1 ;
gradient[i][16] = ONE8TH * up1 * -1 * wp1 ;
gradient[i][17] = ONE8TH * up1 * v * 1 ;
//fn 6 = up1 * vp1 * wp1
gradient[i][18] = ONE8TH * 1 * vp1 * wp1 ;
gradient[i][19] = ONE8TH * up1 * 1 * wp1 ;
gradient[i][20] = ONE8TH * up1 * vp1 * 1 ;
//fn 7 = u * vp1 * wp1
gradient[i][21] = ONE8TH * -1 * vp1 * wp1 ;
gradient[i][22] = ONE8TH * u * 1 * wp1 ;
gradient[i][23] = ONE8TH * u * vp1 * 1 ;
}
}
};
//----------------------------------------------------------------------------
template< typename ScalarType , typename ScalarCoordType , class DeviceType >
struct ElementComputation
{
typedef DeviceType execution_space;
typedef ScalarType scalar_type ;
typedef typename execution_space::size_type size_type ;
static const size_type ElementNodeCount = 8 ;
typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;
typedef Kokkos::View< scalar_type[][ElementNodeCount][ElementNodeCount] , execution_space > elem_matrices_type ;
typedef Kokkos::View< scalar_type[][ElementNodeCount] , execution_space > elem_vectors_type ;
typedef ShapeFunctionEvaluation< scalar_type > shape_function_data ;
static const unsigned SpatialDim = shape_function_data::SpatialDimension ;
static const unsigned FunctionCount = shape_function_data::FunctionCount ;
private:
const shape_function_data shape_eval ;
typename mesh_type::elem_node_ids_type elem_node_ids ;
typename mesh_type::node_coords_type node_coords ;
elem_matrices_type element_matrices ;
elem_vectors_type element_vectors ;
scalar_type coeff_K ;
scalar_type coeff_Q ;
// Private constructor: the functor is only created through apply().
// Default-constructs the shape-function tables and copies the mesh's
// element-node ids and node coordinates, the two output views, and the two
// coefficients into the corresponding members.
ElementComputation( const mesh_type & arg_mesh ,
const elem_matrices_type & arg_element_matrices ,
const elem_vectors_type & arg_element_vectors ,
const scalar_type arg_coeff_K ,
const scalar_type arg_coeff_Q )
: shape_eval()
, elem_node_ids( arg_mesh.elem_node_ids )
, node_coords( arg_mesh.node_coords )
, element_matrices( arg_element_matrices )
, element_vectors( arg_element_vectors )
, coeff_K( arg_coeff_K )
, coeff_Q( arg_coeff_Q )
{}
public:
// Entry point: builds the functor from the arguments and launches it with
// parallel_for over one work item per element, where the element count is
// the leading dimension of mesh.elem_node_ids.
// No fence is issued here.
static void apply( const mesh_type & mesh ,
const elem_matrices_type & elem_matrices ,
const elem_vectors_type & elem_vectors ,
const scalar_type elem_coeff_K ,
const scalar_type elem_coeff_Q )
{
ElementComputation comp( mesh , elem_matrices , elem_vectors , elem_coeff_K , elem_coeff_Q );
const size_t elem_count = mesh.elem_node_ids.dimension_0();
parallel_for( elem_count , comp );
}
//------------------------------------
static const unsigned FLOPS_jacobian =
FunctionCount * SpatialDim * SpatialDim * 2 ;
// Accumulates into J (nine entries, J[3*d + c]) the sum over the element
// nodes of  gradient component d  times  coordinate component c.
//
// x, y, z   : nodal coordinates, ElementNodeCount entries each; every value
//             is converted to scalar_type before use.
// grad_vals : SpatialDim consecutive gradient entries per node.
// J         : updated with +=, so the caller decides its starting contents.
KOKKOS_INLINE_FUNCTION
void jacobian( const ScalarCoordType * x,
               const ScalarCoordType * y,
               const ScalarCoordType * z,
               const scalar_type * grad_vals,
               scalar_type * J) const
{
  const scalar_type * node_grad = grad_vals ;

  for ( unsigned node = 0 ; node < ElementNodeCount ; ++node , node_grad += SpatialDim ) {

    // Read everything for this node before touching J.
    const scalar_type g[3] = { node_grad[0] , node_grad[1] , node_grad[2] };

    const scalar_type cx = x[node] ;
    const scalar_type cy = y[node] ;
    const scalar_type cz = z[node] ;

    for ( unsigned d = 0 ; d < 3 ; ++d ) {
      scalar_type * const row = J + 3 * d ;
      row[0] += g[d] * cx ;
      row[1] += g[d] * cy ;
      row[2] += g[d] * cz ;
    }
  }
}
//------------------------------------
static const unsigned FLOPS_inverse_and_det = 46 ;
// Replaces the 3x3 matrix stored in J[0..8] (J[3*r + c]) by its inverse and
// returns the determinant of the original matrix.
// The determinant is not tested against zero before it is inverted.
KOKKOS_INLINE_FUNCTION
scalar_type inverse_and_determinant3x3( scalar_type * const J ) const
{
  // Copy of the input; J is overwritten below.
  const scalar_type m00 = J[0] , m01 = J[1] , m02 = J[2] ;
  const scalar_type m10 = J[3] , m11 = J[4] , m12 = J[5] ;
  const scalar_type m20 = J[6] , m21 = J[7] , m22 = J[8] ;

  // 2x2 minors that appear both in the determinant and in the first row of
  // the inverse.
  const scalar_type minor0 = m22*m11 - m21*m12;
  const scalar_type minor1 = m22*m01 - m21*m02;
  const scalar_type minor2 = m12*m01 - m11*m02;

  const scalar_type det = m00*minor0 - m10*minor1 + m20*minor2;

  const scalar_type det_recip = 1.0/det;

  J[0] =  minor0*det_recip;
  J[1] = -minor1*det_recip;
  J[2] =  minor2*det_recip;

  J[3] = -(m22*m10 - m20*m12)*det_recip;
  J[4] =  (m22*m00 - m20*m02)*det_recip;
  J[5] = -(m12*m00 - m10*m02)*det_recip;

  J[6] =  (m21*m10 - m20*m11)*det_recip;
  J[7] = -(m21*m00 - m20*m01)*det_recip;
  J[8] =  (m11*m00 - m10*m01)*det_recip;

  return det ;
}
//------------------------------------
// Multiplies a 3x3 block A with each of the n consecutive 3-entry groups of
// B and writes the resulting groups to C:
//   C[3*j + r] = A[3*r]*B[3*j] + A[3*r+1]*B[3*j+1] + A[3*r+2]*B[3*j+2]
// A has nine entries; B and C have 3*n entries each.  The original note
// describes A, B and C as stored with contiguous columns.
KOKKOS_INLINE_FUNCTION
void matTransMat3x3_X_3xn( const scalar_type * A, int n,
                           const scalar_type * B,
                           scalar_type * C ) const
{
  for ( int col = 0 ; col < n ; ++col ) {
    const scalar_type * const b = B + 3 * col ;
    scalar_type * const c = C + 3 * col ;

    c[0] = A[0]*b[0] + A[1]*b[1] + A[2]*b[2];
    c[1] = A[3]*b[0] + A[4]*b[1] + A[5]*b[2];
    c[2] = A[6]*b[0] + A[7]*b[1] + A[8]*b[2];
  }
}
//------------------------------------
static const unsigned FLOPS_contributeDiffusionMatrix = FunctionCount * ( 3 * 5 + FunctionCount * 7 ) ;

// Add one integration point's contribution to the element matrix.
//
//   weight    : scalar factor applied to every entry added here
//   grad_vals : shape-function gradients, three values per function
//   invJ      : nine-entry 3x3 matrix applied to each gradient
//   elem_mat  : element matrix, accumulated into (not overwritten)
//
// For every pair (m,n) the dot product of the two transformed gradients,
// scaled by 'weight', is added to elem_mat[m][n].
KOKKOS_INLINE_FUNCTION
void contributeDiffusionMatrix(
  const scalar_type weight ,
  const scalar_type grad_vals[] ,
  const scalar_type invJ[] ,
  scalar_type elem_mat[][8] ) const
{
  // Transformed gradient components, one entry per shape function.
  scalar_type grad_x[8], grad_y[8], grad_z[8];

  int offset = 0 ;
  for( unsigned fn = 0; fn < FunctionCount ; ++fn , offset += 3 ) {
    const scalar_type d0 = grad_vals[offset+0];
    const scalar_type d1 = grad_vals[offset+1];
    const scalar_type d2 = grad_vals[offset+2];

    grad_x[fn] = d0 * invJ[0] + d1 * invJ[1] + d2 * invJ[2];
    grad_y[fn] = d0 * invJ[3] + d1 * invJ[4] + d2 * invJ[5];
    grad_z[fn] = d0 * invJ[6] + d1 * invJ[7] + d2 * invJ[8];
  }

  for( unsigned row = 0; row < FunctionCount; row++) {
    const scalar_type gx_row = grad_x[row];
    const scalar_type gy_row = grad_y[row];
    const scalar_type gz_row = grad_z[row];

    for( unsigned col = 0; col < FunctionCount; col++) {
      elem_mat[row][col] += weight *
        ((gx_row * grad_x[col]) +
         (gy_row * grad_y[col]) +
         (gz_row * grad_z[col]));
    }
  }
}
//------------------------------------
static const unsigned FLOPS_contributeSourceVector = FunctionCount * 2 ;

// Add one integration point's contribution to the element vector:
// elem_vec[k] += psi[k] * term for each of the FunctionCount entries.
KOKKOS_INLINE_FUNCTION
void contributeSourceVector( const scalar_type term ,
                             const scalar_type psi[] ,
                             scalar_type elem_vec[] ) const
{
  for( unsigned fn = 0 ; fn < FunctionCount ; ++fn ) {
    elem_vec[fn] += psi[fn] * term ;
  }
}
static const unsigned FLOPS_operator =
  shape_function_data::PointCount * ( 3
  + FLOPS_jacobian
  + FLOPS_inverse_and_det
  + FLOPS_contributeDiffusionMatrix
  + FLOPS_contributeSourceVector ) ;

// Compute the element vector and element matrix of element 'ielem' and
// store them in 'element_vectors' and 'element_matrices'.
KOKKOS_INLINE_FUNCTION
void operator()( int ielem )const {

  // Element-local accumulators, zero on entry.
  scalar_type elem_vec[8] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 };
  scalar_type elem_mat[8][8] = {
    { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
    { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
    { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
    { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
    { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
    { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
    { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
    { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } };

  // Gather the coordinates of the element's eight nodes.
  ScalarCoordType x[8], y[8], z[8];

  for ( int node = 0 ; node < 8 ; ++node ) {
    const int global_node = elem_node_ids( ielem , node );

    x[node] = node_coords( global_node , 0 );
    y[node] = node_coords( global_node , 1 );
    z[node] = node_coords( global_node , 2 );
  }

  // Loop over integration points.
  // Running these in parallel would need a private 'elem_vec' and
  // 'elem_mat' per thread, costing extra local memory and a final
  // reduction, so the loop is kept serial.
  for ( unsigned ip = 0 ; ip < shape_function_data::PointCount ; ++ip ) {

    scalar_type J[SpatialDim*SpatialDim] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };

    jacobian( x, y, z, shape_eval.gradient[ip] , J );

    // J is inverted in place so no second 3x3 scratch array is needed;
    // from here on J holds the inverse.
    const scalar_type detJ_w = shape_eval.weight[ip] * inverse_and_determinant3x3(J);

    const scalar_type k_detJ_w = coeff_K * detJ_w ;
    const scalar_type Q_detJ_w = coeff_Q * detJ_w ;

    contributeDiffusionMatrix( k_detJ_w , shape_eval.gradient[ip] , J , elem_mat );
    contributeSourceVector( Q_detJ_w , shape_eval.value[ip] , elem_vec );
  }

  // Write the element-local results out.
  for( size_type row = 0 ; row < ElementNodeCount ; ++row ) {
    element_vectors(ielem, row) = elem_vec[row] ;
  }

  for( size_type row = 0 ; row < ElementNodeCount ; row++ ) {
    for( size_type col = 0 ; col < ElementNodeCount ; col++ ) {
      element_matrices(ielem, row, col) = elem_mat[row][col] ;
    }
  }
}
}; /* ElementComputation */
//----------------------------------------------------------------------------
// Functor that imposes Dirichlet boundary conditions on an assembled
// sparse linear system, one matrix row per work item.
//
// A node is treated as a boundary node when its z coordinate
// (node_coords(node,2)) is <= bc_lower_z or >= bc_upper_z. When both
// tests hold the lower value is used.
template< typename ScalarType , typename ScalarCoordType , class DeviceType >
struct DirichletBoundary
{
  typedef DeviceType execution_space;
  typedef typename execution_space::size_type size_type ;

  static const size_type ElementNodeCount = 8 ;

  typedef Kokkos::CrsMatrix< ScalarType , execution_space > matrix_type ;
  typedef Kokkos::View< ScalarType[] , execution_space > vector_type ;

  typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;

  typename mesh_type::node_coords_type node_coords ;
  matrix_type matrix ;
  vector_type rhs ;
  ScalarCoordType bc_lower_z ;
  ScalarCoordType bc_upper_z ;
  ScalarType bc_lower_value ;
  ScalarType bc_upper_value ;

  // Process matrix row 'inode'.
  //
  // Boundary row: rhs(inode) is set to the prescribed value and the row
  // becomes 1 on the diagonal, 0 elsewhere.
  //
  // Interior row: every entry whose column is a boundary node is moved
  // to the right-hand side (rhs -= value * coefficient) and then zeroed,
  // which keeps the original matrix symmetric.
  KOKKOS_INLINE_FUNCTION
  void operator()( size_type inode ) const
  {
    const size_type iBeg = matrix.graph.row_map[inode];
    const size_type iEnd = matrix.graph.row_map[inode+1];

    const ScalarCoordType z = node_coords(inode,2);
    const bool bc_lower = z <= bc_lower_z ;
    const bool bc_upper = bc_upper_z <= z ;

    if ( bc_lower || bc_upper ) {
      const ScalarType bc_value = bc_lower ? bc_lower_value
                                           : bc_upper_value ;

      rhs(inode) = bc_value ; // set the rhs vector

      // zero each value on the row, and leave a one
      // on the diagonal
      for( size_type i = iBeg ; i < iEnd ; i++) {
        matrix.coefficients(i) =
          static_cast<int>( inode ) == matrix.graph.entries(i) ? 1 : 0 ;
      }
    }
    else {
      // Find any columns that are boundary conditions.
      // Clear them and adjust the load vector
      for( size_type i = iBeg ; i < iEnd ; i++ ) {
        const size_type cnode = matrix.graph.entries(i) ;

        const ScalarCoordType zc = node_coords(cnode,2);
        const bool c_bc_lower = zc <= bc_lower_z ;
        const bool c_bc_upper = bc_upper_z <= zc ;

        if ( c_bc_lower || c_bc_upper ) {
          const ScalarType c_bc_value = c_bc_lower ? bc_lower_value
                                                   : bc_upper_value ;

          rhs( inode ) -= c_bc_value * matrix.coefficients(i);
          matrix.coefficients(i) = 0 ;
        }
      }
    }
  }

  // Build the functor from the given system, mesh and boundary data and
  // run it with parallel_for over every matrix row. The number of rows
  // is the length of the graph's row map minus one.
  static void apply( const matrix_type & linsys_matrix ,
                     const vector_type & linsys_rhs ,
                     const mesh_type & mesh ,
                     const ScalarCoordType bc_lower_z ,
                     const ScalarCoordType bc_upper_z ,
                     const ScalarType bc_lower_value ,
                     const ScalarType bc_upper_value )
  {
    const size_t row_map_length = linsys_matrix.graph.row_map.dimension_0();

    // An empty row map means there are no rows. Without this guard the
    // unsigned subtraction below would wrap around to a huge row count.
    if ( 0 == row_map_length ) { return ; }

    const size_t row_count = row_map_length - 1 ;

    DirichletBoundary op ;
    op.node_coords = mesh.node_coords ;
    op.matrix = linsys_matrix ;
    op.rhs = linsys_rhs ;
    op.bc_lower_z = bc_lower_z ;
    op.bc_upper_z = bc_upper_z ;
    op.bc_lower_value = bc_lower_value ;
    op.bc_upper_value = bc_upper_value ;

    parallel_for( row_count , op );
  }
};
//----------------------------------------------------------------------------
} /* namespace Implicit */
} /* namespace HybridFEM */

View File

@ -1,567 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef USESCASES_LINALG_BLAS_HPP
#define USESCASES_LINALG_BLAS_HPP
#include <cmath>
#include <utility>
#include <ParallelComm.hpp>
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {

// Forward declarations of the functors used by the vector routines
// declared further down in this header.

// Reductions: two-vector and single-vector dot products.
template< class Scalar , class Layout , class DeviceType > struct Dot ;
template< class Scalar , class Layout , class DeviceType > struct Dot1 ;

// In-place updates of one vector by a scalar.
template< typename ScalarA , typename ScalarY ,
          class Layout , class Device >
struct Scale ;

template< typename ScalarA , typename ScalarY ,
          class Layout , class Device >
struct Fill ;

// Two-vector updates.
template< typename ScalarA , typename ScalarX , typename ScalarY ,
          class Layout , class Device >
struct AXPY ;

template< typename ScalarX , typename ScalarB , typename ScalarY ,
          class Layout , class Device >
struct XPBY ;

// Three-vector update.
template< typename ScalarA , typename ScalarX ,
          typename ScalarB , typename ScalarY ,
          typename ScalarW ,
          class Layout , class Device >
struct WAXPBY ;

}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_MPI )
template< typename ScalarX /* Allow mix of const and non-const */ ,
typename ScalarY /* Allow mix of const and non-const */ ,
class L , class D ,
class MX /* Allow any management type */ ,
class MY /* Allow any management type */ >
inline
double dot( const size_t n ,
const View< ScalarX * , L , D , MX > & x ,
const View< ScalarY * , L , D , MY > & y ,
comm::Machine machine )
{
double global_result = 0 ;
double local_result = 0 ;
Impl::Dot< ScalarX , L , D >( n , x , y , local_result );
MPI_Allreduce( & local_result , & global_result , 1 ,
MPI_DOUBLE , MPI_SUM , machine.mpi_comm );
return global_result ;
}
#else
template< typename ScalarX /* Allow mix of const and non-const */ ,
typename ScalarY /* Allow mix of const and non-const */ ,
class L , class D ,
class MX /* Allow any management type */ ,
class MY /* Allow any management type */ >
inline
double dot( const size_t n ,
const View< ScalarX * , L , D , MX > & x ,
const View< ScalarY * , L , D , MY > & y ,
comm::Machine )
{
double global_result = 0 ;
Impl::Dot< ScalarX , L , D >( n , x , y , global_result );
return global_result ;
}
#endif
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_MPI )
template< typename ScalarX /* Allow mix of const and non-const */ ,
class L , class D ,
class MX /* Allow any management type */ >
inline
double dot( const size_t n ,
const View< ScalarX * , L , D , MX > & x ,
comm::Machine machine )
{
double global_result = 0 ;
double local_result = 0 ;
Impl::Dot1< ScalarX , L , D >( n , x , local_result );
MPI_Allreduce( & local_result , & global_result , 1 ,
MPI_DOUBLE , MPI_SUM , machine.mpi_comm );
return global_result ;
}
#else
template< typename ScalarX /* Allow mix of const and non-const */ ,
class L , class D ,
class MX /* Allow any management type */ >
inline
double dot( const size_t n ,
const View< ScalarX * , L , D , MX > & x ,
comm::Machine )
{
double global_result = 0 ;
Impl::Dot1< ScalarX , L , D >( n , x , global_result );
return global_result ;
}
#endif
//----------------------------------------------------------------------------
// Euclidean norm of the first n entries of x: the square root of the
// single-vector dot( n , x , machine ).
template< typename ScalarX /* Allow mix of const and non-const */ ,
          class L , class D ,
          class MX /* Allow any management type */ >
inline
double norm2( const size_t n ,
              const View< ScalarX * , L , D , MX > & x ,
              comm::Machine machine )
{
  const double sum_of_squares = dot( n , x , machine );

  return std::sqrt( sum_of_squares );
}
//----------------------------------------------------------------------------
template< typename ScalarA ,
typename ScalarX ,
class L ,
class D ,
class MX >
void scale( const size_t n ,
const ScalarA & alpha ,
const View< ScalarX * , L , D , MX > & x )
{
Impl::Scale< ScalarA , ScalarX , L , D >( n , alpha , x );
}
template< typename ScalarA ,
typename ScalarX ,
class L ,
class D ,
class MX >
void fill( const size_t n ,
const ScalarA & alpha ,
const View< ScalarX * , L , D , MX > & x )
{
Impl::Fill< ScalarA , ScalarX , L , D >( n , alpha , x );
}
//----------------------------------------------------------------------------
template< typename ScalarA ,
typename ScalarX ,
typename ScalarY ,
class L ,
class D ,
class MX ,
class MY >
void axpy( const size_t n ,
const ScalarA & alpha ,
const View< ScalarX *, L , D , MX > & x ,
const View< ScalarY *, L , D , MY > & y )
{
Impl::AXPY< ScalarA, ScalarX, ScalarY , L , D >( n, alpha, x, y );
}
//----------------------------------------------------------------------------
template< typename ScalarX ,
typename ScalarB ,
typename ScalarY ,
class L ,
class D ,
class MX ,
class MY >
void xpby( const size_t n ,
const View< ScalarX *, L , D , MX > & x ,
const ScalarB & beta ,
const View< ScalarY *, L , D , MY > & y )
{
Impl::XPBY< ScalarX, ScalarB, ScalarY , L , D >( n, x, beta, y );
}
//----------------------------------------------------------------------------
// w = alpha * x + beta * y
template< typename ScalarA ,
typename ScalarX ,
typename ScalarB ,
typename ScalarY ,
typename ScalarW ,
class L , class D ,
class MX , class MY , class MW >
void waxpby( const size_t n ,
const ScalarA & alpha ,
const View< ScalarX * , L , D , MX > & x ,
const ScalarB & beta ,
const View< ScalarY * , L , D , MY > & y ,
const View< ScalarW * , L , D , MW > & w )
{
Impl::WAXPBY<ScalarA,ScalarX,ScalarB,ScalarY,ScalarW,L,D>
( n , alpha , x , beta , y , w );
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
template< typename Scalar , class L , class D >
struct Dot
{
private:
typedef View< const Scalar*, L, D, MemoryUnmanaged > vector_const_type ;
const vector_const_type x ;
const vector_const_type y ;
public:
typedef typename vector_const_type::execution_space execution_space ; // Manycore device
typedef double value_type ; // Reduction value
template< class ArgX , class ArgY >
inline
Dot( const size_t n , const ArgX & arg_x , const ArgY & arg_y , double & result )
: x( arg_x ), y( arg_y )
{
parallel_reduce( n , *this , result );
}
template< typename iType >
KOKKOS_INLINE_FUNCTION
void operator()( const iType & i , value_type & update ) const
{ update += x(i) * y(i); }
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & update ,
const volatile value_type & source )
{ update += source; }
KOKKOS_INLINE_FUNCTION
static void init( value_type & update )
{ update = 0 ; }
}; // Dot
//----------------------------------------------------------------------------
template< typename Scalar , class L , class D >
struct Dot1
{
private:
typedef View< const Scalar*, L, D , MemoryUnmanaged > vector_const_type ;
const vector_const_type x ;
public:
typedef typename vector_const_type::execution_space execution_space ; // Manycore device
typedef double value_type ; // Reduction value
template< class ArgX >
inline
Dot1( const size_t n , const ArgX & arg_x , double & result )
: x( arg_x )
{
parallel_reduce( n , *this , result );
}
template< typename iType >
KOKKOS_INLINE_FUNCTION
void operator()( const iType & i , value_type & update ) const
{ update += x(i) * x(i) ; }
KOKKOS_INLINE_FUNCTION
static void join( volatile value_type & update ,
const volatile value_type & source )
{ update += source ; }
KOKKOS_INLINE_FUNCTION
static void init( value_type & update )
{ update = 0 ; }
}; // Dot
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
template < typename ScalarA ,
typename ScalarX ,
typename ScalarB ,
typename ScalarY ,
typename ScalarW ,
class L , class D >
struct WAXPBY
{
private:
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
typedef View< const ScalarX *, L , D , MemoryUnmanaged > ViewX ;
typedef View< const ScalarY *, L , D , MemoryUnmanaged > ViewY ;
const ViewW w ;
const ViewX x ;
const ViewY y ;
const ScalarA alpha ;
const ScalarB beta ;
public:
typedef typename ViewW::execution_space execution_space ;
template< typename iType >
KOKKOS_INLINE_FUNCTION
void operator()( const iType inode ) const
{
w(inode) = alpha * x(inode) + beta * y(inode);
}
template< class ArgX , class ArgY , class ArgW >
inline
WAXPBY( const size_t n ,
const ScalarA & arg_alpha ,
const ArgX & arg_x ,
const ScalarB & arg_beta ,
const ArgY & arg_y ,
const ArgW & arg_w )
: w( arg_w ), x( arg_x ), y( arg_y )
, alpha( arg_alpha ), beta( arg_beta )
{
parallel_for( n , *this );
}
}; // WAXPBY
//----------------------------------------------------------------------------
template < typename ScalarB ,
typename ScalarW ,
class L , class D >
struct Scale
{
private:
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
const ViewW w ;
const ScalarB beta ;
public:
typedef typename ViewW::execution_space execution_space ;
template< typename iType >
KOKKOS_INLINE_FUNCTION
void operator()( const iType & i ) const
{ w(i) *= beta ; }
template< class ArgW >
inline
Scale( const size_t n , const ScalarB & arg_beta , const ArgW & arg_w )
: w( arg_w )
, beta( arg_beta )
{
parallel_for( n , *this );
}
};
template < typename ScalarB ,
typename ScalarW ,
class L , class D >
struct Fill
{
private:
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
const ViewW w ;
const ScalarB beta ;
public:
typedef typename ViewW::execution_space execution_space ;
template< typename iType >
KOKKOS_INLINE_FUNCTION
void operator()( const iType & i ) const
{ w(i) = beta ; }
template< class ArgW >
inline
Fill( const size_t n , const ScalarB & arg_beta , const ArgW & arg_w )
: w( arg_w )
, beta( arg_beta )
{
parallel_for( n , *this );
}
};
//----------------------------------------------------------------------------
template < typename ScalarA ,
typename ScalarX ,
typename ScalarW ,
class L , class D >
struct AXPY
{
private:
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
typedef View< const ScalarX *, L , D , MemoryUnmanaged > ViewX ;
const ViewW w ;
const ViewX x ;
const ScalarA alpha ;
public:
typedef typename ViewW::execution_space execution_space ;
template< typename iType >
KOKKOS_INLINE_FUNCTION
void operator()( const iType & i ) const
{ w(i) += alpha * x(i); }
template< class ArgX , class ArgW >
inline
AXPY( const size_t n ,
const ScalarA & arg_alpha ,
const ArgX & arg_x ,
const ArgW & arg_w )
: w( arg_w ), x( arg_x )
, alpha( arg_alpha )
{
parallel_for( n , *this );
}
}; // AXPY
template< typename ScalarX ,
typename ScalarB ,
typename ScalarW ,
class L , class D >
struct XPBY
{
private:
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
typedef View< const ScalarX *, L , D , MemoryUnmanaged > ViewX ;
const ViewW w ;
const ViewX x ;
const ScalarB beta ;
public:
typedef typename ViewW::execution_space execution_space ;
template< typename iType >
KOKKOS_INLINE_FUNCTION
void operator()( const iType & i ) const
{ w(i) = x(i) + beta * w(i); }
template< class ArgX , class ArgW >
inline
XPBY( const size_t n ,
const ArgX & arg_x ,
const ScalarB & arg_beta ,
const ArgW & arg_w )
: w( arg_w ), x( arg_x )
, beta( arg_beta )
{
parallel_for( n , *this );
}
}; // XPBY
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef USESCASES_LINALG_BLAS_HPP */

View File

@ -1,53 +0,0 @@
# Builds every .cpp file found next to this Makefile into one executable
# named after the source directory, with a ".cuda" or ".host" suffix
# depending on whether KOKKOS_DEVICES contains "Cuda".

# Location of the Kokkos tree; may be overridden on the command line.
KOKKOS_PATH ?= ../..

# Directory holding this Makefile, so the build can run from elsewhere.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
SRC_DIR := $(dir $(MAKEFILE_PATH))

SRC = $(wildcard $(SRC_DIR)/*.cpp)
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)

#SRC = $(wildcard *.cpp)
#OBJ = $(SRC:%.cpp=%.o)

# Declared before the include below so that 'default' stays the default goal.
default: build
	echo "Start Build"

# These targets do not name files; without .PHONY a file called
# 'default', 'build' or 'clean' in this directory would disable them.
.PHONY: default build clean

# use installed Makefile.kokkos
include $(KOKKOS_PATH)/Makefile.kokkos

ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(NVCC_WRAPPER)
CXXFLAGS = -I$(SRC_DIR) -I$(CUDA_PATH) -O3
LINK = $(CXX)
LINKFLAGS = -L$(CUDA_PATH)/lib64 -lcusparse
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "Cuda,OpenMP"
#KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "OpenMP"
#KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

LIB =

build: $(EXE)

$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

clean:
	rm -f *.a *.o *.cuda *.host

# Compilation rules

%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,573 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef HYBRIDFEM_NONLINEAR_HPP
#define HYBRIDFEM_NONLINEAR_HPP
#include <utility>
#include <iostream>
#include <iomanip>
#include <Kokkos_Core.hpp>
#include <SparseLinearSystem.hpp>
#include <SparseLinearSystemFill.hpp>
#include <NonlinearFunctors.hpp>
#include <FEMesh.hpp>
#include <HexElement.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace HybridFEM {
namespace Nonlinear {
// Timings, iteration counts and error measure collected from one run.
struct PerformanceData {
  double mesh_time ;
  double graph_time ;
  double elem_time ;
  double matrix_gather_fill_time ;
  double matrix_boundary_condition_time ;
  double cg_iteration_time ;
  size_t cg_iteration_count ;
  size_t newton_iteration_count ;
  double error_max ;

  // Every field starts at zero.
  PerformanceData()
    : mesh_time(0)
    , graph_time(0)
    , elem_time(0)
    , matrix_gather_fill_time(0)
    , matrix_boundary_condition_time(0)
    , cg_iteration_time(0)
    , cg_iteration_count(0)
    , newton_iteration_count(0)
    , error_max(0)
  {}

  // Keep, field by field, the smaller of this object's value and rhs's.
  // A field is replaced only when rhs's value is strictly smaller.
  void best( const PerformanceData & rhs )
  {
    if ( rhs.mesh_time < mesh_time ) {
      mesh_time = rhs.mesh_time ;
    }
    if ( rhs.graph_time < graph_time ) {
      graph_time = rhs.graph_time ;
    }
    if ( rhs.elem_time < elem_time ) {
      elem_time = rhs.elem_time ;
    }
    if ( rhs.matrix_gather_fill_time < matrix_gather_fill_time ) {
      matrix_gather_fill_time = rhs.matrix_gather_fill_time ;
    }
    if ( rhs.matrix_boundary_condition_time < matrix_boundary_condition_time ) {
      matrix_boundary_condition_time = rhs.matrix_boundary_condition_time ;
    }
    if ( rhs.cg_iteration_time < cg_iteration_time ) {
      cg_iteration_time = rhs.cg_iteration_time ;
    }
    if ( rhs.cg_iteration_count < cg_iteration_count ) {
      cg_iteration_count = rhs.cg_iteration_count ;
    }
    if ( rhs.newton_iteration_count < newton_iteration_count ) {
      newton_iteration_count = rhs.newton_iteration_count ;
    }
    if ( rhs.error_max < error_max ) {
      error_max = rhs.error_max ;
    }
  }
};
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
class ManufacturedSolution {
public:

  // Analytic solution of the one-dimensional nonlinear problem
  //
  //   -K T_zz + T^2 = 0 ,  T(zmin) = T_zmin ,  T(zmax) = T_zmax
  //
  // which has solutions of the form
  //
  //   T(z) = ( a ( z - zmin ) + b )^(-2)   with   K = 1 / ( 6 a^2 )
  //
  // The two boundary values determine a and b, and K follows from a.
  // There are two such solutions:
  //
  //   With a singularity:
  //     a = ( 1.0 / sqrt(T_zmax) + 1.0 / sqrt(T_zmin) ) / ( zmax - zmin )
  //     b = -1.0 / sqrt(T_zmin)
  //
  //   Without a singularity (the one implemented here):
  //     a = ( 1.0 / sqrt(T_zmax) - 1.0 / sqrt(T_zmin) ) / ( zmax - zmin )
  //     b = 1.0 / sqrt(T_zmin)

  const double zmin ;
  const double zmax ;
  const double T_zmin ;
  const double T_zmax ;
  const double a ;
  const double b ;
  const double K ;

  // The initializers for a, b and K read members declared before them,
  // so they depend on the declaration order above.
  ManufacturedSolution( const double arg_zmin ,
                        const double arg_zmax ,
                        const double arg_T_zmin ,
                        const double arg_T_zmax )
    : zmin( arg_zmin )
    , zmax( arg_zmax )
    , T_zmin( arg_T_zmin )
    , T_zmax( arg_T_zmax )
    , a( ( 1.0 / sqrt(T_zmax) - 1.0 / sqrt(T_zmin) ) / ( zmax - zmin ) )
    , b( 1.0 / sqrt(T_zmin) )
    , K( 1.0 / ( 6.0 * a * a ) )
  {}

  // Evaluate T at z.
  double operator()( const double z ) const
  {
    const double base = a * ( z - zmin ) + b ;

    return 1.0 / ( base * base );
  }
};
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
template< typename Scalar , class FixtureType >
PerformanceData run( const typename FixtureType::FEMeshType & mesh ,
const int , // global_max_x ,
const int , // global_max_y ,
const int global_max_z ,
const bool print_error )
{
typedef Scalar scalar_type ;
typedef FixtureType fixture_type ;
typedef typename fixture_type::execution_space execution_space;
//typedef typename execution_space::size_type size_type ; // unused
typedef typename fixture_type::FEMeshType mesh_type ;
typedef typename fixture_type::coordinate_scalar_type coordinate_scalar_type ;
enum { ElementNodeCount = fixture_type::element_node_count };
const comm::Machine machine = mesh.parallel_data_map.machine ;
const size_t element_count = mesh.elem_node_ids.dimension_0();
//------------------------------------
// The amount of nonlinearity is proportional to the ratio
// between T(zmax) and T(zmin). For the manufactured solution
// 0 < T(zmin) and 0 < T(zmax)
const ManufacturedSolution
exact_solution( /* zmin */ 0 ,
/* zmax */ global_max_z ,
/* T(zmin) */ 1 ,
/* T(zmax) */ 20 );
//-----------------------------------
// Convergence Criteria and perf data:
const size_t cg_iteration_limit = 200 ;
const double cg_tolerance = 1e-14 ;
const size_t newton_iteration_limit = 150 ;
const double newton_tolerance = 1e-14 ;
size_t cg_iteration_count_total = 0 ;
double cg_iteration_time = 0 ;
size_t newton_iteration_count = 0 ;
double residual_norm_init = 0 ;
double residual_norm = 0 ;
PerformanceData perf_data ;
//------------------------------------
// Sparse linear system types:
typedef Kokkos::View< scalar_type* , execution_space > vector_type ;
typedef Kokkos::CrsMatrix< scalar_type , execution_space > matrix_type ;
typedef typename matrix_type::graph_type matrix_graph_type ;
typedef typename matrix_type::coefficients_type matrix_coefficients_type ;
typedef GraphFactory< matrix_graph_type , mesh_type > graph_factory ;
//------------------------------------
// Problem setup types:
typedef ElementComputation < mesh_type , scalar_type > ElementFunctor ;
typedef DirichletSolution < mesh_type , scalar_type > DirichletSolutionFunctor ;
typedef DirichletResidual < mesh_type , scalar_type > DirichletResidualFunctor ;
typedef typename ElementFunctor::elem_matrices_type elem_matrices_type ;
typedef typename ElementFunctor::elem_vectors_type elem_vectors_type ;
typedef GatherFill< matrix_type ,
mesh_type ,
elem_matrices_type ,
elem_vectors_type > GatherFillFunctor ;
//------------------------------------
matrix_type jacobian ;
vector_type residual ;
vector_type delta ;
vector_type nodal_solution ;
typename graph_factory::element_map_type element_map ;
//------------------------------------
// Generate mesh and corresponding sparse matrix graph
Kokkos::Impl::Timer wall_clock ;
//------------------------------------
// Generate sparse matrix graph and element->graph map.
wall_clock.reset();
graph_factory::create( mesh , jacobian.graph , element_map );
execution_space::fence();
perf_data.graph_time = comm::max( machine , wall_clock.seconds() );
//------------------------------------
// Allocate linear system coefficients and rhs:
const size_t local_owned_length = jacobian.graph.row_map.dimension_0() - 1 ;
const size_t local_total_length = mesh.node_coords.dimension_0();
jacobian.coefficients =
matrix_coefficients_type( "jacobian_coeff" , jacobian.graph.entries.dimension_0() );
// Nonlinear residual for owned nodes:
residual = vector_type( "residual" , local_owned_length );
// Nonlinear solution for owned and ghosted nodes:
nodal_solution = vector_type( "solution" , local_total_length );
// Nonlinear solution update for owned nodes:
delta = vector_type( "delta" , local_owned_length );
//------------------------------------
// Allocation of arrays to fill the linear system
elem_matrices_type elem_matrices ; // Jacobian matrices
elem_vectors_type elem_vectors ; // Residual vectors
if ( element_count ) {
elem_matrices = elem_matrices_type( std::string("elem_matrices"), element_count );
elem_vectors = elem_vectors_type( std::string("elem_vectors"), element_count );
}
//------------------------------------
// For boundary condition set the correct values in the solution vector
// The 'zmin' face is assigned to 'T_zmin'.
// The 'zmax' face is assigned to 'T_zmax'.
// The resulting solution is one dimensional along the 'Z' axis.
DirichletSolutionFunctor::apply( nodal_solution , mesh ,
exact_solution.zmin ,
exact_solution.zmax ,
exact_solution.T_zmin ,
exact_solution.T_zmax );
for(;;) { // Nonlinear loop
#if defined( KOKKOS_HAVE_MPI )
{ //------------------------------------
// Import off-processor nodal solution values
// for residual and jacobian computations
Kokkos::AsyncExchange< typename vector_type::value_type , execution_space ,
Kokkos::ParallelDataMap >
exchange( mesh.parallel_data_map , 1 );
Kokkos::PackArray< vector_type >
::pack( exchange.buffer() ,
mesh.parallel_data_map.count_interior ,
mesh.parallel_data_map.count_send ,
nodal_solution );
exchange.setup();
exchange.send_receive();
Kokkos::UnpackArray< vector_type >
::unpack( nodal_solution , exchange.buffer() ,
mesh.parallel_data_map.count_owned ,
mesh.parallel_data_map.count_receive );
}
#endif
//------------------------------------
// Compute element matrices and vectors:
wall_clock.reset();
ElementFunctor( mesh ,
elem_matrices ,
elem_vectors ,
nodal_solution ,
exact_solution.K );
execution_space::fence();
perf_data.elem_time += comm::max( machine , wall_clock.seconds() );
//------------------------------------
// Fill linear system coefficients:
wall_clock.reset();
fill( jacobian.coefficients.dimension_0(), 0 , jacobian.coefficients );
fill( residual.dimension_0() , 0 , residual );
GatherFillFunctor::apply( jacobian ,
residual ,
mesh ,
element_map ,
elem_matrices ,
elem_vectors );
execution_space::fence();
perf_data.matrix_gather_fill_time += comm::max( machine , wall_clock.seconds() );
// Apply boundary conditions:
wall_clock.reset();
// Updates jacobian matrix to 1 on the diagonal, zero elsewhere,
// and 0 in the residual due to the solution vector having the correct value
DirichletResidualFunctor::apply( jacobian, residual, mesh ,
exact_solution.zmin ,
exact_solution.zmax );
execution_space::fence();
perf_data.matrix_boundary_condition_time +=
comm::max( machine , wall_clock.seconds() );
//------------------------------------
// Has the residual converged?
residual_norm = norm2( mesh.parallel_data_map.count_owned,
residual,
mesh.parallel_data_map.machine );
if ( 0 == newton_iteration_count ) {
residual_norm_init = residual_norm ;
}
if ( residual_norm / residual_norm_init < newton_tolerance ) {
break ;
}
//------------------------------------
// Solve linear system
size_t cg_iteration_count = 0 ;
double cg_residual_norm = 0 ;
cgsolve( mesh.parallel_data_map ,
jacobian , residual , delta ,
cg_iteration_count ,
cg_residual_norm ,
cg_iteration_time ,
cg_iteration_limit , cg_tolerance ) ;
perf_data.cg_iteration_time += cg_iteration_time ;
cg_iteration_count_total += cg_iteration_count ;
// Update non-linear solution with delta...
// delta is : - Dx = [Jacobian]^{-1} * Residual which is the negative update
// LaTeX:
// \vec {x}_{n+1} = \vec {x}_{n} - ( - \Delta \vec{x}_{n} )
// text:
// x[n+1] = x[n] + Dx
axpy( mesh.parallel_data_map.count_owned ,
-1.0, delta, nodal_solution);
++newton_iteration_count ;
if ( newton_iteration_limit < newton_iteration_count ) {
break ;
}
};
if ( newton_iteration_count ) {
perf_data.elem_time /= newton_iteration_count ;
perf_data.matrix_gather_fill_time /= newton_iteration_count ;
perf_data.matrix_boundary_condition_time /= newton_iteration_count ;
}
if ( cg_iteration_count_total ) {
perf_data.cg_iteration_time /= cg_iteration_count_total ;
}
perf_data.newton_iteration_count = newton_iteration_count ;
perf_data.cg_iteration_count = cg_iteration_count_total ;
//------------------------------------
{
// For extracting the nodal solution and its coordinates:
typename mesh_type::node_coords_type::HostMirror node_coords_host =
Kokkos::create_mirror( mesh.node_coords );
typename vector_type::HostMirror nodal_solution_host =
Kokkos::create_mirror( nodal_solution );
Kokkos::deep_copy( node_coords_host , mesh.node_coords );
Kokkos::deep_copy( nodal_solution_host , nodal_solution );
double tmp = 0 ;
for ( size_t i = 0 ; i < mesh.parallel_data_map.count_owned ; ++i ) {
const coordinate_scalar_type x = node_coords_host(i,0);
const coordinate_scalar_type y = node_coords_host(i,1);
const coordinate_scalar_type z = node_coords_host(i,2);
const double Tx = exact_solution(z);
const double Ts = nodal_solution_host(i);
const double Te = std::abs( Tx - Ts ) / std::abs( Tx );
tmp = std::max( tmp , Te );
if ( print_error && 0.02 < Te ) {
std::cout << " node( " << x << " " << y << " " << z << " ) = "
<< Ts << " != exact_solution " << Tx
<< std::endl ;
}
}
perf_data.error_max = comm::max( machine , tmp );
}
return perf_data ;
}
//----------------------------------------------------------------------------
// Benchmark driver for the nonlinear hybrid-FEM example.
//
// For requested element counts starting at 'elem_count_beg' and doubling
// while they stay below 'elem_count_end', this builds a box mesh fixture,
// calls run() on it 'runs' times, combines the per-run results through
// PerformanceData::best(), and has the rank-zero process print one
// comma-separated table row per mesh size.
//
//   label           text appended to the table title
//   machine         communication handle; queried for size and rank and
//                   stored in mesh.parallel_data_map.machine
//   gang_count      forwarded unchanged to the mesh fixture
//   elem_count_beg  first requested element count
//   elem_count_end  exclusive upper bound on the requested element count
//   runs            number of repetitions per mesh size
//
// Nothing is computed or printed unless all three counts are positive.
template< typename Scalar , class Device , class FixtureElement >
void driver( const char * const label ,
             comm::Machine machine ,
             const int gang_count ,
             const int elem_count_beg ,
             const int elem_count_end ,
             const int runs )
{
  typedef Scalar          scalar_type ;
  typedef Device          execution_space ;
  typedef double          coordinate_scalar_type ;
  typedef FixtureElement  fixture_element_type ;

  typedef BoxMeshFixture< coordinate_scalar_type ,
                          execution_space ,
                          fixture_element_type > fixture_type ;

  typedef typename fixture_type::FEMeshType mesh_type ;

  const size_t proc_count = comm::size( machine );
  const size_t proc_rank  = comm::rank( machine );

  // Reject non-positive counts, not only zero: a negative starting count
  // would be doubled away from 'elem_count_end' without ever terminating
  // (ending in signed integer overflow), and a negative 'runs' would print
  // rows built from results that were never measured.
  if ( elem_count_beg <= 0 || elem_count_end <= 0 || runs <= 0 ) return ;

  const bool is_root = ( 0 == proc_rank );

  if ( is_root ) {
    std::cout << std::endl ;
    std::cout << "\"Kokkos::HybridFE::Nonlinear " << label << "\"" << std::endl;
    std::cout
      << "\"Size\" , \"Size\" , \"Graphing\" , \"Element\" , \"Fill\" , \"Boundary\" , \"CG-Iter\" , \"CG-Iter\" , \"Newton-Iter\" , \"Max-node-error\""
      << std::endl
      << "\"elems\" , \"nodes\" , \"millisec\" , \"millisec\" , \"millisec\" , \"millisec\" , \"millisec\" , \"total-count\" , \"total-count\" , \"ratio\""
      << std::endl ;
  }

  const bool   print_sample = false ;
  const double x_curve = 1.0 ;
  const double y_curve = 1.0 ;
  const double z_curve = 0.8 ;

  for ( int i = elem_count_beg ; i < elem_count_end ; i *= 2 )
  {
    // Box dimensions derived from the requested count: ix is the truncated
    // cube root of i/2 (at least 1), iy = ix + 1 and iz = 2 * iy.
    const int ix = std::max( 1 , static_cast<int>( cbrt( static_cast<double>( i ) / 2.0 ) ) );
    const int iy = 1 + ix ;
    const int iz = 2 * iy ;

    // Sizes reported in the first two table columns.
    const int global_elem_count = ix * iy * iz ;
    const int global_node_count = ( 2 * ix + 1 ) *
                                  ( 2 * iy + 1 ) *
                                  ( 2 * iz + 1 );

    mesh_type mesh =
      fixture_type::create( proc_count , proc_rank , gang_count ,
                            ix , iy , iz ,
                            x_curve , y_curve , z_curve );

    mesh.parallel_data_map.machine = machine ;

    // The first run seeds 'perf_best'; later runs are merged into it.
    PerformanceData perf_data , perf_best ;

    for ( int j = 0 ; j < runs ; ++j ) {
      perf_data = run<scalar_type,fixture_type>(mesh,ix,iy,iz, print_sample );

      if ( j == 0 ) {
        perf_best = perf_data ;
      }
      else {
        perf_best.best( perf_data );
      }
    }

    if ( is_root ) {
      // Times are multiplied by 1000 to match the "millisec" column headers.
      std::cout << std::setw(8)  << global_elem_count << " , "
                << std::setw(8)  << global_node_count << " , "
                << std::setw(10) << perf_best.graph_time * 1000 << " , "
                << std::setw(10) << perf_best.elem_time * 1000 << " , "
                << std::setw(10) << perf_best.matrix_gather_fill_time * 1000 << " , "
                << std::setw(10) << perf_best.matrix_boundary_condition_time * 1000 << " , "
                << std::setw(10) << perf_best.cg_iteration_time * 1000 << " , "
                << std::setw(7)  << perf_best.cg_iteration_count << " , "
                << std::setw(3)  << perf_best.newton_iteration_count << " , "
                << std::setw(10) << perf_best.error_max
                << std::endl ;
    }
  }
}
//----------------------------------------------------------------------------
} /* namespace Nonlinear */
} /* namespace HybridFEM */
#endif /* #ifndef HYBRIDFEM_IMPLICIT_HPP */

View File

@ -1,390 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#include <stdio.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <cstdlib>
#include <cmath>
#include <Kokkos_Core.hpp>
#include <HexElement.hpp>
#include <FEMesh.hpp>
namespace HybridFEM {
namespace Nonlinear {
template< class MeshType , typename ScalarType > struct ElementComputation ;
//----------------------------------------------------------------------------
// CUDA specialization of the element computation functor for a 27-node
// FEMesh with double-precision coordinates and values.
//
// Constructing an object captures the mesh views, the output views and the
// coefficient, and then launches the kernel through
// Kokkos::Impl::CudaParallelLaunch with a BlockDimX x BlockDimY thread block
// and sizeof(WorkSpace) bytes of shared memory.  On the device, operator()
// walks the elements starting at blockIdx.x with a stride of gridDim.x and,
// for each one, calls evaluateFunctions() followed by
// contributeResidualJacobian(), which write element_vectors(ielem,:) and
// element_matrices(ielem,:,:).
template<>
struct ElementComputation< FEMesh< double , 27 , Kokkos::Cuda > , double >
{
typedef Kokkos::Cuda execution_space ;
static const unsigned ElementNodeCount = 27 ;
typedef HexElement_Data< ElementNodeCount > element_data_type ;
typedef FEMesh< double , ElementNodeCount , execution_space > mesh_type ;
static const unsigned SpatialDim = element_data_type::spatial_dimension ;
static const unsigned FunctionCount = element_data_type::function_count ;
static const unsigned IntegrationCount = element_data_type::integration_count ;
static const unsigned TensorDim = SpatialDim * SpatialDim ;
// Output views indexed (element, row, column) and (element, row), and the
// input view of nodal values indexed by node.
typedef Kokkos::View< double[][FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
typedef Kokkos::View< double[][FunctionCount] , execution_space > elem_vectors_type ;
typedef Kokkos::View< double[] , execution_space > value_vector_type ;
private:
const element_data_type elem_data ;
const typename mesh_type::elem_node_ids_type elem_node_ids ;
const typename mesh_type::node_coords_type node_coords ;
const value_vector_type nodal_values ;
const elem_matrices_type element_matrices ;
const elem_vectors_type element_vectors ;
const float coeff_K ;
const unsigned elem_count ;
// For each of the nine inverse-Jacobian entries i, the four Jacobian
// indices {a,b,c,d} such that the unscaled entry is J[a]*J[b] - J[c]*J[d].
// Filled by the constructor.
unsigned invJacIndex[9][4] ;
// Flat indices of the 3x3 Jacobian entries (row-major).
static const unsigned j11 = 0 , j12 = 1 , j13 = 2 ,
j21 = 3 , j22 = 4 , j23 = 5 ,
j31 = 6 , j32 = 7 , j33 = 8 ;
// Can only handle up to 16 warps:
// NOTE(review): the block actually used is BlockDimX x BlockDimY = 32 x 7;
// the origin of the 16-warp limit is not visible in this file.
static const unsigned BlockDimX = 32 ;
static const unsigned BlockDimY = 7 ;
// Layout of the shared memory used by one thread block.
struct WorkSpace {
// Reduction scratch for sum_x(); rows 0..3 are also used as staging for
// the gathered x, y, z coordinates and nodal values in evaluateFunctions().
double sum[ BlockDimY ][ BlockDimX ];
// Sums over the functions of (nodal value * basis value / transformed
// gradient component), one entry per integration point.
double value_at_integ[ IntegrationCount ];
double gradx_at_integ[ IntegrationCount ];
double grady_at_integ[ IntegrationCount ];
double gradz_at_integ[ IntegrationCount ];
// Per-threadIdx.y Jacobian and inverse Jacobian of the integration point
// currently being processed.
float spaceJac[ BlockDimY ][ 9 ];
float spaceInvJac[ BlockDimY ][ 9 ];
// detJ * integration weight, one entry per integration point.
float detJweight[ IntegrationCount ];
// Transformed basis gradients, indexed [function][integration point].
float dpsidx[ FunctionCount ][ IntegrationCount ];
float dpsidy[ FunctionCount ][ IntegrationCount ];
float dpsidz[ FunctionCount ][ IntegrationCount ];
};
public:
// Captures the inputs, fills invJacIndex and launches the kernel.
//   arg_mesh              supplies elem_node_ids and node_coords
//   arg_element_matrices  output, written as (ielem, row, col)
//   arg_element_vectors   output, written as (ielem, row)
//   arg_nodal_values      input values read per node
//   arg_coeff_K           coefficient multiplied into the gradient terms
ElementComputation ( const mesh_type & arg_mesh ,
const elem_matrices_type & arg_element_matrices ,
const elem_vectors_type & arg_element_vectors ,
const value_vector_type & arg_nodal_values ,
const float arg_coeff_K )
: elem_data()
, elem_node_ids( arg_mesh.elem_node_ids )
, node_coords( arg_mesh.node_coords )
, nodal_values( arg_nodal_values )
, element_matrices( arg_element_matrices )
, element_vectors( arg_element_vectors )
, coeff_K( arg_coeff_K )
, elem_count( arg_mesh.elem_node_ids.dimension_0() )
{
// Index table for the inverse Jacobian; copied into the member below so it
// is part of the functor object handed to the launch.
const unsigned jInvJ[9][4] =
{ { j22 , j33 , j23 , j32 } ,
{ j13 , j32 , j12 , j33 } ,
{ j12 , j23 , j13 , j22 } ,
{ j23 , j31 , j21 , j33 } ,
{ j11 , j33 , j13 , j31 } ,
{ j13 , j21 , j11 , j23 } ,
{ j21 , j32 , j22 , j31 } ,
{ j12 , j31 , j11 , j32 } ,
{ j11 , j22 , j12 , j21 } };
for ( unsigned i = 0 ; i < 9 ; ++i ) {
for ( unsigned j = 0 ; j < 4 ; ++j ) {
invJacIndex[i][j] = jInvJ[i][j] ;
}
}
const unsigned shmem = sizeof(WorkSpace);
// The grid is capped at 65535 blocks; operator() strides over the elements
// by gridDim.x, so larger element counts are still covered.
const unsigned grid_max = 65535 ;
const unsigned grid_count = std::min( grid_max , elem_count );
// For compute capability 2.x up to 1024 threads per block
const dim3 block( BlockDimX , BlockDimY , 1 );
const dim3 grid( grid_count , 1 , 1 );
Kokkos::Impl::CudaParallelLaunch< ElementComputation >( *this , grid , block , shmem );
}
public:
//------------------------------------
// Sum among the threadIdx.x
//
// Every calling thread deposits 'value' in its slot of the
// sum[threadIdx.y][] scratch row; threads with threadIdx.x < 16 then fold in
// the slots at offsets 16, 8, 4, 2 and 1, and the thread with
// threadIdx.x == 0 stores the total in 'result'.
// Slots of threads that do not call sum_x() are read as well, which is why
// callers zero the scratch with sum_x_clear() beforehand.
// There is no barrier between the folding steps, so this presumably relies
// on the 32 threads that share a threadIdx.y executing in lockstep --
// NOTE(review): confirm for the targeted GPU architectures.
template< typename Type >
__device__ inline static
void sum_x( Type & result , const double value )
{
extern __shared__ WorkSpace work_data[] ;
volatile double * const base_sum =
& work_data->sum[ threadIdx.y ][ threadIdx.x ] ;
base_sum[ 0] = value ;
if ( threadIdx.x < 16 ) {
base_sum[0] += base_sum[16];
base_sum[0] += base_sum[ 8];
base_sum[0] += base_sum[ 4];
base_sum[0] += base_sum[ 2];
base_sum[0] += base_sum[ 1];
}
if ( 0 == threadIdx.x ) {
result = base_sum[0] ;
}
}
// Zero the calling thread's own slot of the reduction scratch.
__device__ inline static
void sum_x_clear()
{
extern __shared__ WorkSpace work_data[] ;
work_data->sum[ threadIdx.y ][ threadIdx.x ] = 0 ;
}
//------------------------------------
//------------------------------------
// For element 'ielem', fill the shared WorkSpace with, per integration
// point: detJweight, the transformed basis gradients dpsidx/dpsidy/dpsidz,
// and the value and gradient sums value_at_integ / grad{x,y,z}_at_integ.
// Ends with a block-wide barrier so the data is complete on return.
__device__ inline
void evaluateFunctions( const unsigned ielem ) const
{
extern __shared__ WorkSpace work_data[] ;
// Each warp (threadIdx.y) computes an integration point
// Each thread is responsible for a node / function.
const unsigned iFunc = threadIdx.x ;
const bool hasFunc = iFunc < FunctionCount ;
//------------------------------------
// Threads with threadIdx.y in 0..3 each gather a different variable
// (x, y, z, nodal value) into rows 0..3 of the 'sum' scratch in shared memory.
if ( hasFunc ) {
const unsigned node = elem_node_ids( ielem , iFunc );
for ( unsigned iy = threadIdx.y ; iy < 4 ; iy += blockDim.y ) {
switch( iy ) {
case 0 : work_data->sum[0][iFunc] = node_coords(node,0); break ;
case 1 : work_data->sum[1][iFunc] = node_coords(node,1); break ;
case 2 : work_data->sum[2][iFunc] = node_coords(node,2); break ;
case 3 : work_data->sum[3][iFunc] = nodal_values(node); break ;
default: break ;
}
}
}
__syncthreads(); // Wait for all warps to finish gathering
// now get local 'const' copies in register space:
const double x = work_data->sum[0][ iFunc ];
const double y = work_data->sum[1][ iFunc ];
const double z = work_data->sum[2][ iFunc ];
const double dof_val = work_data->sum[3][ iFunc ];
__syncthreads(); // Wait for all warps to finish extracting
sum_x_clear(); // Make sure summation scratch is zero
//------------------------------------
// Each warp is now on its own computing an integration point
// so no further explicit synchronizations are required.
if ( hasFunc ) {
// Jacobian scratch private to this threadIdx.y.
float * const J = work_data->spaceJac[ threadIdx.y ];
float * const invJ = work_data->spaceInvJac[ threadIdx.y ];
// Integration points are distributed over threadIdx.y with stride blockDim.y.
for ( unsigned iInt = threadIdx.y ;
iInt < IntegrationCount ; iInt += blockDim.y ) {
const float val = elem_data.values[iInt][iFunc] ;
const float gx = elem_data.gradients[iInt][0][iFunc] ;
const float gy = elem_data.gradients[iInt][1][iFunc] ;
const float gz = elem_data.gradients[iInt][2][iFunc] ;
// Jacobian entries: sums over the functions of gradient * coordinate.
sum_x( J[j11], gx * x );
sum_x( J[j12], gx * y );
sum_x( J[j13], gx * z );
sum_x( J[j21], gy * x );
sum_x( J[j22], gy * y );
sum_x( J[j23], gy * z );
sum_x( J[j31], gz * x );
sum_x( J[j32], gz * y );
sum_x( J[j33], gz * z );
// Inverse jacobian, only enough parallel work for 9 threads in the warp
if ( iFunc < TensorDim ) {
invJ[ iFunc ] =
J[ invJacIndex[iFunc][0] ] * J[ invJacIndex[iFunc][1] ] -
J[ invJacIndex[iFunc][2] ] * J[ invJacIndex[iFunc][3] ] ;
// Each of these TensorDim threads computes the determinant into a register,
// reading the not-yet-scaled entries invJ[j11], invJ[j12] and invJ[j13].
const float detJ = J[j11] * invJ[j11] +
J[j21] * invJ[j12] +
J[j31] * invJ[j13] ;
invJ[ iFunc ] /= detJ ;
if ( 0 == iFunc ) {
work_data->detJweight[ iInt ] = detJ * elem_data.weights[ iInt ] ;
}
}
// Transform bases gradients and compute value and gradient
const float dx = gx * invJ[j11] + gy * invJ[j12] + gz * invJ[j13];
const float dy = gx * invJ[j21] + gy * invJ[j22] + gz * invJ[j23];
const float dz = gx * invJ[j31] + gy * invJ[j32] + gz * invJ[j33];
work_data->dpsidx[iFunc][iInt] = dx ;
work_data->dpsidy[iFunc][iInt] = dy ;
work_data->dpsidz[iFunc][iInt] = dz ;
sum_x( work_data->gradx_at_integ[iInt] , dof_val * dx );
sum_x( work_data->grady_at_integ[iInt] , dof_val * dy );
sum_x( work_data->gradz_at_integ[iInt] , dof_val * dz );
sum_x( work_data->value_at_integ[iInt] , dof_val * val );
}
}
__syncthreads(); // All shared data must be populated at return.
}
// Using the WorkSpace data produced by evaluateFunctions(), sum the
// residual and Jacobian contributions over the integration points and store
// them in element_vectors(ielem,row) and element_matrices(ielem,row,col).
// Here threadIdx.x selects the integration point and threadIdx.y strides
// over the rows.
__device__ inline
void contributeResidualJacobian( const unsigned ielem ) const
{
extern __shared__ WorkSpace work_data[] ;
sum_x_clear(); // Make sure summation scratch is zero
// $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
// $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$
const unsigned iInt = threadIdx.x ;
if ( iInt < IntegrationCount ) {
const double value_at_integ = work_data->value_at_integ[ iInt ] ;
const double gradx_at_integ = work_data->gradx_at_integ[ iInt ] ;
const double grady_at_integ = work_data->grady_at_integ[ iInt ] ;
const double gradz_at_integ = work_data->gradz_at_integ[ iInt ] ;
const float detJweight = work_data->detJweight[ iInt ] ;
const float coeff_K_detJweight = coeff_K * detJweight ;
for ( unsigned iRow = threadIdx.y ;
iRow < FunctionCount ; iRow += blockDim.y ) {
// Row-dependent factors, pre-multiplied by the integration weight terms.
const float value_row = elem_data.values[ iInt ][ iRow ] * detJweight ;
const float dpsidx_row = work_data->dpsidx[ iRow ][ iInt ] * coeff_K_detJweight ;
const float dpsidy_row = work_data->dpsidy[ iRow ][ iInt ] * coeff_K_detJweight ;
const float dpsidz_row = work_data->dpsidz[ iRow ][ iInt ] * coeff_K_detJweight ;
const double res_del = dpsidx_row * gradx_at_integ +
dpsidy_row * grady_at_integ +
dpsidz_row * gradz_at_integ ;
const double res_val = value_at_integ * value_at_integ * value_row ;
const double jac_val_row = 2 * value_at_integ * value_row ;
// The reduction over integration points writes the final residual entry.
sum_x( element_vectors( ielem , iRow ) , res_del + res_val );
for ( unsigned iCol = 0 ; iCol < FunctionCount ; ++iCol ) {
const float jac_del =
dpsidx_row * work_data->dpsidx[iCol][iInt] +
dpsidy_row * work_data->dpsidy[iCol][iInt] +
dpsidz_row * work_data->dpsidz[iCol][iInt] ;
const double jac_val =
jac_val_row * elem_data.values[ iInt ][ iCol ] ;
// The reduction over integration points writes the final Jacobian entry.
sum_x( element_matrices( ielem , iRow , iCol ) , jac_del + jac_val );
}
}
}
__syncthreads(); // All warps finish before refilling shared data
}
// Kernel body: each block processes elements blockIdx.x, blockIdx.x +
// gridDim.x, ... below elem_count.
__device__ inline
void operator()(void) const
{
// Declared here but not referenced directly in this function.
extern __shared__ WorkSpace work_data[] ;
for ( unsigned ielem = blockIdx.x ; ielem < elem_count ; ielem += gridDim.x ) {
evaluateFunctions( ielem );
contributeResidualJacobian( ielem );
}
}
}; /* ElementComputation */
} /* namespace Nonlinear */
} /* namespace HybridFEM */

View File

@ -1,482 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_NONLINEARFUNCTORS_HPP
#define KOKKOS_NONLINEARFUNCTORS_HPP
#include <iostream>
#include <fstream>
#include <iomanip>
#include <cstdlib>
#include <cmath>
namespace HybridFEM {
namespace Nonlinear {
template< class MeshType , typename ScalarType > struct ElementComputation ;
template< class MeshType , typename ScalarType > struct DirichletSolution ;
template< class MeshType , typename ScalarType > struct DirichletResidual ;
}
}
/* A Cuda-specific specialization for the element computation functor. */
#if defined( __CUDACC__ )
#include <NonlinearElement_Cuda.hpp>
#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace HybridFEM {
namespace Nonlinear {
// Element computation functor for any FEMesh / scalar combination.
//
// Constructing an object captures the mesh views, the output views and the
// coefficient and immediately dispatches 'parallel_for' with one work item
// per element.  Each work item gathers the element's nodal coordinates and
// values, accumulates the residual vector and Jacobian matrix over all
// integration points, and stores them in element_vectors(ielem,:) and
// element_matrices(ielem,:,:).
template< typename ScalarCoordType , unsigned ElemNode , class DeviceType ,
          typename ScalarType >
struct ElementComputation<
  FEMesh< ScalarCoordType , ElemNode , DeviceType > , ScalarType >
{
  typedef DeviceType  execution_space ;
  typedef ScalarType  scalar_type ;

  static const unsigned ElementNodeCount = ElemNode ;

  typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;
  typedef HexElement_Data< ElementNodeCount > element_data_type ;

  static const unsigned SpatialDim       = element_data_type::spatial_dimension ;
  static const unsigned FunctionCount    = element_data_type::function_count ;
  static const unsigned IntegrationCount = element_data_type::integration_count ;
  static const unsigned TensorDim        = SpatialDim * SpatialDim ;

  // Outputs indexed (element, row, column) and (element, row); input values
  // indexed by node.
  typedef Kokkos::View< scalar_type[][FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
  typedef Kokkos::View< scalar_type[][FunctionCount] , execution_space > elem_vectors_type ;
  typedef Kokkos::View< scalar_type[] , execution_space > value_vector_type ;

private:

  const element_data_type                 elem_data ;
  typename mesh_type::elem_node_ids_type  elem_node_ids ;
  typename mesh_type::node_coords_type    node_coords ;
  value_vector_type                       nodal_values ;
  elem_matrices_type                      element_matrices ;
  elem_vectors_type                       element_vectors ;
  scalar_type                             coeff_K ;

public:

  // Captures the arguments and runs the computation over every element of
  // 'arg_mesh' (the element count is the leading dimension of elem_node_ids).
  ElementComputation( const mesh_type & arg_mesh ,
                      const elem_matrices_type & arg_element_matrices ,
                      const elem_vectors_type & arg_element_vectors ,
                      const value_vector_type & arg_nodal_values ,
                      const scalar_type arg_coeff_K )
    : elem_data()
    , elem_node_ids( arg_mesh.elem_node_ids )
    , node_coords( arg_mesh.node_coords )
    , nodal_values( arg_nodal_values )
    , element_matrices( arg_element_matrices )
    , element_vectors( arg_element_vectors )
    , coeff_K( arg_coeff_K )
  {
    const size_t work_count = arg_mesh.elem_node_ids.dimension_0();

    parallel_for( work_count , *this );
  }

  //------------------------------------

  // Operation count attributed to one call of transform_gradients().
  static const unsigned FLOPS_transform_gradients =
    /* Jacobian */ FunctionCount * TensorDim * 2 +
    /* Inverse jacobian */ TensorDim * 6 + 6 +
    /* Gradient transform */ FunctionCount * 15 ;

  // Builds the Jacobian from the master-element gradients 'grad' and the
  // nodal coordinates x, y, z; inverts it; and writes the transformed
  // gradients of every basis function to dpsidx, dpsidy, dpsidz.
  // Returns the determinant of the Jacobian.
  KOKKOS_INLINE_FUNCTION
  float transform_gradients(
    const float grad[][ FunctionCount ] , // Gradient of bases master element
    const double x[] ,
    const double y[] ,
    const double z[] ,
    float dpsidx[] ,
    float dpsidy[] ,
    float dpsidz[] ) const
  {
    // Flat (row-major) positions of the 3x3 tensor entries.
    enum { j11 = 0 , j12 = 1 , j13 = 2 ,
           j21 = 3 , j22 = 4 , j23 = 5 ,
           j31 = 6 , j32 = 7 , j33 = 8 };

    // Jacobian, accumulated in double precision.
    double jac[ TensorDim ] ;

    for ( unsigned k = 0 ; k < TensorDim ; ++k ) { jac[k] = 0 ; }

    for ( unsigned node = 0 ; node < FunctionCount ; ++node ) {
      const double px = x[node] ;
      const double py = y[node] ;
      const double pz = z[node] ;

      const float grad0 = grad[0][node] ;
      const float grad1 = grad[1][node] ;
      const float grad2 = grad[2][node] ;

      jac[j11] += grad0 * px ;
      jac[j12] += grad0 * py ;
      jac[j13] += grad0 * pz ;

      jac[j21] += grad1 * px ;
      jac[j22] += grad1 * py ;
      jac[j23] += grad1 * pz ;

      jac[j31] += grad2 * px ;
      jac[j32] += grad2 * py ;
      jac[j33] += grad2 * pz ;
    }

    // Unscaled inverse, stored in single precision.
    float inv[ TensorDim ] = {
      static_cast<float>( jac[j22] * jac[j33] - jac[j23] * jac[j32] ) ,
      static_cast<float>( jac[j13] * jac[j32] - jac[j12] * jac[j33] ) ,
      static_cast<float>( jac[j12] * jac[j23] - jac[j13] * jac[j22] ) ,

      static_cast<float>( jac[j23] * jac[j31] - jac[j21] * jac[j33] ) ,
      static_cast<float>( jac[j11] * jac[j33] - jac[j13] * jac[j31] ) ,
      static_cast<float>( jac[j13] * jac[j21] - jac[j11] * jac[j23] ) ,

      static_cast<float>( jac[j21] * jac[j32] - jac[j22] * jac[j31] ) ,
      static_cast<float>( jac[j12] * jac[j31] - jac[j11] * jac[j32] ) ,
      static_cast<float>( jac[j11] * jac[j22] - jac[j12] * jac[j21] ) };

    // Determinant from the first Jacobian column and the first row of 'inv'.
    const float detJ = jac[j11] * inv[j11] +
                       jac[j21] * inv[j12] +
                       jac[j31] * inv[j13] ;

    const float detJinv = 1.0 / detJ ;

    for ( unsigned k = 0 ; k < TensorDim ; ++k ) { inv[k] *= detJinv ; }

    // Apply the inverse Jacobian to each basis function's gradient.
    for ( unsigned node = 0 ; node < FunctionCount ; ++node ) {
      const float ga = grad[0][node];
      const float gb = grad[1][node];
      const float gc = grad[2][node];

      dpsidx[node] = ga * inv[j11] + gb * inv[j12] + gc * inv[j13];
      dpsidy[node] = ga * inv[j21] + gb * inv[j22] + gc * inv[j23];
      dpsidz[node] = ga * inv[j31] + gb * inv[j32] + gc * inv[j33];
    }

    return detJ ;
  }

  // Adds the contribution of one integration point to the element residual
  // 'elem_res' and the element Jacobian 'elem_mat'.
  //
  // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
  // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$
  KOKKOS_INLINE_FUNCTION
  void contributeResidualJacobian(
    const float coeff_k ,
    const double dof_values[] ,
    const float dpsidx[] ,
    const float dpsidy[] ,
    const float dpsidz[] ,
    const float detJ ,
    const float integ_weight ,
    const float bases_vals[] ,
    double elem_res[] ,
    double elem_mat[][ FunctionCount ] ) const
  {
    // Interpolate the value and its gradient at this integration point.
    double value_at_pt = 0 ;
    double gradx_at_pt = 0 ;
    double grady_at_pt = 0 ;
    double gradz_at_pt = 0 ;

    for ( unsigned k = 0 ; k < FunctionCount ; ++k ) {
      const double dof = dof_values[k] ;

      value_at_pt += dof * bases_vals[k] ;
      gradx_at_pt += dof * dpsidx[k] ;
      grady_at_pt += dof * dpsidy[k] ;
      gradz_at_pt += dof * dpsidz[k] ;
    }

    // Factors shared by every row and column.
    const scalar_type k_detJ_weight = coeff_k * detJ * integ_weight ;
    const double res_val = value_at_pt * value_at_pt * detJ * integ_weight ;
    const double mat_val = 2.0 * value_at_pt * detJ * integ_weight ;

    for ( unsigned row = 0 ; row < FunctionCount ; ++row ) {
      const float phi_row = bases_vals[row] ;
      const float dx_row  = dpsidx[row] ;
      const float dy_row  = dpsidy[row] ;
      const float dz_row  = dpsidz[row] ;

      elem_res[row] += k_detJ_weight * ( dx_row * gradx_at_pt +
                                         dy_row * grady_at_pt +
                                         dz_row * gradz_at_pt ) +
                       res_val * phi_row ;

      for ( unsigned col = 0 ; col < FunctionCount ; ++col ) {
        elem_mat[row][col] += k_detJ_weight * ( dx_row * dpsidx[col] +
                                                dy_row * dpsidy[col] +
                                                dz_row * dpsidz[col] ) +
                              mat_val * phi_row * bases_vals[col];
      }
    }
  }

  // Work item for element 'ielem': gather, integrate, store.
  KOKKOS_INLINE_FUNCTION
  void operator()( const unsigned ielem ) const
  {
    // Local accumulators, zeroed before integration.
    double res_local[ FunctionCount ] ;
    double jac_local[ FunctionCount ][ FunctionCount ] ;

    for ( unsigned row = 0 ; row < FunctionCount ; ++row ) {
      res_local[row] = 0 ;
      for ( unsigned col = 0 ; col < FunctionCount ; ++col ) {
        jac_local[row][col] = 0 ;
      }
    }

    // Nodal coordinates and solution values of this element.
    double node_x[ FunctionCount ] ;
    double node_y[ FunctionCount ] ;
    double node_z[ FunctionCount ] ;
    double node_val[ FunctionCount ] ;

    for ( unsigned k = 0 ; k < ElementNodeCount ; ++k ) {
      const unsigned id = elem_node_ids( ielem , k );

      node_x[k]   = node_coords( id , 0 );
      node_y[k]   = node_coords( id , 1 );
      node_z[k]   = node_coords( id , 2 );
      node_val[k] = nodal_values( id );
    }

    // Accumulate every integration point's contribution.
    for ( unsigned ip = 0 ; ip < IntegrationCount ; ++ip ) {
      float dpsidx[ FunctionCount ] ;
      float dpsidy[ FunctionCount ] ;
      float dpsidz[ FunctionCount ] ;

      const float detJ =
        transform_gradients( elem_data.gradients[ip] ,
                             node_x , node_y , node_z ,
                             dpsidx , dpsidy , dpsidz );

      contributeResidualJacobian( coeff_K ,
                                  node_val , dpsidx , dpsidy , dpsidz ,
                                  detJ ,
                                  elem_data.weights[ip] ,
                                  elem_data.values[ip] ,
                                  res_local , jac_local );
    }

    // Store the results in the output views.
    for ( unsigned row = 0 ; row < FunctionCount ; ++row ) {
      element_vectors( ielem , row ) = res_local[row] ;
      for ( unsigned col = 0 ; col < FunctionCount ; ++col ) {
        element_matrices( ielem , row , col ) = jac_local[row][col] ;
      }
    }
  }
}; /* ElementComputation */
//----------------------------------------------------------------------------
// Functor that imposes the Dirichlet boundary values on a solution vector.
//
// A node whose z coordinate is at or below 'bc_lower_z' receives
// 'bc_lower_value'; otherwise, a node whose z coordinate is at or above
// 'bc_upper_z' receives 'bc_upper_value'.  All other entries of the
// solution are left untouched.
template< typename ScalarCoordType , unsigned ElemNode , class DeviceType ,
          typename ScalarType >
struct DirichletSolution<
  FEMesh< ScalarCoordType , ElemNode , DeviceType > ,
  ScalarType >
{
  typedef DeviceType execution_space ;

  static const unsigned ElementNodeCount = ElemNode ;

  typedef Kokkos::View< ScalarType[] , execution_space > vector_type ;
  typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;

  typename mesh_type::node_coords_type node_coords ;

  vector_type      solution ;
  ScalarCoordType  bc_lower_z ;
  ScalarCoordType  bc_upper_z ;
  ScalarType       bc_lower_value ;
  ScalarType       bc_upper_value ;

  // Work item for node 'inode'.  The lower face is tested first, so it
  // takes precedence if a node satisfies both conditions.
  KOKKOS_INLINE_FUNCTION
  void operator()( const unsigned inode ) const
  {
    const ScalarCoordType node_z = node_coords(inode,2);

    if ( node_z <= bc_lower_z ) {
      solution(inode) = bc_lower_value ;
    }
    else if ( bc_upper_z <= node_z ) {
      solution(inode) = bc_upper_value ;
    }
  }

  // Runs the functor over every entry of 'solution' (its leading dimension
  // is the work count), reading node coordinates from 'mesh'.
  static void apply( const vector_type & solution ,
                     const mesh_type & mesh ,
                     const ScalarCoordType bc_lower_z ,
                     const ScalarCoordType bc_upper_z ,
                     const ScalarType bc_lower_value ,
                     const ScalarType bc_upper_value )
  {
    DirichletSolution functor ;

    functor.solution       = solution ;
    functor.node_coords    = mesh.node_coords ;
    functor.bc_lower_z     = bc_lower_z ;
    functor.bc_lower_value = bc_lower_value ;
    functor.bc_upper_z     = bc_upper_z ;
    functor.bc_upper_value = bc_upper_value ;

    parallel_for( solution.dimension_0() , functor );
  }
};
//----------------------------------------------------------------------------
// Functor that applies the Dirichlet boundary condition to the linear
// system (matrix and right-hand side), one matrix row per work item.
//
// A node is a boundary node when its z coordinate is at or below
// 'bc_lower_z' or at or above 'bc_upper_z'.
//  - Boundary row: the right-hand-side entry is set to zero and the row's
//    coefficients become 1 on the diagonal and 0 elsewhere.
//  - Any other row: coefficients whose column is a boundary node are set
//    to zero; the right-hand side is not modified for these rows.
template< typename ScalarCoordType , unsigned ElemNode , class DeviceType ,
          typename ScalarType >
struct DirichletResidual<
  FEMesh< ScalarCoordType , ElemNode , DeviceType > , ScalarType >
{
  typedef DeviceType execution_space ;
  typedef typename execution_space::size_type size_type ;

  static const unsigned ElementNodeCount = ElemNode ;

  typedef Kokkos::CrsMatrix< ScalarType , execution_space > matrix_type ;
  typedef Kokkos::View< ScalarType[] , execution_space > vector_type ;
  typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;

  typename mesh_type::node_coords_type node_coords ;

  matrix_type      matrix ;
  vector_type      rhs ;
  ScalarCoordType  bc_lower_z ;
  ScalarCoordType  bc_upper_z ;

  // Work item for matrix row / node 'inode'.
  KOKKOS_INLINE_FUNCTION
  void operator()( const unsigned inode ) const
  {
    // Range of this row's entries in the sparse storage.
    const size_type row_begin = matrix.graph.row_map[inode];
    const size_type row_end   = matrix.graph.row_map[inode+1];

    const ScalarCoordType row_z = node_coords(inode,2);

    const bool row_on_boundary =
      ( row_z <= bc_lower_z ) || ( bc_upper_z <= row_z );

    if ( ! row_on_boundary ) {
      // Clear the coefficients that couple this row to boundary nodes.
      for ( size_type k = row_begin ; k < row_end ; ++k ) {
        const size_type col_node = matrix.graph.entries(k) ;
        const ScalarCoordType col_z = node_coords(col_node,2);

        if ( ( col_z <= bc_lower_z ) || ( bc_upper_z <= col_z ) ) {
          matrix.coefficients(k) = 0 ;
        }
      }
      return ;
    }

    // Boundary row: zero residual entry and an identity row in the matrix.
    rhs(inode) = 0 ;

    for ( size_type k = row_begin ; k < row_end ; ++k ) {
      if ( static_cast<int>( inode ) == matrix.graph.entries(k) ) {
        matrix.coefficients(k) = 1 ;
      }
      else {
        matrix.coefficients(k) = 0 ;
      }
    }
  }

  // Runs the functor over every row of 'linsys_matrix'; the row count is
  // the length of the row map minus one.
  static void apply( const matrix_type & linsys_matrix ,
                     const vector_type & linsys_rhs ,
                     const mesh_type & mesh ,
                     const ScalarCoordType bc_lower_z ,
                     const ScalarCoordType bc_upper_z)
  {
    const size_t row_count = linsys_matrix.graph.row_map.dimension_0() - 1 ;

    DirichletResidual functor ;

    functor.matrix      = linsys_matrix ;
    functor.rhs         = linsys_rhs ;
    functor.node_coords = mesh.node_coords ;
    functor.bc_lower_z  = bc_lower_z ;
    functor.bc_upper_z  = bc_upper_z ;

    parallel_for( row_count , functor );
  }
};
//----------------------------------------------------------------------------
} /* namespace Nonlinear */
} /* namespace HybridFEM */
#endif /* #ifndef KOKKOS_NONLINEARFUNCTORS_HPP */

View File

@ -1,167 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef PARALLELCOMM_HPP
#define PARALLELCOMM_HPP
//------------------------------------------------------------------------
#include <Kokkos_Macros.hpp>
//------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_MPI )
#include <mpi.h>
#include <string>
namespace comm {
struct Machine {
MPI_Comm mpi_comm ;
Machine() : mpi_comm( MPI_COMM_NULL ) {}
Machine( const Machine & rhs )
: mpi_comm( rhs.mpi_comm ) {}
Machine( MPI_Comm c ) : mpi_comm( c ) {}
static Machine init( int * argc , char *** argv )
{
MPI_Init( argc , argv );
return Machine( MPI_COMM_WORLD );
}
static void finalize() { MPI_Finalize(); }
};
/** \brief Number of processes in the communicator of 'machine'. */
inline
unsigned size( Machine machine )
{
  int process_count ;

  MPI_Comm_size( machine.mpi_comm , & process_count );

  return process_count ;
}
/** \brief Rank of the calling process within the communicator of 'machine'. */
inline
unsigned rank( Machine machine )
{
  int process_rank ;

  MPI_Comm_rank( machine.mpi_comm , & process_rank );

  return process_rank ;
}
/** \brief Maximum of 'local' over all processes of 'machine'.
 *
 *  Collective: performs an MPI_Allreduce with MPI_MAX on one double,
 *  so every process receives the same result.
 */
inline
double max( Machine machine , double local )
{
  double reduced = 0;

  MPI_Allreduce( & local , & reduced ,
                 1 , MPI_DOUBLE , MPI_MAX ,
                 machine.mpi_comm );

  return reduced ;
}
/** \brief Broadcast the command line of rank 0 to every process.
 *
 *  Rank 0 joins argv[1] .. argv[argc-1], each preceded by a single
 *  space.  The length and then the characters are broadcast from
 *  rank 0 so that all processes return the same string.
 *
 *  Collective: every process of 'machine' must call this function.
 *
 *  \param machine  communicator handle
 *  \param argc     argument count as given to 'main'
 *  \param argv     argument vector as given to 'main'
 *  \return the joined arguments of rank 0 (empty if it had none)
 */
inline
std::string command_line( Machine machine , const int argc , const char * const * const argv )
{
  std::string argline ;

  // Only the root assembles the string; argv[0] is skipped.
  if ( 0 == rank( machine ) ) {
    for ( int i = 1 ; i < argc ; ++i ) {
      argline.append(" ").append( argv[i] );
    }
  }

  // Agree on the length first so receivers can size their storage.
  int length = static_cast<int>( argline.length() );
  MPI_Bcast( & length , 1 , MPI_INT , 0 , machine.mpi_comm );

  argline.resize( length , ' ' );

  // Receive into the string's own writable storage instead of casting
  // away the const of data().  After the first broadcast 'length' is the
  // same on every process, so skipping the payload broadcast for an
  // empty command line is consistent across the communicator.
  if ( 0 < length ) {
    MPI_Bcast( & argline[0] , length , MPI_CHAR , 0 , machine.mpi_comm );
  }

  return argline ;
}
}
#else /* ! defined( KOKKOS_HAVE_MPI ) */
#include <string>
namespace comm {

/** \brief Serial stand-in for the machine handle: carries no state. */
struct Machine {

  /** Nothing to initialize; both arguments are ignored. */
  static Machine init( int * , char *** )
  {
    Machine serial ;
    return serial ;
  }

  /** Nothing to finalize. */
  static void finalize()
  {
  }
};

/** \brief A serial run always consists of one process. */
inline
unsigned size( Machine )
{
  return 1 ;
}

/** \brief The single process of a serial run has rank zero. */
inline
unsigned rank( Machine )
{
  return 0 ;
}

/** \brief With a single process the maximum is the local value. */
inline
double max( Machine , double local )
{
  return local ;
}

/** \brief Join argv[1] .. argv[argc-1], each preceded by one space. */
inline
std::string command_line( Machine machine , const int argc , const char * const * const argv )
{
  std::string joined ;

  // Mirrors the parallel version: only rank zero assembles the string.
  if ( 0 != rank( machine ) ) {
    return joined ;
  }

  for ( int iarg = 1 ; iarg < argc ; ++iarg ) {
    joined += ' ' ;
    joined += argv[iarg] ;
  }

  return joined ;
}

}
#endif /* ! defined( KOKKOS_HAVE_MPI ) */
//------------------------------------------------------------------------
#endif /* #ifndef PARALLELCOMM_HPP */

View File

@ -1,517 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_PARALLELDATAMAP_HPP
#define KOKKOS_PARALLELDATAMAP_HPP
#include <utility>
#include <limits>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <Kokkos_Core.hpp>
#include <ParallelComm.hpp>
namespace Kokkos {
//----------------------------------------------------------------------------
/** \brief Parallel distributed data mapping
*
* ordering { interior : { owned items not sent elsewhere }
* send : { owned items sent }
* receive : { not-owned items received } }
*
* recv { { N ghosted items from process P : ( P , N ) } }
*
* send { { N send items to process P : ( P , N ) } }
*
* send_item { send item offsets within 'send' range }
*/
struct ParallelDataMap {
typedef View< unsigned*[2], HostSpace > host_recv_type ;
typedef View< unsigned*[2], HostSpace > host_send_type ;
typedef View< unsigned* , HostSpace > host_send_item_type ;
comm::Machine machine ;
host_recv_type host_recv ;
host_send_type host_send ;
host_send_item_type host_send_item ;
unsigned count_interior ;
unsigned count_send ;
unsigned count_owned ; // = count_interior + count_send
unsigned count_receive ;
void assign( const unsigned arg_count_interior ,
const unsigned arg_count_owned ,
const unsigned arg_count_total ,
const unsigned arg_recv_msg ,
const unsigned arg_send_msg ,
const unsigned arg_send_count )
{
const std::string label("Kokkos::ParallelDataMap buffer");
count_interior = arg_count_interior ;
count_owned = arg_count_owned ;
count_send = arg_count_owned - arg_count_interior ;
count_receive = arg_count_total - arg_count_owned ;
host_recv = host_recv_type( label , arg_recv_msg );
host_send = host_send_type( label , arg_send_msg );
host_send_item = host_send_item_type( label , arg_send_count );
}
};
//----------------------------------------------------------------------------
//PackArray
//----------------------------------------------------------------------------
// Primary template: declared only, never defined here.
// Usable array types are provided through partial specializations
// on the View type; 'Rank' defaults to void.
template< class ArrayType , class Rank = void >
struct PackArray ;
/** \brief  Pack a contiguous range of a rank-one array into a buffer.
 *
 *  For each i in [0,arg_count):  output[i] = input( arg_begin + i )
 */
template< typename DeviceType, typename ValueType >
struct PackArray< View< ValueType* , DeviceType > , void >
{
  typedef DeviceType                             execution_space ;
  typedef typename DeviceType::size_type         size_type ;
  typedef View< ValueType* , execution_space >   array_type ;
  typedef View< ValueType* , execution_space >   buffer_type ;

private:

  buffer_type  output ;
  array_type   input ;
  size_type    base ;

public:

  /** Copy one entry; invoked by parallel_for with i in [0,arg_count). */
  KOKKOS_INLINE_FUNCTION
  void operator()( const size_type i ) const
  { output[i] = input(base+i); }

  /** \brief  Launch the packing kernel.
   *
   *  \param arg_output  destination buffer, at least arg_count long
   *  \param arg_begin   first index of 'arg_input' to copy
   *  \param arg_count   number of entries to copy
   *  \param arg_input   source array
   */
  inline
  static
  void pack( const buffer_type & arg_output ,
             const size_type     arg_begin ,
             const size_type     arg_count ,
             const array_type  & arg_input )
  {
    // Nothing to copy: skip the dispatch, consistent with the
    // rank-two specialization which never launches on a zero count.
    if ( arg_count ) {
      PackArray op ;
      op.output = arg_output ;
      op.input  = arg_input ;
      op.base   = arg_begin ;
      parallel_for( arg_count , op );
    }
  }
};
/** \brief  Pack a contiguous range of rows of a rank-two array,
 *          with N1 values per row, into a flat buffer.
 *
 *  For each i in [0,arg_count) and j in [0,N1):
 *    output[ i * N1 + j ] = input( arg_begin + i , j )
 */
template< typename DeviceType, typename ValueType , unsigned N1 >
struct PackArray< View< ValueType*[N1] , DeviceType > , void >
{
  typedef DeviceType                               execution_space ;
  typedef typename DeviceType::size_type           size_type ;
  typedef View< ValueType*[N1] , execution_space > array_type ;
  typedef View< ValueType* , execution_space >     buffer_type ;

private:

  buffer_type  output ;
  array_type   input ;
  size_type    base ;

public:

  /** Copy row 'base + i' of the array into slot 'i' of the buffer. */
  KOKKOS_INLINE_FUNCTION
  void operator()( const size_type i ) const
  {
    const size_type row    = base + i ;
    const size_type offset = i * N1 ;

    for ( size_type j = 0 ; j < N1 ; ++j ) {
      output[ offset + j ] = input( row , j );
    }
  }

  /** Launch the packing kernel; a zero count dispatches nothing. */
  inline static
  void pack( const buffer_type & arg_output ,
             const size_type     arg_begin ,
             const size_type     arg_count ,
             const array_type  & arg_input )
  {
    if ( ! arg_count ) { return ; }

    PackArray functor ;

    functor.output = arg_output ;
    functor.input  = arg_input ;
    functor.base   = arg_begin ;

    parallel_for( arg_count , functor );
  }
};
//----------------------------------------------------------------------------
//UnpackArray
//----------------------------------------------------------------------------
// Primary template: declared only, never defined here.
// Usable array types are provided through partial specializations
// on the View type; 'Rank' defaults to void.
template< class ArrayType , class Rank = void > struct UnpackArray ;
/** \brief  Unpack a buffer into a contiguous range of a rank-one array.
 *
 *  For each i in [0,arg_count):  output( arg_begin + i ) = input[i]
 */
template< typename DeviceType, typename ValueType >
struct UnpackArray< View< ValueType* , DeviceType > , void >
{
  typedef DeviceType                             execution_space ;
  typedef typename DeviceType::size_type         size_type ;
  typedef View< ValueType* , execution_space >   array_type ;
  typedef View< ValueType* , execution_space >   buffer_type ;

private:

  array_type   output ;
  buffer_type  input ;
  size_type    base ;

public:

  /** Copy one entry; invoked by parallel_for with i in [0,arg_count). */
  KOKKOS_INLINE_FUNCTION
  void operator()( const size_type i ) const
  { output(base+i) = input[i]; }

  /** \brief  Launch the unpacking kernel.
   *
   *  \param arg_output  destination array
   *  \param arg_input   source buffer, at least arg_count long
   *  \param arg_begin   first index of 'arg_output' to overwrite
   *  \param arg_count   number of entries to copy
   */
  inline
  static
  void unpack( const array_type  & arg_output ,
               const buffer_type & arg_input ,
               const size_type     arg_begin ,
               const size_type     arg_count )
  {
    // Nothing to copy: skip the dispatch, consistent with the
    // rank-two specialization which never launches on a zero count.
    if ( arg_count ) {
      UnpackArray op ;
      op.output = arg_output ;
      op.input  = arg_input ;
      op.base   = arg_begin ;
      parallel_for( arg_count , op );
    }
  }
};
/** \brief  Unpack a flat buffer into a contiguous range of rows of a
 *          rank-two array with N1 values per row.
 *
 *  For each i in [0,arg_count) and j in [0,N1):
 *    output( arg_begin + i , j ) = input( i * N1 + j )
 */
template< typename DeviceType, typename ValueType , unsigned N1 >
struct UnpackArray< View< ValueType*[N1] , DeviceType > , void >
{
  typedef DeviceType                               execution_space ;
  typedef typename DeviceType::size_type           size_type ;
  typedef View< ValueType* , execution_space >     buffer_type ;
  typedef View< ValueType*[N1] , execution_space > array_type ;

private:

  array_type   output ;
  buffer_type  input ;
  size_type    base ;

public:

  /** Copy slot 'i' of the buffer into row 'base + i' of the array. */
  KOKKOS_INLINE_FUNCTION
  void operator()( const size_type i ) const
  {
    const size_type row    = base + i ;
    const size_type offset = i * N1 ;

    for ( size_type j = 0 ; j < N1 ; ++j ) {
      output( row , j ) = input( offset + j );
    }
  }

  /** Launch the unpacking kernel; a zero count dispatches nothing. */
  inline
  static
  void unpack( const array_type  & arg_output ,
               const buffer_type & arg_input ,
               const size_type     arg_begin ,
               const size_type     arg_count )
  {
    if ( ! arg_count ) { return ; }

    UnpackArray functor ;

    functor.output = arg_output ;
    functor.input  = arg_input ;
    functor.base   = arg_begin ;

    parallel_for( arg_count , functor );
  }
};
//----------------------------------------------------------------------------
// Primary template: declared only.  Implementations are supplied as
// partial specializations on the 'DataMap' type.
template< class ValueType , class Device , class DataMap >
class AsyncExchange ;
} // namespace Kokkos
//----------------------------------------------------------------------------
// Application call procedure:
//
// construct: AsyncExchange object
// * pack send buffer on device
// initiate: copy send buffer from device to host
// * dispatch asynchronous local work
// complete: send/receive on host, copy receive buffer to device
// * unpack receive buffer on device
// destroy: AsyncExchange object
//
//----------------------------------------------------------------------------
#ifdef KOKKOS_HAVE_MPI
namespace Kokkos {
// MPI implementation of the exchange for a ParallelDataMap.
//
// Data is staged through host buffers: 'setup' posts the receives and
// copies the device send buffer to the host, 'send_receive' performs the
// sends, waits for and verifies the receives, and copies the received
// data back to the device.
template< class ValueType , class Device >
class AsyncExchange< ValueType, Device , Kokkos::ParallelDataMap > {
public:
typedef Device execution_space ;
typedef Kokkos::ParallelDataMap data_map_type ;
typedef Kokkos::View< ValueType* , execution_space > buffer_dev_type ;
typedef typename buffer_dev_type::HostMirror buffer_host_type ;
private:
// Tag attached to every message sent or received by this class.
static const int mpi_tag = 11 ;
// Private copy of the map given to the constructor.
const data_map_type data_map ;
// Number of ValueType values exchanged per item.
unsigned chunk_size ;
// Largest item count of any single send message.
unsigned send_count_max ;
// Host staging: all receives, all send items, and one outgoing message.
buffer_host_type host_recv_buffer ;
buffer_host_type host_send_buffer ;
buffer_host_type send_msg_buffer ;
buffer_dev_type dev_buffer ;
buffer_dev_type dev_send_buffer ; // Subview for send
buffer_dev_type dev_recv_buffer ; // Subview for receive
// One request handle per receive message.
std::vector< MPI_Request > recv_request ;
public:
// Device buffer shared by packing (before 'setup') and
// unpacking (after 'send_receive').
const buffer_dev_type & buffer() const { return dev_buffer ; }
// Size all buffers for 'arg_chunk' values per item of 'arg_data_map'.
// No MPI call is made here.
AsyncExchange( const data_map_type & arg_data_map ,
const size_t arg_chunk )
: data_map( arg_data_map )
, chunk_size( arg_chunk )
, send_count_max( 0 )
, host_recv_buffer()
, host_send_buffer()
, send_msg_buffer()
, dev_buffer()
, dev_send_buffer()
, dev_recv_buffer()
, recv_request()
{
const size_t send_msg_count = arg_data_map.host_send.dimension_0();
const size_t recv_msg_count = arg_data_map.host_recv.dimension_0();
const size_t send_msg_length = arg_chunk * arg_data_map.count_send ;
const size_t recv_msg_length = arg_chunk * arg_data_map.count_receive ;
// Column 1 of 'host_send' holds the item count of each message.
for ( size_t i = 0 ; i < send_msg_count ; ++i ) {
send_count_max = std::max( send_count_max ,
(unsigned) arg_data_map.host_send(i,1) );
}
// A single shared buffer on the device can be used for
// send and receive message buffers.
dev_buffer = buffer_dev_type(
std::string("AsyncExchange dev_buffer") ,
std::max( send_msg_length , recv_msg_length ) );
// Both subviews below start at offset zero of 'dev_buffer',
// so the send and receive ranges overlap.
// Total send subview of the device buffer
dev_send_buffer =
Kokkos::subview( dev_buffer , std::pair<size_t,size_t>( 0 , send_msg_length ) );
// Total receive subview of the device buffer
dev_recv_buffer =
Kokkos::subview( dev_buffer , std::pair<size_t,size_t>( 0 , recv_msg_length ) );
// Total receive message buffer on the host:
host_recv_buffer = buffer_host_type(
std::string("AsyncExchange host_recv_buffer") ,
recv_msg_length );
// Total send message buffer on the host:
host_send_buffer = buffer_host_type(
std::string("AsyncExchange host_send_buffer") ,
send_msg_length );
// Individual send message buffer on the host:
send_msg_buffer = buffer_host_type(
std::string("AsyncExchange send_msg_buffer") ,
arg_chunk * send_count_max );
// MPI asynchronous receive request handles:
recv_request.assign( recv_msg_count , MPI_REQUEST_NULL );
}
//------------------------------------------------------------------------
// Post one non-blocking receive per 'host_recv' row, then copy the
// packed device send buffer to the host.
void setup()
{
{ // Post receives:
const size_t recv_msg_count = data_map.host_recv.dimension_0();
ValueType * ptr = host_recv_buffer.ptr_on_device();
for ( size_t i = 0 ; i < recv_msg_count ; ++i ) {
// Row i is ( source process , item count ).
const int proc = data_map.host_recv(i,0);
const int count = data_map.host_recv(i,1) * chunk_size ;
// Messages are transferred as raw bytes.
MPI_Irecv( ptr , count * sizeof(ValueType) , MPI_BYTE ,
proc , mpi_tag , data_map.machine.mpi_comm ,
& recv_request[i] );
// Consecutive messages land in consecutive segments of the buffer.
ptr += count ;
}
}
// Copy send buffer from the device to host memory for sending
Kokkos::deep_copy( host_send_buffer , dev_send_buffer );
// Done with the device until communication is complete.
// Application can dispatch asynchronous work on the device.
}
// Application can dispatch local work to device ...
// No communication progress until main thread calls 'send_receive'
//
// Sends every message, waits for every receive posted by 'setup',
// and copies the received data to the device.
// Throws std::runtime_error when a received message does not match
// the expected source process or byte count.
void send_receive()
{
const size_t recv_msg_count = data_map.host_recv.dimension_0();
const size_t send_msg_count = data_map.host_send.dimension_0();
// Pack and send:
// 'j' is not reset between messages: it walks 'host_send_item'
// across all messages in order.
for ( size_t i = 0 , j = 0 ; i < send_msg_count ; ++i ) {
const int proc = data_map.host_send(i,0);
const int count = data_map.host_send(i,1);
// Gather the chunks of this message's items into 'send_msg_buffer'.
for ( int k = 0 , km = 0 ; k < count ; ++k , ++j ) {
const int km_end = km + chunk_size ;
int ki = chunk_size * data_map.host_send_item(j);
for ( ; km < km_end ; ++km , ++ki ) {
send_msg_buffer[km] = host_send_buffer[ki];
}
}
// MPI_Ssend blocks until
// (1) a receive is matched for the message and
// (2) the send buffer can be re-used.
//
// It is suggested that MPI_Ssend will have the best performance:
// http://www.mcs.anl.gov/research/projects/mpi/sendmode.html .
MPI_Ssend( send_msg_buffer.ptr_on_device(),
count * chunk_size * sizeof(ValueType) , MPI_BYTE ,
proc , mpi_tag , data_map.machine.mpi_comm );
}
// Wait for receives and verify:
// Each iteration completes one of the posted requests, in any order.
for ( size_t i = 0 ; i < recv_msg_count ; ++i ) {
MPI_Status recv_status ;
int recv_which = 0 ;
int recv_size = 0 ;
MPI_Waitany( recv_msg_count , & recv_request[0] ,
& recv_which , & recv_status );
const int recv_proc = recv_status.MPI_SOURCE ;
MPI_Get_count( & recv_status , MPI_BYTE , & recv_size );
// Verify message properly received:
const int expected_proc = data_map.host_recv(recv_which,0);
const int expected_size = data_map.host_recv(recv_which,1) *
chunk_size * sizeof(ValueType);
if ( ( expected_proc != recv_proc ) ||
( expected_size != recv_size ) ) {
std::ostringstream msg ;
msg << "AsyncExchange error:"
<< " P" << comm::rank( data_map.machine )
<< " received from P" << recv_proc
<< " size " << recv_size
<< " expected " << expected_size
<< " from P" << expected_proc ;
throw std::runtime_error( msg.str() );
}
}
// Copy received data to device memory.
Kokkos::deep_copy( dev_recv_buffer , host_recv_buffer );
}
};
} // namespace Kokkos
#else /* ! #ifdef KOKKOS_HAVE_MPI */
namespace Kokkos {
/** \brief  Exchange stub used when MPI is not available.
 *
 *  Keeps the interface of the parallel version, but the buffer is
 *  never allocated and both communication phases do nothing.
 */
template< class ValueType , class Device >
class AsyncExchange< ValueType, Device , Kokkos::ParallelDataMap > {
public:

  typedef Device                                        execution_space ;
  typedef Kokkos::ParallelDataMap                       data_map_type ;
  typedef Kokkos::View< ValueType* , execution_space >  buffer_dev_type ;
  typedef typename buffer_dev_type::HostMirror          buffer_host_type ;

  buffer_dev_type dev_buffer ;

  /** Both arguments are ignored; the buffer is default constructed. */
  AsyncExchange( const data_map_type & , const size_t )
    : dev_buffer()
    {}

  /** The default constructed device buffer. */
  const buffer_dev_type & buffer() const
  {
    return dev_buffer ;
  }

  //------------------------------------------------------------------------

  /** No receives to post and nothing to copy. */
  void setup()
  {
  }

  /** No messages to send or receive. */
  void send_receive()
  {
  }
};
} // namespace Kokkos
#endif /* ! #ifdef KOKKOS_HAVE_MPI */
//----------------------------------------------------------------------------
#endif /* #ifndef KOKKOS_PARALLELDATAMAP_HPP */

View File

@ -1,178 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#if 0
#include <stdlib.h>
#include <string.h>
#include <ParallelMachine.hpp>
#include <Kokkos_Core.hpp>
#if ! defined( KOKKOS_HAVE_MPI )
#define MPI_COMM_NULL 0
#endif
//------------------------------------------------------------------------
namespace Parallel {
/** \brief  Coordinated initialization of Cuda, MPI, and Threads.
 *
 *  Steps, in order:
 *   1) With Cuda support and an "mpi_cuda" argument: select the Cuda
 *      device from the node-local rank found in the environment,
 *      before MPI is initialized.
 *   2) With MPI support: MPI_Init, then query size and rank.
 *   3) With a "host" argument: initialize Kokkos::Threads, optionally
 *      with the two counts that follow the argument.
 *   4) With Cuda support, if step 1 did not select a device and an
 *      "mpi_cuda" or "cuda" argument is present: select the Cuda
 *      device from the MPI rank.
 *
 *  \param argc  pointer to the argument count of 'main'
 *  \param argv  pointer to the argument vector of 'main'
 */
Machine::Machine( int * argc , char *** argv )
  : m_mpi_comm( MPI_COMM_NULL )
  , m_mpi_size(0)
  , m_mpi_rank(0)
  , m_mpi_gpu(0)
  , m_gpu_arch(0) // was left uninitialized; gpu_arch() must not return garbage
{

#if defined( KOKKOS_HAVE_CUDA )
  //------------------------------------
  // Might be using a Cuda aware version of MPI.
  // Must select Cuda device before initializing MPI.
  {
    int i = 1 ;
    for ( ; i < *argc && strcmp((*argv)[i],"mpi_cuda") ; ++i );

    if ( i < *argc ) {
      // Determine, if possible, what will be the node-local
      // rank of the MPI process once MPI has been initialized.
      // This rank is needed to set the Cuda device before 'mvapich'
      // is initialized.

      const char * const mvapich_local_rank = getenv("MV2_COMM_WORLD_LOCAL_RANK");
      const char * const slurm_local_rank   = getenv("SLURM_LOCALID");

      const int pre_mpi_local_rank =
        0 != mvapich_local_rank ? atoi( mvapich_local_rank ) : (
        0 != slurm_local_rank   ? atoi( slurm_local_rank ) : (
        -1 ) );

      if ( 0 <= pre_mpi_local_rank ) {

        const int ngpu = Kokkos::Cuda::detect_device_count();

        // Never evaluate '% 0'.  With no detected device fall back to
        // device 0.  NOTE(review): presumably Cuda::initialize then
        // reports the missing device itself -- confirm.
        const int cuda_device_rank =
          0 < ngpu ? pre_mpi_local_rank % ngpu : 0 ;

        Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( cuda_device_rank ) );

        m_mpi_gpu = 1 ;
      }
    }
  }
#endif

  //------------------------------------

#if defined( KOKKOS_HAVE_MPI )
  MPI_Init( argc , argv );
  m_mpi_comm = MPI_COMM_WORLD ;
  MPI_Comm_size( m_mpi_comm , & m_mpi_size );
  MPI_Comm_rank( m_mpi_comm , & m_mpi_rank );
#endif

  // Query hwloc after MPI initialization to allow MPI binding:
  //------------------------------------
  // Request to use host device:
  {
    int i = 1 ;
    for ( ; i < *argc && strcmp((*argv)[i],"host") ; ++i );

    if ( i < *argc ) {

      // Defaults come from the hwloc query ...
      unsigned team_count       = Kokkos::hwloc::get_available_numa_count();
      unsigned threads_per_team = Kokkos::hwloc::get_available_cores_per_numa() *
                                  Kokkos::hwloc::get_available_threads_per_core();

      // ... and are overridden only when two more arguments follow "host".
      if ( i + 2 < *argc ) {
        team_count       = atoi( (*argv)[i+1] );
        threads_per_team = atoi( (*argv)[i+2] );
      }

      Kokkos::Threads::initialize( team_count * threads_per_team );
    }
  }

#if defined( KOKKOS_HAVE_CUDA )
  //------------------------------------
  // Request to use Cuda device and not already initialized.
  if ( ! m_mpi_gpu ) {
    int i = 1 ;
    for ( ; i < *argc && strcmp((*argv)[i],"mpi_cuda") && strcmp((*argv)[i],"cuda") ; ++i );

    if ( i < *argc ) {

      const int ngpu = Kokkos::Cuda::detect_device_count();

      // Same guard as above: never evaluate '% 0'.
      const int cuda_device_rank = 0 < ngpu ? m_mpi_rank % ngpu : 0 ;

      Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( cuda_device_rank ) );
    }
  }
#endif

}
/** \brief  Shut down in order: Threads, then Cuda and MPI
 *          when support for them is compiled in.
 */
Machine::~Machine()
{
  Kokkos::Threads::finalize();

#ifdef KOKKOS_HAVE_CUDA
  Kokkos::Cuda::finalize();
#endif

#ifdef KOKKOS_HAVE_MPI
  MPI_Finalize();
#endif
}
/** \brief  Write the MPI rank and size, followed by the configuration
 *          of the Threads device and, when compiled in, the Cuda device.
 */
void Machine::print_configuration( std::ostream & msg ) const
{
  // One flushed line of the form: MPI [ rank / size ]
  msg << "MPI [ " << m_mpi_rank << " / " << m_mpi_size << " ]" ;
  msg << std::endl ;

  Kokkos::Threads::print_configuration( msg );

#ifdef KOKKOS_HAVE_CUDA
  Kokkos::Cuda::print_configuration( msg );
#endif
}
}
#endif /* #if 0 */

View File

@ -1,118 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#error "ParallelMachine"
#ifndef PARALLELMACHINE_HPP
#define PARALLELMACHINE_HPP
//------------------------------------------------------------------------
#include <iosfwd>
#include <Kokkos_Core.hpp>
//------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_MPI )
#include <mpi.h>
#else
typedef int MPI_Comm ;
#endif
//------------------------------------------------------------------------
//------------------------------------------------------------------------
namespace Parallel {
/** \brief Hybrid parallel machine with MPI+Kokkos::Threads or MPI+Kokkos::Cuda.
*
* Initialization of MPI and Kokkos device has interdependencies which this
* class manages. The command line and environment variables are queried to initialize
* the Threads or Cuda device:
*
* 1) cuda : initializes Cuda device
* 2) host : initializes Threads device with all hwloc detected cores.
* 3) host #gang #worker : initializes Threads with the specified gang and worker counts (#gang times #worker threads)
*/
class Machine {
public:

  /** \brief  Coordinated initialize MPI, Cuda, or Threads devices from 'main'.  */
  Machine( int * argc , char *** argv );

  ~Machine();

  /** \brief  Communicator recorded by the constructor */
  MPI_Comm mpi_comm() const
  {
    return m_mpi_comm ;
  }

  /** \brief  Number of MPI processes recorded by the constructor */
  int mpi_size() const
  {
    return m_mpi_size ;
  }

  /** \brief  MPI rank of this process recorded by the constructor */
  int mpi_rank() const
  {
    return m_mpi_rank ;
  }

  /** \brief  If using MPI that can directly operate on GPU memory */
  bool mpi_gpu() const
  {
    return 0 != m_mpi_gpu ;
  }

  /** \brief  If using GPU then what architecture */
  unsigned gpu_arch() const
  {
    return m_gpu_arch ;
  }

  void print_configuration( std::ostream & ) const ;

private:

  MPI_Comm m_mpi_comm ;
  int      m_mpi_size ;
  int      m_mpi_rank ;
  unsigned m_mpi_gpu ;
  unsigned m_gpu_arch ;

  // Declared private and not defined inline:
  // not default constructible, not copyable, not assignable.
  Machine();
  Machine( const Machine & );
  Machine & operator = ( const Machine & );
};
}
//------------------------------------------------------------------------
#endif /* #ifndef PARALLELMACHINE_HPP */

View File

@ -1,400 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef SPARSELINEARSYSTEM_HPP
#define SPARSELINEARSYSTEM_HPP
#include <cmath>
#include <impl/Kokkos_Timer.hpp>
#include <Kokkos_Core.hpp>
#include <Kokkos_StaticCrsGraph.hpp>
#include <LinAlgBLAS.hpp>
namespace Kokkos {
/** \brief  Sparse matrix in compressed row storage:
 *          a static graph plus one coefficient per graph entry.
 */
template< typename ScalarType , class Device >
struct CrsMatrix {

  typedef ScalarType  value_type ;
  typedef Device      execution_space ;

  // Sparsity pattern (instantiated with 'int' arguments).
  typedef StaticCrsGraph< int , execution_space , void , int >  graph_type ;

  // Matrix values, indexed like the entries of the graph.
  typedef View< value_type* , execution_space >  coefficients_type ;

  graph_type         graph ;
  coefficients_type  coefficients ;
};
//----------------------------------------------------------------------------
namespace Impl {
// Primary template: declared only.  Concrete matrix / vector
// combinations are supplied as partial specializations.
template< class Matrix , class OutputVector , class InputVector >
struct Multiply ;
}
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief  Sparse matrix-vector product  y = A * x  for a CrsMatrix
 *          and rank-one views.
 *
 *  Constructing the functor runs the product: the constructor
 *  dispatches one parallel_for iteration per row.
 */
template< typename AScalarType ,
          typename VScalarType ,
          class DeviceType >
struct Multiply< CrsMatrix<AScalarType,DeviceType> ,
                 View<VScalarType*,DeviceType > ,
                 View<VScalarType*,DeviceType > >
{
  typedef DeviceType                           execution_space ;
  typedef typename execution_space::size_type  size_type ;

  typedef View<       VScalarType*, execution_space, MemoryUnmanaged >  vector_type ;
  typedef View< const VScalarType*, execution_space, MemoryUnmanaged >  vector_const_type ;

  typedef CrsMatrix< AScalarType , execution_space >  matrix_type ;

private:

  matrix_type        m_A ;
  vector_const_type  m_x ;
  vector_type        m_y ;

public:

  //--------------------------------------------------------------------------

  /** Dot product of row 'iRow' of the matrix with 'x', stored in y(iRow).
   *  The row sum is accumulated in double.
   */
  KOKKOS_INLINE_FUNCTION
  void operator()( const size_type iRow ) const
  {
    const size_type row_begin = m_A.graph.row_map[iRow];
    const size_type row_end   = m_A.graph.row_map[iRow+1];

    double sum = 0 ;

    // The Intel compiler gets vectorization hints; the loop is the same.
#if defined( __INTEL_COMPILER )
#pragma simd reduction(+:sum)
#pragma ivdep
#endif
    for ( size_type k = row_begin ; k < row_end ; ++k ) {
      sum += m_A.coefficients(k) * m_x( m_A.graph.entries(k) );
    }

    m_y(iRow) = sum ;
  }

  /** \brief  Capture the operands and immediately compute 'nrow' rows.
   *
   *  The third argument (the column count) is accepted but unused.
   */
  Multiply( const matrix_type & A ,
            const size_type     nrow ,
            const size_type     , // ncol ,
            const vector_type & x ,
            const vector_type & y )
    : m_A( A )
    , m_x( x )
    , m_y( y )
  {
    parallel_for( nrow , *this );
  }
};
//----------------------------------------------------------------------------
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
//----------------------------------------------------------------------------
template< typename AScalarType ,
typename VScalarType ,
class Device >
class Operator {
typedef CrsMatrix<AScalarType,Device> matrix_type ;
typedef View<VScalarType*,Device> vector_type ;
private:
const CrsMatrix<AScalarType,Device> A ;
ParallelDataMap data_map ;
AsyncExchange< VScalarType , Device , ParallelDataMap > exchange ;
public:
Operator( const ParallelDataMap & arg_data_map ,
const CrsMatrix<AScalarType,Device> & arg_A )
: A( arg_A )
, data_map( arg_data_map )
, exchange( arg_data_map , 1 )
{}
void apply( const View<VScalarType*,Device> & x ,
const View<VScalarType*,Device> & y )
{
// Gather off-processor data for 'x'
PackArray< vector_type >::pack( exchange.buffer() ,
data_map.count_interior ,
data_map.count_send , x );
exchange.setup();
// If interior & boundary matrices then could launch interior multiply
exchange.send_receive();
UnpackArray< vector_type >::unpack( x , exchange.buffer() ,
data_map.count_owned ,
data_map.count_receive );
const typename Device::size_type nrow = data_map.count_owned ;
const typename Device::size_type ncol = data_map.count_owned +
data_map.count_receive ;
Impl::Multiply<matrix_type,vector_type,vector_type>( A, nrow, ncol, x, y);
}
};
//----------------------------------------------------------------------------
/** \brief  Conjugate-gradient style iteration for A * x = b.
 *
 *  \param data_map           Supplies count_owned / count_receive and the
 *                            'machine' handle passed to every dot().
 *  \param A                  Matrix wrapped by the Operator used for products.
 *  \param b                  Right-hand side.
 *  \param x                  Initial guess on entry; updated by axpy() each iteration.
 *  \param iteration          [out] Number of loop iterations executed.
 *  \param normr              [out] sqrt of the last computed r.r value.
 *  \param iter_time          [out] Seconds reported by the timer created
 *                            immediately before the iteration loop.
 *  \param maximum_iteration  Upper bound on loop iterations (default 200).
 *  \param tolerance          The loop continues while tolerance < normr
 *                            (default: epsilon of VScalarType).
 *
 *  NOTE(review): alpha = old_rdot / pAp_dot is computed without checking
 *  pAp_dot for zero.
 *  NOTE(review): the meaning of waxpby/axpy/xpby/dot is taken from the inline
 *  comments below; confirm against LinAlgBLAS.hpp.
 */
template< typename AScalarType , typename VScalarType , class Device >
void cgsolve(
const ParallelDataMap data_map ,
const CrsMatrix<AScalarType,Device> A ,
const View<VScalarType*,Device> b ,
const View<VScalarType*,Device> x ,
size_t & iteration ,
double & normr ,
double & iter_time ,
const size_t maximum_iteration = 200 ,
const double tolerance = std::numeric_limits<VScalarType>::epsilon() )
{
typedef View<VScalarType*,Device> vector_type ;
//typedef View<VScalarType, Device> value_type ; // unused
const size_t count_owned = data_map.count_owned ;
const size_t count_total = data_map.count_owned + data_map.count_receive ;
Operator<AScalarType,VScalarType,Device> matrix_operator( data_map , A );
// Need input vector to matvec to be owned + received
vector_type pAll ( "cg::p" , count_total );
// 'p' is the subview of pAll covering indices [0,count_owned).
vector_type p = Kokkos::subview( pAll , std::pair<size_t,size_t>(0,count_owned) );
vector_type r ( "cg::r" , count_owned );
vector_type Ap( "cg::Ap", count_owned );
/* r = b - A * x ; */
/* p = x */ deep_copy( p , x );
/* Ap = A * p */ matrix_operator.apply( pAll , Ap );
/* r = b - Ap */ waxpby( count_owned , 1.0 , b , -1.0 , Ap , r );
/* p = r */ deep_copy( p , r );
// presumably a reduction across 'data_map.machine' -- confirm in LinAlgBLAS.hpp
double old_rdot = dot( count_owned , r , data_map.machine );
normr = sqrt( old_rdot );
iteration = 0 ;
// Timer starts here, so iter_time excludes the setup above.
Kokkos::Impl::Timer wall_clock ;
while ( tolerance < normr && iteration < maximum_iteration ) {
/* pAp_dot = dot( p , Ap = A * p ) */
/* Ap = A * p */ matrix_operator.apply( pAll , Ap );
const double pAp_dot = dot( count_owned , p , Ap , data_map.machine );
const double alpha = old_rdot / pAp_dot ;
/* x += alpha * p ; */ axpy( count_owned, alpha, p , x );
/* r -= alpha * Ap ; */ axpy( count_owned, -alpha, Ap, r );
const double r_dot = dot( count_owned , r , data_map.machine );
const double beta = r_dot / old_rdot ;
/* p = r + beta * p ; */ xpby( count_owned , r , beta , p );
// Carry r.r into the next iteration and refresh the residual norm.
normr = sqrt( old_rdot = r_dot );
++iteration ;
}
iter_time = wall_clock.seconds();
}
//----------------------------------------------------------------------------
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#if defined( KOKKOS_HAVE_CUDA )
#if ( CUDA_VERSION < 6000 )
#pragma message "cusparse_v2.h"
#include <cusparse_v2.h>
#else
#pragma message "cusparse.h"
#include <cusparse.h>
#endif
namespace Kokkos {
namespace Impl {
struct CudaSparseSingleton {
cusparseHandle_t handle;
cusparseMatDescr_t descra;
CudaSparseSingleton()
{
cusparseCreate( & handle );
cusparseCreateMatDescr( & descra );
cusparseSetMatType( descra , CUSPARSE_MATRIX_TYPE_GENERAL );
cusparseSetMatIndexBase( descra , CUSPARSE_INDEX_BASE_ZERO );
}
static CudaSparseSingleton & singleton();
};
/** \brief  Double-precision sparse matrix-vector multiply on Cuda,
 *          delegated to cusparseDcsrmv.
 *
 *  Called with alpha = 1 and beta = 0 and the non-transpose operation;
 *  constructing the object performs the call.
 *  Throws std::runtime_error if cuSPARSE does not report success.
 */
template<>
struct Multiply< CrsMatrix<double,Cuda> ,
                 View<double*,Cuda > ,
                 View<double*,Cuda > >
{
  typedef Cuda                                        execution_space ;
  typedef execution_space::size_type                  size_type ;
  typedef double                                      scalar_type ;
  typedef View< scalar_type* , execution_space >      vector_type ;
  typedef CrsMatrix< scalar_type , execution_space >  matrix_type ;

  Multiply( const matrix_type & A ,
            const size_type nrow ,
            const size_type ncol ,
            const vector_type & x ,
            const vector_type & y )
  {
    CudaSparseSingleton & sparse = CudaSparseSingleton::singleton();

    // Scaling factors handed to cuSPARSE by address.
    const scalar_type alpha = 1 ;
    const scalar_type beta  = 0 ;

    const cusparseStatus_t rc =
      cusparseDcsrmv( sparse.handle ,
                      CUSPARSE_OPERATION_NON_TRANSPOSE ,
                      nrow ,
                      ncol ,
                      A.coefficients.dimension_0() ,   // number of stored entries
                      &alpha ,
                      sparse.descra ,
                      A.coefficients.ptr_on_device() ,
                      A.graph.row_map.ptr_on_device() ,
                      A.graph.entries.ptr_on_device() ,
                      x.ptr_on_device() ,
                      &beta ,
                      y.ptr_on_device() );

    if ( rc != CUSPARSE_STATUS_SUCCESS ) {
      throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
    }
  }
};
/** \brief  Single-precision sparse matrix-vector multiply on Cuda,
 *          delegated to cusparseScsrmv.
 *
 *  Called with alpha = 1 and beta = 0 and the non-transpose operation;
 *  constructing the object performs the call.
 *  Throws std::runtime_error if cuSPARSE does not report success.
 */
template<>
struct Multiply< CrsMatrix<float,Cuda> ,
                 View<float*,Cuda > ,
                 View<float*,Cuda > >
{
  typedef Cuda                                        execution_space ;
  typedef execution_space::size_type                  size_type ;
  typedef float                                       scalar_type ;
  typedef View< scalar_type* , execution_space >      vector_type ;
  typedef CrsMatrix< scalar_type , execution_space >  matrix_type ;

public:

  Multiply( const matrix_type & A ,
            const size_type nrow ,
            const size_type ncol ,
            const vector_type & x ,
            const vector_type & y )
  {
    CudaSparseSingleton & s = CudaSparseSingleton::singleton();
    const scalar_type alpha = 1 , beta = 0 ;

    cusparseStatus_t status =
      cusparseScsrmv( s.handle ,
                      CUSPARSE_OPERATION_NON_TRANSPOSE ,
                      nrow , ncol , A.coefficients.dimension_0() ,
                      &alpha ,
                      s.descra ,
                      A.coefficients.ptr_on_device() ,
                      A.graph.row_map.ptr_on_device() ,
                      A.graph.entries.ptr_on_device() ,
                      x.ptr_on_device() ,
                      &beta ,
                      y.ptr_on_device() );

    if ( CUSPARSE_STATUS_SUCCESS != status ) {
      // Name the routine that actually failed (the single-precision one).
      throw std::runtime_error( std::string("ERROR - cusparseScsrmv " ) );
    }
  }
};
} /* namespace Impl */
} /* namespace Kokkos */
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef SPARSELINEARSYSTEM_HPP */

View File

@ -1,276 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef SPARSELINEARSYSTEMFILL_HPP
#define SPARSELINEARSYSTEMFILL_HPP
#include <vector>
#include <algorithm>
#include <limits>
#include <FEMesh.hpp>
#include <SparseLinearSystem.hpp>
//----------------------------------------------------------------------------
namespace HybridFEM {
// Functor that gathers per-element matrices and vectors into a linear
// system.  Only declared here; the partial specialization for
// Kokkos::CrsMatrix and FEMesh follows.
template< class MatrixType , class MeshType ,
class elem_matrices_type ,
class elem_vectors_type > struct GatherFill ;
/** \brief  Gather element contributions into a CrsMatrix and right-hand side.
 *
 *  One work item per matrix row: for every (element, local row) pair listed
 *  for that row in the mesh's node_elem_ids, the element vector entry is
 *  added to the right-hand side and the element matrix row is added to the
 *  coefficient slots given by 'elem_graph'.
 */
template< typename ScalarType ,
          class DeviceType ,
          unsigned ElemNode ,
          typename CoordScalarType ,
          class elem_matrices_type ,
          class elem_vectors_type >
struct GatherFill<
  Kokkos::CrsMatrix< ScalarType , DeviceType > ,
  FEMesh< CoordScalarType , ElemNode , DeviceType > ,
  elem_matrices_type , elem_vectors_type >
{
  typedef DeviceType     execution_space ;
  typedef typename execution_space::size_type  size_type ;

  static const size_type ElemNodeCount = ElemNode ;

  typedef Kokkos::CrsMatrix< ScalarType , execution_space >    matrix_type ;
  typedef typename matrix_type::coefficients_type               coefficients_type ;
  typedef Kokkos::View< ScalarType[] , execution_space >        vector_type ;
  typedef Kokkos::View< size_type[][ElemNodeCount][ElemNodeCount] , execution_space > elem_graph_type ;

  typedef FEMesh< CoordScalarType , ElemNodeCount , execution_space > mesh_type ;
  typedef typename mesh_type::node_elem_ids_type node_elem_ids_type ;

private:

  node_elem_ids_type  node_elem_ids ;  // per row: (element id, local row index) pairs
  elem_graph_type     elem_graph ;     // (element, row, column) -> coefficient index
  elem_matrices_type  elem_matrices ;
  elem_vectors_type   elem_vectors ;
  coefficients_type   system_coeff ;
  vector_type         system_rhs ;

public:

  /** \brief  Accumulate all element contributions belonging to row 'irow'. */
  KOKKOS_INLINE_FUNCTION
  void operator()( size_type irow ) const
  {
    const size_type node_elem_begin = node_elem_ids.row_map[irow];
    const size_type node_elem_end   = node_elem_ids.row_map[irow+1];

    // for each element that a node belongs to
    for ( size_type i = node_elem_begin ; i < node_elem_end ; i++ ) {

      const size_type elem_id   = node_elem_ids.entries( i, 0);
      const size_type row_index = node_elem_ids.entries( i, 1);

      system_rhs(irow) += elem_vectors(elem_id, row_index);

      // for each node in a particular related element
      // gather the contents of the element stiffness
      // matrix that belong in irow
      for ( size_type j = 0 ; j < ElemNodeCount ; ++j ){
        const size_type A_index = elem_graph( elem_id , row_index , j );

        system_coeff( A_index ) += elem_matrices( elem_id, row_index, j );
      }
    }
  }

  /** \brief  Fill 'matrix.coefficients' and 'rhs' from the element data.
   *
   *  The number of rows is taken as one less than the length of the
   *  matrix graph's row map.  An empty row map is treated as "no rows" and
   *  returns without launching any work.
   */
  static void apply( const matrix_type & matrix ,
                     const vector_type & rhs ,
                     const mesh_type   & mesh ,
                     const elem_graph_type    & elem_graph ,
                     const elem_matrices_type & elem_matrices ,
                     const elem_vectors_type  & elem_vectors )
  {
    const size_t row_map_length = matrix.graph.row_map.dimension_0();

    // Without this guard the unsigned subtraction below would wrap around
    // and request an enormous iteration range.
    if ( 0 == row_map_length ) { return ; }

    const size_t row_count = row_map_length - 1 ;

    GatherFill op ;
    op.node_elem_ids = mesh.node_elem_ids ;
    op.elem_graph    = elem_graph ;
    op.elem_matrices = elem_matrices ;
    op.elem_vectors  = elem_vectors ;
    op.system_coeff  = matrix.coefficients ;
    op.system_rhs    = rhs ;

    parallel_for( row_count , op );
  }
};
} /* namespace HybridFEM */
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace HybridFEM {
/** \brief  Builds the node-to-node matrix graph for a mesh, plus the map from
 *          (element, local row, local column) to a matrix coefficient index.
 */
template< class GraphType , class MeshType >
struct GraphFactory {
  typedef GraphType                             graph_type ;
  typedef MeshType                              mesh_type ;
  typedef typename graph_type::execution_space  execution_space ;
  typedef typename execution_space::size_type   size_type ;

  static const unsigned ElemNodeCount = mesh_type::element_node_count ;

  typedef Kokkos::View< size_type[][ElemNodeCount][ElemNodeCount] , execution_space > element_map_type ;

  /** \brief  Create 'graph' and 'elem_map' from 'mesh'.
   *
   *  \param mesh      Source of node_elem_ids, elem_node_ids and the owned-node count.
   *  \param graph     [out] Graph built from the per-owned-node sorted,
   *                   de-duplicated lists of connected nodes.
   *  \param elem_map  [out] For owned rows, the coefficient index of every
   *                   (element, row, column); for rows at or beyond the
   *                   owned-node count, the maximum value of size_type.
   *                   Reallocated only when the mesh has elements.
   *
   *  \throws std::runtime_error("Failed") if a column node of an owned row
   *          is not present in that row's node list.
   */
  static
  void
  create( const mesh_type & mesh ,
          graph_type & graph ,
          element_map_type & elem_map )
  {
    typename mesh_type::node_elem_ids_type::HostMirror
      node_elem_ids = create_mirror( mesh.node_elem_ids );

    typename mesh_type::elem_node_ids_type::HostMirror
      elem_node_ids = create_mirror( mesh.elem_node_ids );

    typedef typename element_map_type::HostMirror element_map_host_type ;

    deep_copy( elem_node_ids , mesh.elem_node_ids );
    deep_copy( node_elem_ids.entries , mesh.node_elem_ids.entries );

    const size_t owned_node = mesh.parallel_data_map.count_owned ;
    const size_t total_elem = mesh.elem_node_ids.dimension_0();

    if ( total_elem ) {
      elem_map = element_map_type( std::string("element_map"), total_elem );
    }

    element_map_host_type elem_map_host = create_mirror( elem_map );

    //------------------------------------
    // Node->node mapping for the CrsMatrix graph

    std::vector< std::vector< unsigned > > node_node_ids( owned_node );
    std::vector< unsigned > node_node_begin( owned_node );

    size_t offset = 0 ;
    for ( size_t i = 0 ; i < owned_node ; ++i ) {
      const size_t j_end = node_elem_ids.row_map[i+1];
            size_t j     = node_elem_ids.row_map[i];

      // Offset of this row's first entry in the flattened graph.
      node_node_begin[i] = offset ;

      std::vector< unsigned > & work = node_node_ids[i] ;

      for ( ; j < j_end ; ++j ) {
        const size_t elem_id = node_elem_ids.entries(j,0);
        for ( size_t k = 0 ; k < ElemNodeCount ; ++k ) {
          work.push_back( elem_node_ids( elem_id , k ) );
        }
      }

      // Sorted and unique, so the column search below can use binary search.
      std::sort( work.begin() , work.end() );

      work.erase( std::unique( work.begin() , work.end() ) , work.end() );

      offset += work.size();
    }

    graph = Kokkos::create_staticcrsgraph< graph_type >( "node_node_ids" , node_node_ids );

    //------------------------------------
    // ( element , node_row , node_column ) -> matrix_crs_column

    for ( size_t elem_id = 0 ; elem_id < total_elem ; ++elem_id ) {
      for ( size_t i = 0 ; i < ElemNodeCount ; ++i ) {

        const size_t node_row = elem_node_ids( elem_id , i );

        if ( owned_node <= node_row ) {
          // Row is not owned here: no matrix row exists, so mark every
          // column invalid.  The per-row tables have only 'owned_node'
          // entries and must not be indexed with this row.
          for ( unsigned j = 0 ; j < ElemNodeCount ; ++j ) {
            elem_map_host( elem_id , i , j ) = std::numeric_limits<size_type>::max();
          }
          continue ;
        }

        const size_t node_row_begin = node_node_begin[ node_row ];
        const std::vector< unsigned > & column = node_node_ids[ node_row ] ;

        for ( unsigned j = 0 ; j < ElemNodeCount ; ++j ) {
          const size_type node_col = elem_node_ids( elem_id , j );

          // First position in the sorted row whose value is not less than
          // node_col; may be end() when node_col exceeds every entry.
          const std::vector< unsigned >::const_iterator found =
            std::lower_bound( column.begin() , column.end() , node_col );

          if ( found == column.end() || node_col != *found ) {
            throw std::runtime_error(std::string("Failed"));
          }

          elem_map_host( elem_id , i , j ) = ( found - column.begin() ) + node_row_begin ;
        }
      }
    }

    deep_copy( elem_map , elem_map_host );
  }
};
} // namespace HybridFEM
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #ifndef SPARSELINEARSYSTEMFILL_HPP */

View File

@ -1,164 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef SPARSELINEARSYSTEM_CUDA_HPP
#define SPARSELINEARSYSTEM_CUDA_HPP
#if defined( BUILD_FROM_CU_FILE )
#include <cusparse_v2.h>
#include <Kokkos_Core.hpp>
namespace Kokkos {
namespace Impl {
struct CudaSparseSingleton {
cusparseHandle_t handle;
cusparseMatDescr_t descra;
CudaSparseSingleton()
{
cusparseCreate( & handle );
cusparseCreateMatDescr( & descra );
cusparseSetMatType( descra , CUSPARSE_MATRIX_TYPE_GENERAL );
cusparseSetMatIndexBase( descra , CUSPARSE_INDEX_BASE_ZERO );
}
static CudaSparseSingleton & singleton();
};
// Returns a reference to a function-local static instance, which is
// constructed the first time this function is called.
// NOTE(review): this is a non-inline definition inside a header; it is only
// compiled when BUILD_FROM_CU_FILE is defined, presumably so that exactly
// one translation unit provides it -- confirm against the build.
CudaSparseSingleton & CudaSparseSingleton::singleton()
{ static CudaSparseSingleton s ; return s ; }
/** \brief  Double-precision sparse matrix-vector multiply on Cuda,
 *          delegated to cusparseDcsrmv.
 *
 *  Called with alpha = 1 and beta = 0 and the non-transpose operation;
 *  constructing the object performs the call.
 *  Throws std::runtime_error if cuSPARSE does not report success.
 */
template<>
struct Multiply< CrsMatrix<double,Cuda> ,
                 View<double*,Cuda > ,
                 View<double*,Cuda > >
{
  typedef Cuda                                        execution_space ;
  typedef execution_space::size_type                  size_type ;
  typedef double                                      scalar_type ;
  typedef View< scalar_type* , execution_space >      vector_type ;
  typedef CrsMatrix< scalar_type , execution_space >  matrix_type ;

  Multiply( const matrix_type & A ,
            const size_type nrow ,
            const size_type ncol ,
            const vector_type & x ,
            const vector_type & y )
  {
    CudaSparseSingleton & sparse = CudaSparseSingleton::singleton();

    // Scaling factors handed to cuSPARSE by address.
    const scalar_type alpha = 1 ;
    const scalar_type beta  = 0 ;

    const cusparseStatus_t rc =
      cusparseDcsrmv( sparse.handle ,
                      CUSPARSE_OPERATION_NON_TRANSPOSE ,
                      nrow ,
                      ncol ,
                      A.coefficients.dimension_0() ,   // number of stored entries
                      &alpha ,
                      sparse.descra ,
                      A.coefficients.ptr_on_device() ,
                      A.graph.row_map.ptr_on_device() ,
                      A.graph.entries.ptr_on_device() ,
                      x.ptr_on_device() ,
                      &beta ,
                      y.ptr_on_device() );

    if ( rc != CUSPARSE_STATUS_SUCCESS ) {
      throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
    }
  }
};
/** \brief  Single-precision sparse matrix-vector multiply on Cuda,
 *          delegated to cusparseScsrmv.
 *
 *  Called with alpha = 1 and beta = 0 and the non-transpose operation;
 *  constructing the object performs the call.
 *  Throws std::runtime_error if cuSPARSE does not report success.
 */
template<>
struct Multiply< CrsMatrix<float,Cuda> ,
                 View<float*,Cuda > ,
                 View<float*,Cuda > >
{
  typedef Cuda                                        execution_space ;
  typedef execution_space::size_type                  size_type ;
  typedef float                                       scalar_type ;
  typedef View< scalar_type* , execution_space >      vector_type ;
  typedef CrsMatrix< scalar_type , execution_space >  matrix_type ;

public:

  Multiply( const matrix_type & A ,
            const size_type nrow ,
            const size_type ncol ,
            const vector_type & x ,
            const vector_type & y )
  {
    CudaSparseSingleton & s = CudaSparseSingleton::singleton();
    const scalar_type alpha = 1 , beta = 0 ;

    cusparseStatus_t status =
      cusparseScsrmv( s.handle ,
                      CUSPARSE_OPERATION_NON_TRANSPOSE ,
                      nrow , ncol , A.coefficients.dimension_0() ,
                      &alpha ,
                      s.descra ,
                      A.coefficients.ptr_on_device() ,
                      A.graph.row_map.ptr_on_device() ,
                      A.graph.entries.ptr_on_device() ,
                      x.ptr_on_device() ,
                      &beta ,
                      y.ptr_on_device() );

    if ( CUSPARSE_STATUS_SUCCESS != status ) {
      // Name the routine that actually failed (the single-precision one).
      throw std::runtime_error( std::string("ERROR - cusparseScsrmv " ) );
    }
  }
};
} /* namespace Impl */
} /* namespace Kokkos */
#endif /* #if defined( BUILD_FROM_CU_FILE ) */
#endif /* #ifndef SPARSELINEARSYSTEM_CUDA_HPP */

View File

@ -1,242 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef TESTFEMESHBOXFIXTURE_HPP
#define TESTFEMESHBOXFIXTURE_HPP
#include <stdio.h>
#include <iostream>
#include <stdexcept>
#include <limits>
#include <utility>
#include <BoxMeshFixture.hpp>
#include <ParallelComm.hpp>
//----------------------------------------------------------------------------
namespace TestFEMesh {
// Reduction functor that compares an exchange buffer against a node
// coordinate View.  Only declared here; the specialization for
// Kokkos::View< T*[3] , DeviceType > follows.
template< class ViewType >
struct VerifyUnpack ;
/** \brief  Counts nodes whose three coordinates differ from the values held
 *          in a flat buffer (three consecutive buffer values per node).
 */
template< typename DeviceType, typename T >
struct VerifyUnpack< Kokkos::View< T*[3] , DeviceType > >
{
  typedef DeviceType                                execution_space ;
  typedef typename execution_space::size_type       size_type ;
  typedef size_type                                 value_type ;
  typedef Kokkos::View< T* ,    execution_space >   buffer_type ;
  typedef Kokkos::View< T*[3] , execution_space >   array_type ;

private:

  array_type   node_coords ;
  buffer_type  buffer ;
  size_type    node_begin ;   // node index matching buffer offset 0

public:

  /** \brief  Reduction identity: no mismatches yet. */
  KOKKOS_INLINE_FUNCTION
  static void init( value_type & update )
  { update = 0 ; }

  /** \brief  Combine two partial mismatch counts. */
  KOKKOS_INLINE_FUNCTION
  static void join( volatile value_type & update ,
                    const volatile value_type & source )
  { update += source ; }

  /** \brief  Compare buffered triple 'i' with node (i + node_begin); on a
   *          mismatch print a diagnostic and increment 'update'.
   */
  KOKKOS_INLINE_FUNCTION
  void operator()( const size_type i , value_type & update ) const
  {
    const size_type node_id = node_begin + i ;
    const size_type offset  = 3 * i ;

    // Values are compared after conversion to long.
    const long buf_x = buffer[offset];
    const long buf_y = buffer[offset+1];
    const long buf_z = buffer[offset+2];

    const long crd_x = node_coords(node_id,0);
    const long crd_y = node_coords(node_id,1);
    const long crd_z = node_coords(node_id,2);

    const bool same = ( buf_x == crd_x ) && ( buf_y == crd_y ) && ( buf_z == crd_z );

    if ( ! same ) {
      printf("TestFEMesh::VerifyUnpack failed at %d : node %d : { %ld %ld %ld } != { %ld %ld %ld }\n",
             (int)i,(int)node_id, buf_x,buf_y,buf_z, crd_x, crd_y, crd_z );
      ++update ;
    }
  }

  /** \brief  Run the comparison over 'arg_node_count' nodes starting at
   *          'arg_node_begin' and return the number of mismatching nodes.
   */
  static inline
  size_type unpack( const array_type  & arg_node_coords ,
                    const size_type     arg_node_begin ,
                    const size_type     arg_node_count ,
                    const buffer_type & arg_buffer )
  {
    VerifyUnpack functor ;
    functor.node_coords = arg_node_coords ;
    functor.buffer      = arg_buffer ;
    functor.node_begin  = arg_node_begin ;

    size_type mismatches = 0 ;
    Kokkos::parallel_reduce( arg_node_count , functor , mismatches );
    return mismatches ;
  }
};
}
//----------------------------------------------------------------------------
#ifdef KOKKOS_HAVE_MPI
namespace TestFEMesh {
/** \brief  Exchange node coordinates between processes and check that the
 *          received values equal the mesh's own coordinates for those nodes.
 *
 *  Per-process counts (owned nodes, received nodes, mismatching nodes) are
 *  summed with MPI_Allreduce and rank 0 prints a PASSED / FAILED line.
 */
template< typename coordinate_scalar_type ,
          unsigned ElemNodeCount ,
          class Device >
void verify_parallel(
  const HybridFEM::FEMesh< coordinate_scalar_type ,
                           ElemNodeCount ,
                           Device > & mesh )
{
  typedef HybridFEM::FEMesh< coordinate_scalar_type, ElemNodeCount, Device > femesh_type ;
  typedef typename femesh_type::node_coords_type                            node_coords_type ;
  typedef Kokkos::PackArray< node_coords_type >                             pack_type ;
  typedef Kokkos::AsyncExchange< coordinate_scalar_type, Device, Kokkos::ParallelDataMap >
                                                                            exchange_type ;

  comm::Machine machine = mesh.parallel_data_map.machine ;

  // Communicate node coordinates to verify communication and setup.
  // Three values (one coordinate triple) travel per node.
  const size_t values_per_node = 3 ;

  exchange_type exchange( mesh.parallel_data_map , values_per_node );

  const size_t first_sent     = mesh.parallel_data_map.count_interior ;
  const size_t sent_total     = mesh.parallel_data_map.count_send ;
  const size_t first_received = mesh.parallel_data_map.count_owned ;
  const size_t received_total = mesh.parallel_data_map.count_receive ;

  pack_type::pack( exchange.buffer(), first_sent, sent_total, mesh.node_coords );

  exchange.setup();

  // Launch local-action device kernels

  exchange.send_receive();

  // [0] owned nodes, [1] received nodes, [2] nodes that failed verification.
  unsigned long local[3] ;
  local[0] = mesh.parallel_data_map.count_owned ;
  local[1] = mesh.parallel_data_map.count_receive ;
  local[2] = TestFEMesh::VerifyUnpack< node_coords_type >::unpack(
               mesh.node_coords, first_received, received_total, exchange.buffer() );

  unsigned long global[3] = { 0 , 0 , 0 };

  MPI_Allreduce( local , global ,
                 3 , MPI_UNSIGNED_LONG , MPI_SUM , machine.mpi_comm );

  if ( 0 == comm::rank( machine ) ) {
    const bool any_failed = ( 0 != global[2] );

    std::cout << ( any_failed ? "FAILED" : "PASSED" )
              << ": TestFEMesh::verify_parallel "
              << "NP(" << comm::size( machine )
              << ") total_node(" << global[0]
              << ") verified_nodes(" << global[1]
              << ") failed_nodes(" << global[2]
              << ")" << std::endl ;
  }
}
} // namespace TestFEMesh
#else /* ! #ifdef KOKKOS_HAVE_MPI */
namespace TestFEMesh {
// Build without KOKKOS_HAVE_MPI: same signature as the MPI version, but the
// mesh argument is ignored and nothing is verified or printed.
template< typename coordinate_scalar_type ,
unsigned ElemNodeCount ,
class Device >
void verify_parallel(
const HybridFEM::FEMesh< coordinate_scalar_type ,
ElemNodeCount ,
Device > & )
{}
} // namespace TestFEMesh
#endif /* ! #ifdef KOKKOS_HAVE_MPI */
//----------------------------------------------------------------------------
/** \brief  Create a Hex8 box mesh for this process and run the parallel
 *          coordinate verification on it.
 *
 *  \param machine     Communication handle; also stored into the mesh's data map.
 *  \param gang_count  Forwarded unchanged to the fixture.
 *  \param nodes_nx    Node count along x; the fixture receives nodes_nx - 1.
 *  \param nodes_ny    Node count along y; the fixture receives nodes_ny - 1.
 *  \param nodes_nz    Node count along z; the fixture receives nodes_nz - 1.
 */
template< class Device >
void test_box_fixture( comm::Machine machine ,
                       const size_t gang_count ,
                       const size_t nodes_nx ,
                       const size_t nodes_ny ,
                       const size_t nodes_nz )
{
  typedef long                                   coordinate_scalar_type ;
  typedef FixtureElementHex8                     fixture_element_type ;
  typedef BoxMeshFixture< coordinate_scalar_type ,
                          Device ,
                          fixture_element_type > fixture_type ;
  typedef typename fixture_type::FEMeshType      mesh_type ;

  const size_t num_procs = comm::size( machine );
  const size_t this_proc = comm::rank( machine ) ;

  // presumably element counts per axis (one fewer than nodes) -- confirm
  // against BoxMeshFixture::create.
  const size_t extent_x = nodes_nx - 1 ;
  const size_t extent_y = nodes_ny - 1 ;
  const size_t extent_z = nodes_nz - 1 ;

  mesh_type mesh =
    fixture_type::create( num_procs , this_proc , gang_count ,
                          extent_x , extent_y , extent_z );

  mesh.parallel_data_map.machine = machine ;

  TestFEMesh::verify_parallel( mesh );
}
#endif /* #ifndef TESTFEMESHBOXFIXTURE_HPP */

View File

@ -1,172 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
#include <iostream>
#include <stdexcept>
#include <limits>
#include <utility>
#include <BoxMeshPartition.hpp>
//----------------------------------------------------------------------------
// Self-check of box_partition_rcb / box_partition_maps on a 100 x 200 x 300
// root box.
//
// For partition counts np = 2, 6, 14, 30, ... (np -> 2*(np+1)) below 10000,
// the root box is partitioned and, for up to four sampled parts, the
// communication maps are built and cross-checked:
//   - each non-zero receive count must equal both the cell count of the
//     intersection of the sender's box with this part's use box and the
//     length of the sender's matching send list, else
//     std::runtime_error("bad recv/send map") is thrown;
//   - owned count plus all receive counts must equal the use count, else
//     std::runtime_error("bad partition map") is thrown.
//
// print : when true, a summary of each partitioning and layout goes to std::cout.
void test_box_partition( bool print )
{
const size_t np_max = 10000 ;
const BoxBoundsLinear use_box ;
BoxType root_box ;
root_box[0][0] = 0 ; root_box[0][1] = 100 ;
root_box[1][0] = 0 ; root_box[1][1] = 200 ;
root_box[2][0] = 0 ; root_box[2][1] = 300 ;
// Product of the three extents of the root box.
const size_t cell_total =
( root_box[0][1] - root_box[0][0] ) *
( root_box[1][1] - root_box[1][0] ) *
( root_box[2][1] - root_box[2][0] );
for ( size_t np = 2 ; np < np_max ; np = 2 * ( np + 1 ) ) {
std::vector<BoxType> part_boxes( np );
box_partition_rcb( root_box , part_boxes );
// Ideal cells per part (rounded up) versus the largest part produced;
// both are only reported, not asserted.
size_t cell_goal = ( cell_total + np - 1 ) / np ;
size_t cell_max = 0 ;
for ( size_t i = 0 ; i < np ; ++i ) {
cell_max = std::max( cell_max , count( part_boxes[i] ) );
}
if ( print ) {
std::cout << std::endl
<< "box_part( " << np
<< " ) max( " << cell_max
<< " ) goal( " << cell_goal
<< " ) ratio( " << double(cell_max) / double(cell_goal)
<< " )" << std::endl ;
}
// Check at most four parts, evenly strided through [0,np).
const size_t nsample = std::min(np,(size_t)4);
const size_t stride = ( np + nsample - 1 ) / nsample ;
for ( size_t my_part = 0 ; my_part < np ; my_part += stride ) {
BoxType my_use_box ;
std::vector<size_t> my_use_id_map ;
size_t my_count_interior ;
size_t my_count_owned ;
size_t my_count_uses ;
std::vector<size_t> my_recv_counts ;
std::vector<std::vector<size_t> > my_send_map ;
size_t count_verify = 0 ;
box_partition_maps( root_box , part_boxes ,
use_box , my_part ,
my_use_box , my_use_id_map ,
my_count_interior ,
my_count_owned ,
my_count_uses ,
my_recv_counts ,
my_send_map );
// Running total: owned cells now, received cells added in the loop below.
count_verify = my_count_owned ;
if ( print ) {
std::cout << " my_part(" << my_part << ") layout { "
<< "P" << my_part
<< "(" << my_count_interior
<< "," << ( my_count_owned - my_count_interior )
<< ")" ;
}
// Index i is an offset from my_part: entry i refers to part (my_part+i) % np.
for ( size_t i = 1 ; i < np ; ++i ) {
if ( my_recv_counts[i] ) {
count_verify += my_recv_counts[i] ;
const size_t ip = ( my_part + i ) % np ;
if ( print ) {
std::cout << " P" << ip << "(" << my_recv_counts[i] << ")" ;
}
// Compare recv & send lists
BoxType ip_use_box ;
std::vector<size_t> ip_use_id_map ;
size_t ip_count_interior ;
size_t ip_count_owned ;
size_t ip_count_uses ;
std::vector<size_t> ip_recv_counts ;
std::vector<std::vector<size_t> > ip_send_map ;
// Build the same maps from the sender's (ip's) point of view.
box_partition_maps( root_box , part_boxes ,
use_box , ip ,
ip_use_box , ip_use_id_map ,
ip_count_interior ,
ip_count_owned ,
ip_count_uses ,
ip_recv_counts ,
ip_send_map );
// Sent by ip, received by my_part:
const BoxType recv_send = intersect( part_boxes[ip] , my_use_box );
const size_t recv_send_count = count( recv_send );
// Offset of my_part as seen from ip, i.e. (ip + j) % np == my_part.
const size_t j = ( my_part + np - ip ) % np ;
if ( recv_send_count != my_recv_counts[i] ||
recv_send_count != ip_send_map[j].size() ) {
throw std::runtime_error( std::string("bad recv/send map") );
}
}
}
if ( print ) { std::cout << " }" << std::endl ; }
if ( count_verify != my_count_uses ) {
throw std::runtime_error( std::string("bad partition map") );
}
}
}
}

View File

@ -1,192 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <TestBoxMeshFixture.hpp>
#include <Implicit.hpp>
#include <Nonlinear.hpp>
#include <Explicit.hpp>
#include <SparseLinearSystem.hpp>
#if defined( KOKKOS_HAVE_CUDA )
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {

// Accessor for the single CudaSparseSingleton object.
// The object is a function-local static, so it is constructed on the
// first call and the same instance is returned by reference afterwards.
CudaSparseSingleton & CudaSparseSingleton::singleton()
{
  static CudaSparseSingleton the_instance ;
  return the_instance ;
}

} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
// Print, prefixed with this process' rank, the number of CUDA devices
// reported by Kokkos::Cuda::detect_device_count().
void test_cuda_query( comm::Machine machine )
{
  const size_t my_rank = comm::rank( machine );

  std::cout << "P" << my_rank ;
  std::cout << ": Cuda device_count = " ;
  std::cout << Kokkos::Cuda::detect_device_count() ;
  std::cout << std::endl ;
}
//----------------------------------------------------------------------------
// Run the box mesh fixture test on a CUDA device.
//   machine    : parallel machine handle used to query rank and size
//   nx, ny, nz : forwarded unchanged to test_box_fixture
void test_cuda_fixture( comm::Machine machine ,
                        size_t nx , size_t ny , size_t nz )
{
  const size_t my_rank      = comm::rank( machine );
  const size_t rank_total   = comm::size( machine );
  const size_t device_total = Kokkos::Cuda::detect_device_count();

  // Spread ranks round-robin over the devices, otherwise fall back to device 0.
  // NOTE(review): with more devices than ranks every rank selects device 0 -
  // confirm this is intended.
  size_t device_id = 0 ;
  if ( device_total && device_total <= rank_total ) {
    device_id = my_rank % device_total ;
  }

  const size_t no_gangs = 0 ;

  Kokkos::HostSpace::execution_space::initialize();
  Kokkos::Cuda::SelectDevice chosen_device( device_id );
  Kokkos::Cuda::initialize( chosen_device );

  test_box_fixture<Kokkos::Cuda>( machine , no_gangs , nx , ny , nz );

  // Shut down in the reverse order of initialization.
  Kokkos::Cuda::finalize();
  Kokkos::HostSpace::execution_space::finalize();
}
//----------------------------------------------------------------------------
// Run the HybridFEM implicit driver (double precision) on a CUDA device.
//   machine          : parallel machine handle used to query rank and size
//   elem_count_begin ,
//   elem_count_end ,
//   count_run        : forwarded unchanged to the driver
void test_cuda_implicit( comm::Machine machine ,
                         size_t elem_count_begin ,
                         size_t elem_count_end ,
                         size_t count_run )
{
  const size_t my_rank      = comm::rank( machine );
  const size_t rank_total   = comm::size( machine );
  const size_t device_total = Kokkos::Cuda::detect_device_count();

  // Spread ranks round-robin over the devices, otherwise fall back to device 0.
  // NOTE(review): with more devices than ranks every rank selects device 0 -
  // confirm this is intended.
  size_t device_id = 0 ;
  if ( device_total && device_total <= rank_total ) {
    device_id = my_rank % device_total ;
  }

  const size_t no_gangs = 0 ;

  Kokkos::HostSpace::execution_space::initialize();
  Kokkos::Cuda::SelectDevice chosen_device( device_id );
  Kokkos::Cuda::initialize( chosen_device );

  HybridFEM::Implicit::driver<double,Kokkos::Cuda>(
    "Cuda" , machine , no_gangs ,
    elem_count_begin , elem_count_end , count_run );

  // Shut down in the reverse order of initialization.
  Kokkos::Cuda::finalize();
  Kokkos::HostSpace::execution_space::finalize();
}
//----------------------------------------------------------------------------
// Run the Explicit driver (double precision) on a CUDA device.
//   machine          : parallel machine handle used to query rank and size
//   elem_count_begin ,
//   elem_count_end ,
//   count_run        : forwarded unchanged to the driver
void test_cuda_explicit( comm::Machine machine ,
                         size_t elem_count_begin ,
                         size_t elem_count_end ,
                         size_t count_run )
{
  const size_t my_rank      = comm::rank( machine );
  const size_t rank_total   = comm::size( machine );
  const size_t device_total = Kokkos::Cuda::detect_device_count();

  // Spread ranks round-robin over the devices, otherwise fall back to device 0.
  // NOTE(review): with more devices than ranks every rank selects device 0 -
  // confirm this is intended.
  size_t device_id = 0 ;
  if ( device_total && device_total <= rank_total ) {
    device_id = my_rank % device_total ;
  }

  const size_t no_gangs = 0 ;

  Kokkos::HostSpace::execution_space::initialize();
  Kokkos::Cuda::SelectDevice chosen_device( device_id );
  Kokkos::Cuda::initialize( chosen_device );

  Explicit::driver<double,Kokkos::Cuda>(
    "Cuda" , machine , no_gangs ,
    elem_count_begin , elem_count_end , count_run );

  // Shut down in the reverse order of initialization.
  Kokkos::Cuda::finalize();
  Kokkos::HostSpace::execution_space::finalize();
}
//----------------------------------------------------------------------------
// Run the HybridFEM nonlinear driver (double precision, FixtureElementHex8
// elements) on a CUDA device.
//   machine          : parallel machine handle used to query rank and size
//   elem_count_begin ,
//   elem_count_end ,
//   count_run        : forwarded unchanged to the driver
void test_cuda_nonlinear( comm::Machine machine ,
                          size_t elem_count_begin ,
                          size_t elem_count_end ,
                          size_t count_run )
{
  typedef Kokkos::Cuda       device_type ;
  typedef FixtureElementHex8 element_type ;

  const size_t my_rank      = comm::rank( machine );
  const size_t rank_total   = comm::size( machine );
  const size_t device_total = Kokkos::Cuda::detect_device_count();

  // Spread ranks round-robin over the devices, otherwise fall back to device 0.
  // NOTE(review): with more devices than ranks every rank selects device 0 -
  // confirm this is intended.
  size_t device_id = 0 ;
  if ( device_total && device_total <= rank_total ) {
    device_id = my_rank % device_total ;
  }

  const size_t no_gangs = 0 ;

  Kokkos::HostSpace::execution_space::initialize();
  Kokkos::Cuda::SelectDevice chosen_device( device_id );
  Kokkos::Cuda::initialize( chosen_device );

  HybridFEM::Nonlinear::driver<double,device_type,element_type>(
    "Cuda" , machine , no_gangs ,
    elem_count_begin , elem_count_end , count_run );

  // Shut down in the reverse order of initialization.
  Kokkos::Cuda::finalize();
  Kokkos::HostSpace::execution_space::finalize();
}
// Run the HybridFEM nonlinear driver (double precision, FixtureElementHex27
// elements) on a CUDA device.
//   machine          : parallel machine handle used to query rank and size
//   elem_count_begin ,
//   elem_count_end ,
//   count_run        : forwarded unchanged to the driver
void test_cuda_nonlinear_quadratic( comm::Machine machine ,
                                    size_t elem_count_begin ,
                                    size_t elem_count_end ,
                                    size_t count_run )
{
  typedef Kokkos::Cuda        device_type ;
  typedef FixtureElementHex27 element_type ;

  const size_t my_rank      = comm::rank( machine );
  const size_t rank_total   = comm::size( machine );
  const size_t device_total = Kokkos::Cuda::detect_device_count();

  // Spread ranks round-robin over the devices, otherwise fall back to device 0.
  // NOTE(review): with more devices than ranks every rank selects device 0 -
  // confirm this is intended.
  size_t device_id = 0 ;
  if ( device_total && device_total <= rank_total ) {
    device_id = my_rank % device_total ;
  }

  const size_t no_gangs = 0 ;

  Kokkos::HostSpace::execution_space::initialize();
  Kokkos::Cuda::SelectDevice chosen_device( device_id );
  Kokkos::Cuda::initialize( chosen_device );

  HybridFEM::Nonlinear::driver<double,device_type,element_type>(
    "Cuda" , machine , no_gangs ,
    elem_count_begin , elem_count_end , count_run );

  // Shut down in the reverse order of initialization.
  Kokkos::Cuda::finalize();
  Kokkos::HostSpace::execution_space::finalize();
}
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */

View File

@ -1,137 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
// Must be included first on Intel-Phi systems due to
// redefinition of SEEK_SET in <mpi.h>.
#include <ParallelComm.hpp>
#include <iostream>
#include <stdexcept>
#include <limits>
#include <utility>
//----------------------------------------------------------------------------
#include <Kokkos_Core.hpp>
#include <BoxMeshFixture.hpp>
#include <TestBoxMeshFixture.hpp>
#include <Implicit.hpp>
#include <Nonlinear.hpp>
#include <Explicit.hpp>
#include <SparseLinearSystem.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Run the box mesh fixture test on the host execution space.
//   gang_count , gang_worker_count : their product is handed to the
//                                    execution space's initialize()
//   nx, ny, nz                     : forwarded unchanged to test_box_fixture
void test_host_fixture( comm::Machine machine ,
                        size_t gang_count ,
                        size_t gang_worker_count ,
                        size_t nx , size_t ny , size_t nz )
{
  typedef Kokkos::HostSpace::execution_space host_space ;

  const size_t worker_total = gang_count * gang_worker_count ;

  host_space::initialize( worker_total );
  test_box_fixture< host_space >( machine , gang_count , nx , ny , nz );
  host_space::finalize();
}
//----------------------------------------------------------------------------
// Run the HybridFEM implicit driver (double precision) on the host
// execution space.
//   gang_count , gang_worker_count : their product is handed to the
//                                    execution space's initialize()
//   elem_count_begin , elem_count_end , count_run : forwarded to the driver
void test_host_implicit( comm::Machine machine ,
                         size_t gang_count ,
                         size_t gang_worker_count ,
                         size_t elem_count_begin ,
                         size_t elem_count_end ,
                         size_t count_run )
{
  typedef Kokkos::HostSpace::execution_space host_space ;

  const size_t worker_total = gang_count * gang_worker_count ;

  host_space::initialize( worker_total );

  HybridFEM::Implicit::driver< double , host_space >(
    "Threads" , machine , gang_count ,
    elem_count_begin , elem_count_end , count_run );

  host_space::finalize();
}
//----------------------------------------------------------------------------
// Run the Explicit driver (double precision) on the host execution space.
//   gang_count , gang_worker_count : their product is handed to the
//                                    execution space's initialize()
//   elem_count_begin , elem_count_end , count_run : forwarded to the driver
void test_host_explicit( comm::Machine machine ,
                         size_t gang_count ,
                         size_t gang_worker_count ,
                         size_t elem_count_begin ,
                         size_t elem_count_end ,
                         size_t count_run )
{
  typedef Kokkos::HostSpace::execution_space host_space ;

  const size_t worker_total = gang_count * gang_worker_count ;

  host_space::initialize( worker_total );

  Explicit::driver< double , host_space >(
    "Threads" , machine , gang_count ,
    elem_count_begin , elem_count_end , count_run );

  host_space::finalize();
}
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Run the HybridFEM nonlinear driver (double precision, FixtureElementHex8
// elements) on the host execution space.
//   gang_count , gang_worker_count : their product is handed to the
//                                    execution space's initialize()
//   elem_count_begin , elem_count_end , count_run : forwarded to the driver
void test_host_nonlinear( comm::Machine machine ,
                          size_t gang_count ,
                          size_t gang_worker_count ,
                          size_t elem_count_begin ,
                          size_t elem_count_end ,
                          size_t count_run )
{
  typedef Kokkos::HostSpace::execution_space host_space ;
  typedef FixtureElementHex8                 element_type ;

  const size_t worker_total = gang_count * gang_worker_count ;

  host_space::initialize( worker_total );

  HybridFEM::Nonlinear::driver< double , host_space , element_type >(
    "Threads" , machine , gang_count ,
    elem_count_begin , elem_count_end , count_run );

  host_space::finalize();
}
// Run the HybridFEM nonlinear driver (double precision, FixtureElementHex27
// elements) on the host execution space.
//   gang_count , gang_worker_count : their product is handed to the
//                                    execution space's initialize()
//   elem_count_begin , elem_count_end , count_run : forwarded to the driver
void test_host_nonlinear_quadratic( comm::Machine machine ,
                                    size_t gang_count ,
                                    size_t gang_worker_count ,
                                    size_t elem_count_begin ,
                                    size_t elem_count_end ,
                                    size_t count_run )
{
  typedef Kokkos::HostSpace::execution_space host_space ;
  typedef FixtureElementHex27                element_type ;

  const size_t worker_total = gang_count * gang_worker_count ;

  host_space::initialize( worker_total );

  HybridFEM::Nonlinear::driver< double , host_space , element_type >(
    "Threads" , machine , gang_count ,
    elem_count_begin , elem_count_end , count_run );

  host_space::finalize();
}
//----------------------------------------------------------------------------

View File

@ -1,348 +0,0 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
// Must be included first on Intel-Phi systems due to
// redefinition of SEEK_SET in <mpi.h>.
#include <ParallelComm.hpp>
#include <string>
#include <sstream>
#include <iostream>
#include <Kokkos_hwloc.hpp>
//----------------------------------------------------------------------------
void test_box_partition( bool print );
//----------------------------------------------------------------------------
void test_host_fixture( comm::Machine machine ,
size_t gang_count ,
size_t gang_worker_count ,
size_t nx , size_t ny , size_t nz );
void test_host_implicit( comm::Machine machine ,
size_t gang_count ,
size_t gang_worker_count ,
size_t elem_count_begin ,
size_t elem_count_end ,
size_t count_run );
void test_host_explicit( comm::Machine machine ,
size_t gang_count ,
size_t gang_worker_count ,
size_t elem_count_begin ,
size_t elem_count_end ,
size_t count_run );
void test_host_nonlinear( comm::Machine machine ,
size_t gang_count ,
size_t gang_worker_count ,
size_t elem_count_begin ,
size_t elem_count_end ,
size_t count_run );
void test_host_nonlinear_quadratic( comm::Machine machine ,
size_t gang_count ,
size_t gang_worker_count ,
size_t elem_count_begin ,
size_t elem_count_end ,
size_t count_run );
//----------------------------------------------------------------------------
void test_cuda_query( comm::Machine );
void test_cuda_fixture( comm::Machine machine ,
size_t nx , size_t ny , size_t nz );
void test_cuda_implicit( comm::Machine machine ,
size_t elem_count_begin ,
size_t elem_count_end ,
size_t count_run );
void test_cuda_explicit( comm::Machine machine ,
size_t elem_count_begin ,
size_t elem_count_end ,
size_t count_run );
void test_cuda_nonlinear( comm:: Machine machine ,
size_t elem_count_begin ,
size_t elem_count_end ,
size_t count_run );
void test_cuda_nonlinear_quadratic( comm::Machine machine ,
size_t elem_count_begin ,
size_t elem_count_end ,
size_t count_run );
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace {
bool run_host( std::istream & input ,
comm::Machine machine ,
const size_t host_gang_count ,
const size_t host_gang_worker_count )
{
bool cmd_error = false ;
std::string which ; input >> which ;
if ( which == std::string("fixture") ) {
size_t nx = 0 , ny = 0 , nz = 0 ;
input >> nx >> ny >> nz ;
test_host_fixture( machine , host_gang_count , host_gang_worker_count , nx , ny , nz );
}
else if ( which == std::string("explicit") ) {
size_t mesh_node_begin = 100 ;
size_t mesh_node_end = 300 ;
size_t run = 1 ;
input >> mesh_node_begin >> mesh_node_end >> run ;
test_host_explicit( machine , host_gang_count , host_gang_worker_count , mesh_node_begin , mesh_node_end , run );
}
else if ( which == std::string("implicit") ) {
size_t mesh_node_begin = 100 ;
size_t mesh_node_end = 300 ;
size_t run = 1 ;
input >> mesh_node_begin >> mesh_node_end >> run ;
test_host_implicit( machine , host_gang_count , host_gang_worker_count , mesh_node_begin , mesh_node_end , run );
}
else if ( which == std::string("nonlinear") ) {
size_t mesh_node_begin = 100 ;
size_t mesh_node_end = 300 ;
size_t run = 1 ;
input >> mesh_node_begin >> mesh_node_end >> run ;
test_host_nonlinear( machine , host_gang_count , host_gang_worker_count , mesh_node_begin , mesh_node_end , run );
}
else if ( which == std::string("nonlinear_quadratic") ) {
size_t mesh_node_begin = 100 ;
size_t mesh_node_end = 300 ;
size_t run = 1 ;
input >> mesh_node_begin >> mesh_node_end >> run ;
test_host_nonlinear_quadratic( machine , host_gang_count , host_gang_worker_count , mesh_node_begin , mesh_node_end , run );
}
else {
cmd_error = true ;
}
return cmd_error ;
}
#if defined( KOKKOS_HAVE_CUDA )
bool run_cuda( std::istream & input , comm::Machine machine )
{
bool cmd_error = false ;
std::string which ; input >> which ;
if ( which == std::string("fixture") ) {
size_t nx = 0 , ny = 0 , nz = 0 ;
input >> nx >> ny >> nz ;
test_cuda_fixture( machine , nx , ny , nz );
}
else if ( which == std::string("explicit") ) {
size_t mesh_node_begin = 100 ;
size_t mesh_node_end = 300 ;
size_t run = 1 ;
input >> mesh_node_begin >> mesh_node_end >> run ;
test_cuda_explicit( machine , mesh_node_begin , mesh_node_end , run );
}
else if ( which == std::string("implicit") ) {
size_t mesh_node_begin = 100 ;
size_t mesh_node_end = 300 ;
size_t run = 1 ;
input >> mesh_node_begin >> mesh_node_end >> run ;
test_cuda_implicit( machine , mesh_node_begin , mesh_node_end , run );
}
else if ( which == std::string("nonlinear") ) {
size_t mesh_node_begin = 100 ;
size_t mesh_node_end = 300 ;
size_t run = 1 ;
input >> mesh_node_begin >> mesh_node_end >> run ;
test_cuda_nonlinear( machine , mesh_node_begin , mesh_node_end , run );
}
else if ( which == std::string("nonlinear_quadratic") ) {
size_t mesh_node_begin = 100 ;
size_t mesh_node_end = 300 ;
size_t run = 1 ;
input >> mesh_node_begin >> mesh_node_end >> run ;
test_cuda_nonlinear_quadratic( machine , mesh_node_begin , mesh_node_end , run );
}
else {
cmd_error = true ;
}
return cmd_error ;
}
#endif
// Parse one command line and dispatch the requested action.
//
//   argline : the whole command line as a single whitespace-separated string
//   machine : parallel machine handle; rank 0 alone runs 'partition' and
//             prints the usage text
//
// The first word selects 'query', 'partition', or a device ('host',
// 'host-all', 'host-most', and 'cuda' when built with KOKKOS_HAVE_CUDA);
// for a device the remaining words are handed to run_host / run_cuda.
// Any unrecognized device or test name prints the usage text.
void run( const std::string & argline , comm::Machine machine )
{
  const unsigned numa_count       = Kokkos::hwloc::get_available_numa_count();
  const unsigned cores_per_numa   = Kokkos::hwloc::get_available_cores_per_numa();
  const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();

  std::istringstream input( argline );

  bool cmd_error = false ;

  std::string which ; input >> which ;

  if ( which == std::string("query") ) {
    std::cout << "P" << comm::rank( machine )
              << ": hwloc { NUMA[" << numa_count << "]"
              << " CORE[" << cores_per_numa << "]"
              << " PU[" << threads_per_core << "] }"
              << std::endl ;
#if defined( KOKKOS_HAVE_CUDA )
    test_cuda_query( machine );
#endif
  }
  else if ( which == std::string("partition") ) {
    if ( 0 == comm::rank( machine ) ) {
      test_box_partition( false /* print flag */ );
    }
  }
  else if ( which == std::string("host") ) {
    // Explicit configuration: gang count, then workers per gang.
    size_t host_gang_count = 0 ;
    size_t host_gang_worker_count = 1 ;

    input >> host_gang_count ;
    input >> host_gang_worker_count ;

    cmd_error = run_host( input , machine , host_gang_count , host_gang_worker_count );
  }
  else if ( which == std::string("host-all") ) {
    // One gang per NUMA region using every hardware thread in it.
    size_t host_gang_count = numa_count ;
    size_t host_gang_worker_count = cores_per_numa * threads_per_core ;

    cmd_error = run_host( input , machine , host_gang_count , host_gang_worker_count );
  }
  else if ( which == std::string("host-most") ) {
    // Leave one core per NUMA region idle, but always keep at least one:
    // cores_per_numa is unsigned, so an unguarded "cores_per_numa - 1"
    // yields zero workers for a single core and wraps around for zero.
    const unsigned cores_used = 1 < cores_per_numa ? cores_per_numa - 1 : 1 ;

    size_t host_gang_count = numa_count ;
    size_t host_gang_worker_count = cores_used * threads_per_core ;

    cmd_error = run_host( input , machine , host_gang_count , host_gang_worker_count );
  }
#if defined( KOKKOS_HAVE_CUDA )
  else if ( which == std::string("cuda") ) {
    cmd_error = run_cuda( input , machine );
  }
#endif
  else {
    cmd_error = true ;
  }

  // Only rank 0 prints the usage text so it appears once.
  if ( cmd_error && 0 == comm::rank( machine ) ) {
    std::cout << "Expecting command line with" << std::endl
              << " query" << std::endl
              << " partition" << std::endl
              << " host NumNumaNode NumThreadPerNode <test>" << std::endl
              << " host-all <test>" << std::endl
              << " host-most <test>" << std::endl
              << " cuda <test>" << std::endl
              << "where <test> is" << std::endl
              << " fixture NumElemX NumElemY NumElemZ" << std::endl
              << " implicit NumElemBegin NumElemEnd NumRun" << std::endl
              << " explicit NumElemBegin NumElemEnd NumRun" << std::endl
              << " nonlinear NumElemBegin NumElemEnd NumRun" << std::endl
              << " nonlinear_quadratic NumElemBegin NumElemEnd NumRun" << std::endl ;
  }
}
} // namespace
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Program entry point: initialize the parallel machine, run the command
// line through run(), and finalize the machine.
//
// Exceptions escaping run() are reported on std::cerr with this process'
// rank.  The machine is finalized in every case; the exit status is
// non-zero when an exception was caught so that callers can detect failure.
int main( int argc , char ** argv )
{
  comm::Machine machine = comm::Machine::init( & argc , & argv );

  const unsigned comm_rank = comm::rank( machine );

  const std::string argline = comm::command_line( machine , argc , argv );

  int exit_status = 0 ;

  try {
    run( argline , machine );
  }
  catch( const std::exception & x ) {
    std::cerr << "P" << comm_rank << " throw: " << x.what() << std::endl ;
    exit_status = 1 ;
  }
  catch( ... ) {
    std::cerr << "P" << comm_rank << " throw: unknown exception" << std::endl ;
    exit_status = 1 ;
  }

  comm::Machine::finalize();

  return exit_status ;
}

View File

@ -1,14 +0,0 @@
# Build description for the query_device example executable.

# Make headers in both the build directory and this source directory visible.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# Collect every .cpp file in this directory as the executable's sources.
SET(SOURCES "")
FILE(GLOB SOURCES *.cpp)
# NOTE(review): "COMM serial mpi" presumably enables the target for both
# serial and MPI builds - confirm against the TriBITS documentation.
TRIBITS_ADD_EXECUTABLE(
query_device
SOURCES ${SOURCES}
COMM serial mpi
)

View File

@ -1,53 +0,0 @@
# Stand-alone build of the example in this directory.
# Root of the Kokkos tree; "?=" keeps any value already set by the caller.
KOKKOS_PATH ?= ../..
# Path of this Makefile: evaluated immediately, before the include below
# appends another entry to MAKEFILE_LIST.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
# Directory holding this Makefile; sources are looked up here rather than
# in the current working directory.
SRC_DIR := $(dir $(MAKEFILE_PATH))
# Every .cpp file next to this Makefile, and one object file per source.
SRC = $(wildcard $(SRC_DIR)/*.cpp)
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
#SRC = $(wildcard *.cpp)
#OBJ = $(SRC:%.cpp=%.o)
# First target in the file, hence the default goal.
# NOTE(review): if the echo is the recipe of "default" it runs only after
# the "build" prerequisite has completed, despite its message - confirm.
default: build
echo "Start Build"
# use installed Makefile.kokkos
# KOKKOS_DEVICES, NVCC_WRAPPER and the KOKKOS_* flag variables used below are
# not defined in this file; presumably they come from this include.
include $(KOKKOS_PATH)/Makefile.kokkos
# Cuda builds compile with $(NVCC_WRAPPER) and produce <dir>.cuda;
# all other builds compile with g++ and produce <dir>.host.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(NVCC_WRAPPER)
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "Cuda,OpenMP"
#KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "OpenMP"
#KOKKOS_ARCH = "SNB"
endif
# DEPFLAGS is not referenced by any rule in this file; LIB is appended to
# the link line and is empty by default.
DEPFLAGS = -M
LIB =
build: $(EXE)
# Link all objects into the executable.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
# Remove build products from the current directory.
clean:
rm -f *.a *.o *.cuda *.host
# Compilation rules
# Objects are written to the current directory from sources in $(SRC_DIR).
%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,100 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <iostream>
#include <sstream>
#include <Kokkos_Macros.hpp>
#if defined( KOKKOS_HAVE_MPI )
#include <mpi.h>
#endif
#include <Kokkos_Core.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
// Print a short description of the hardware visible to this process:
// the hwloc NUMA / core / hyperthread counts when hwloc is available and,
// when built with Cuda, the Cuda configuration.  With MPI the text is
// prefixed with the rank.  The text is assembled in a string stream and
// written to std::cout in a single insertion.
int main( int argc , char ** argv )
{
  std::ostringstream report ;

#if defined( KOKKOS_HAVE_MPI )

  MPI_Init( & argc , & argv );

  int my_rank = 0 ;
  MPI_Comm_rank( MPI_COMM_WORLD , & my_rank );

  report << "MPI rank(" << my_rank << ") " ;

#endif

  report << "{" << std::endl ;

  if ( Kokkos::hwloc::available() ) {
    report << "hwloc( NUMA[" ;
    report << Kokkos::hwloc::get_available_numa_count() ;
    report << "] x CORE[" ;
    report << Kokkos::hwloc::get_available_cores_per_numa() ;
    report << "] x HT[" ;
    report << Kokkos::hwloc::get_available_threads_per_core() ;
    report << "] )" ;
    report << std::endl ;
  }

#if defined( KOKKOS_HAVE_CUDA )
  Kokkos::Cuda::print_configuration( report );
#endif

  report << "}" << std::endl ;

  std::cout << report.str();

#if defined( KOKKOS_HAVE_MPI )

  MPI_Finalize();

#endif

  return 0 ;
}

View File

@ -1,15 +0,0 @@
# Build description for the sort_array example executable.

INCLUDE(TribitsAddExecutableAndTest)
# Make headers in both the build directory and this source directory visible.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# Collect every .cpp file in this directory as the executable's sources.
SET(SOURCES "")
FILE(GLOB SOURCES *.cpp)
# NOTE(review): "COMM serial mpi" presumably enables the target for both
# serial and MPI builds - confirm against the TriBITS documentation.
TRIBITS_ADD_EXECUTABLE(
sort_array
SOURCES ${SOURCES}
COMM serial mpi
)

View File

@ -1,53 +0,0 @@
# Stand-alone build of the example in this directory.
# Root of the Kokkos tree; "?=" keeps any value already set by the caller.
KOKKOS_PATH ?= ../..
# Path of this Makefile: evaluated immediately, before the include below
# appends another entry to MAKEFILE_LIST.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
# Directory holding this Makefile; sources are looked up here rather than
# in the current working directory.
SRC_DIR := $(dir $(MAKEFILE_PATH))
# Every .cpp file next to this Makefile, and one object file per source.
SRC = $(wildcard $(SRC_DIR)/*.cpp)
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
#SRC = $(wildcard *.cpp)
#OBJ = $(SRC:%.cpp=%.o)
# First target in the file, hence the default goal.
# NOTE(review): if the echo is the recipe of "default" it runs only after
# the "build" prerequisite has completed, despite its message - confirm.
default: build
echo "Start Build"
# use installed Makefile.kokkos
# KOKKOS_DEVICES, NVCC_WRAPPER and the KOKKOS_* flag variables used below are
# not defined in this file; presumably they come from this include.
include $(KOKKOS_PATH)/Makefile.kokkos
# Cuda builds compile with $(NVCC_WRAPPER) and produce <dir>.cuda;
# all other builds compile with g++ and produce <dir>.host.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(NVCC_WRAPPER)
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "Cuda,OpenMP"
#KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "OpenMP"
#KOKKOS_ARCH = "SNB"
endif
# DEPFLAGS is not referenced by any rule in this file; LIB is appended to
# the link line and is empty by default.
DEPFLAGS = -M
LIB =
build: $(EXE)
# Link all objects into the executable.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
# Remove build products from the current directory.
clean:
rm -f *.a *.o *.cuda *.host
# Compilation rules
# Objects are written to the current directory from sources in $(SRC_DIR).
%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,95 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <string.h>
#include <stdlib.h>
#include <iostream>
#include <sstream>
#include <Kokkos_Core.hpp>
#include <sort_array.hpp>
// Run the sort_array example on every Kokkos execution space that is both
// compiled in and initialized.
//
// Command line: the word "length_array" followed by an integer sets the
// span length (default 100000); the total array length is 100 times that.
// When no execution space is compiled in, the program does nothing.
int main( int argc , char ** argv )
{
#if defined( KOKKOS_HAVE_CUDA ) || defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_OPENMP )

  Kokkos::initialize( argc , argv );

  int length_array = 100000 ;

  for ( int i = 0 ; i < argc ; ++i ) {
    if ( 0 == strcmp( argv[i] , "length_array" ) ) {
      // The value must follow the keyword.  Without this check a trailing
      // "length_array" would hand argv[argc], a null pointer, to atoi().
      if ( i + 1 < argc ) {
        length_array = atoi( argv[i+1] );
      }
      else {
        std::cerr << "length_array expects a value; using "
                  << length_array << std::endl ;
      }
    }
  }

  int length_total_array = length_array * 100;

#if defined( KOKKOS_HAVE_CUDA )
  if ( Kokkos::Cuda::is_initialized() ) {
    std::cout << "Kokkos::Cuda" << std::endl ;
    Example::sort_array< Kokkos::Cuda >( length_array , length_total_array );
  }
#endif

#if defined( KOKKOS_HAVE_PTHREAD )
  if ( Kokkos::Threads::is_initialized() ) {
    std::cout << "Kokkos::Threads" << std::endl ;
    Example::sort_array< Kokkos::Threads >( length_array , length_total_array );
  }
#endif

#if defined( KOKKOS_HAVE_OPENMP )
  if ( Kokkos::OpenMP::is_initialized() ) {
    std::cout << "Kokkos::OpenMP" << std::endl ;
    Example::sort_array< Kokkos::OpenMP >( length_array , length_total_array );
  }
#endif

  Kokkos::finalize();
#endif

  return 0 ;
}

View File

@ -1,190 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef EXAMPLE_SORT_ARRAY
#define EXAMPLE_SORT_ARRAY
#include <stdlib.h>
#include <algorithm>
#include <Kokkos_Core.hpp>
#include <impl/Kokkos_Timer.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Example {

// Generic sorter: constructing a SortView sorts the half-open index range
// [begin, end) of the rank-1 View `v` in place with std::sort, operating
// directly on the pointer returned by ptr_on_device().
template< class Device >
struct SortView {

  template< typename ValueType >
  SortView( const Kokkos::View<ValueType*,Device> v , int begin , int end )
  {
    ValueType * const base = v.ptr_on_device();

    std::sort( base + begin , base + end );
  }
};

}
#if defined(KOKKOS_HAVE_CUDA)

#include <thrust/device_ptr.h>
#include <thrust/sort.h>

namespace Example {

// Cuda specialization: constructing a SortView sorts the half-open index
// range [begin, end) of `v` in place with thrust::sort, wrapping the View's
// raw pointer in thrust::device_ptr.
template<>
struct SortView< Kokkos::Cuda > {

  template< typename ValueType >
  SortView( const Kokkos::View<ValueType*,Kokkos::Cuda> v , int begin , int end )
  {
    typedef thrust::device_ptr<ValueType> device_ptr_type ;

    const device_ptr_type first( v.ptr_on_device() + begin );
    const device_ptr_type last(  v.ptr_on_device() + end );

    thrust::sort( first , last );
  }
};

}

#endif
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Example {
template< class Device >
void sort_array( const size_t array_length /* length of spans of array to sort */
, const size_t total_length /* total length of array */
, const int print = 1 )
{
typedef Device execution_space ;
typedef Kokkos::View<int*,Device> device_array_type ;
#if defined( KOKKOS_HAVE_CUDA )
typedef typename
Kokkos::Impl::if_c< Kokkos::Impl::is_same< Device , Kokkos::Cuda >::value
, Kokkos::View<int*,Kokkos::Cuda::array_layout,Kokkos::CudaHostPinnedSpace>
, typename device_array_type::HostMirror
>::type host_array_type ;
#else
typedef typename device_array_type::HostMirror host_array_type ;
#endif
Kokkos::Impl::Timer timer;
const device_array_type work_array("work_array" , array_length );
const host_array_type host_array("host_array" , total_length );
std::cout << "sort_array length( " << total_length << " )"
<< " in chunks( " << array_length << " )"
<< std::endl ;
double sec = timer.seconds();
std::cout << "declaring Views took "
<< sec << " seconds" << std::endl;
timer.reset();
for ( size_t i = 0 ; i < total_length ; ++i ) {
host_array(i) = ( lrand48() * total_length ) >> 31 ;
}
sec = timer.seconds();
std::cout << "initializing " << total_length << " elements on host took "
<< sec << " seconds" << std::endl;
timer.reset();
double sec_copy_in = 0 ;
double sec_sort = 0 ;
double sec_copy_out = 0 ;
double sec_error = 0 ;
size_t error_count = 0 ;
for ( size_t begin = 0 ; begin < total_length ; begin += array_length ) {
const size_t end = begin + array_length < total_length
? begin + array_length : total_length ;
const std::pair<size_t,size_t> host_range(begin,end);
const host_array_type host_subarray = Kokkos::subview( host_array , host_range );
timer.reset();
Kokkos::deep_copy( work_array , host_subarray );
sec_copy_in += timer.seconds(); timer.reset();
SortView< execution_space >( work_array , 0 , end - begin );
sec_sort += timer.seconds(); timer.reset();
Kokkos::deep_copy( host_subarray , work_array );
sec_copy_out += timer.seconds(); timer.reset();
for ( size_t i = begin + 1 ; i < end ; ++i ) {
if ( host_array(i) < host_array(i-1) ) ++error_count ;
}
sec_error += timer.seconds(); timer.reset();
}
std::cout << "copy to device " << sec_copy_in << " seconds" << std::endl
<< "sort on device " << sec_sort << " seconds" << std::endl
<< "copy from device " << sec_copy_out << " seconds" << std::endl
<< "errors " << error_count << " took " << sec_error << " seconds" << std::endl
;
}
} // namespace Example
//----------------------------------------------------------------------------
#endif /* #ifndef EXAMPLE_SORT_ARRAY */

View File

@ -1,11 +0,0 @@
# Build script for the tutorial_01_hello_world example executable.
# Add this example's build and source directories to the include path.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
# The executable is built from the single source file hello_world.cpp.
TRIBITS_ADD_EXECUTABLE(
tutorial_01_hello_world
SOURCES hello_world.cpp
COMM serial mpi
)

View File

@ -1,43 +0,0 @@
# Stand-alone Makefile for this tutorial example.
# Location of the Kokkos source tree; Makefile.kokkos is included from here.
KOKKOS_PATH = ../../..
# Every .cpp file in this directory is a source of the example.
SRC = $(wildcard *.cpp)
# Default goal: build the example executable.
default: build
echo "Start Build"
# Select the toolchain from KOKKOS_DEVICES: if it contains "Cuda", compile
# with nvcc_wrapper and name the executable *.cuda; otherwise compile with
# g++ and name it *.host.
# NOTE(review): the test reads KOKKOS_DEVICES before this file assigns it, so
# the value presumably comes from the make command line or the environment --
# confirm.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif
DEPFLAGS = -M
# One object file per source file.
OBJ = $(SRC:.cpp=.o)
LIB =
# Shared Kokkos build settings.
# NOTE(review): presumably supplies the KOKKOS_* variables and the
# kokkos-clean target used below -- confirm against Makefile.kokkos.
include $(KOKKOS_PATH)/Makefile.kokkos
build: $(EXE)
# Link the object files and the Kokkos libraries into the executable.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
# Run kokkos-clean, then remove the objects and executables built here.
clean: kokkos-clean
rm -f *.o *.cuda *.host
# Compilation rules
# Compile each .cpp into a .o with the Kokkos preprocessor and compiler flags.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,130 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <cstdio>
#include <typeinfo>
//
// "Hello world" parallel_for example:
// 1. Start up Kokkos
// 2. Execute a parallel for loop in the default execution space,
// using a functor to define the loop body
// 3. Shut down Kokkos
//
// If Kokkos was built with C++11 enabled, try comparing this example
// to 01_hello_world_lambda. The latter uses C++11 lambdas (anonymous
// functions) to define the loop body of the parallel_for. That makes
// the code much more concise and readable. On the other hand,
// breaking out the loop body into an explicit functor makes it easier
// to test the loop independently of the parallel pattern.
//
// Functor that defines the parallel_for's loop body.
//
// A "functor" is just a class or struct with a public operator()
// instance method.
struct hello_world {
  // A functor may declare a public "execution_space" typedef.  If it does,
  // parallel_* will only run the functor in that execution space, which is
  // a good way to mark a functor as specific to one execution space.
  // Without the typedef (as here), parallel_for runs the functor in the
  // default execution space unless told otherwise (an advanced topic; see
  // "execution policies").

  // Loop body of the parallel_for.  The integer argument is the parallel
  // loop index; other argument kinds exist (see the "hierarchical
  // parallelism" part of the tutorial).
  //
  // operator() has to be const and has to carry KOKKOS_INLINE_FUNCTION.
  // In a CUDA build the macro marks the method as callable on the CUDA
  // device as well as on the host; in other builds it is unnecessary but
  // harmless.
  KOKKOS_INLINE_FUNCTION
  void operator() (const int idx) const {
    printf ("Hello from i = %i\n", idx);
  }
};
int main (int argc, char* argv[]) {
  // Kokkos must be initialized before any other Kokkos call.
  //
  // Called without arguments, initialize() sets up the default execution
  // space (and potentially its host execution space) with default
  // parameters.  Passing argc and argv, as done here, works like
  // MPI_Init(): command-line arguments starting with "--kokkos-" are read
  // and removed.
  Kokkos::initialize (argc, argv);

  // Report which execution space is the default.  The name comes from
  // typeid, so it may be mangled, but it should still be recognizable.
  const char* const space_name = typeid (Kokkos::DefaultExecutionSpace).name ();
  printf ("Hello World on Kokkos execution space %s\n", space_name);

  // Launch the hello_world functor in parallel on the default execution
  // space with a loop count of 15.  Depending on how Kokkos was configured
  // that space could be OpenMP, Threads, Cuda, Serial, or some other
  // execution space.
  //
  // The equivalent loop written directly in OpenMP would be:
  //
  //   #pragma omp parallel for
  //   for (int i = 0; i < 15; ++i) {
  //     printf ("Hello from i = %i\n", i);
  //   }
  //
  // The numbers may not be printed in order: iterations of a parallel for
  // loop may execute in any order.
  const int num_iterations = 15;
  Kokkos::parallel_for ("HelloWorld", num_iterations, hello_world ());

  // Kokkos must be finalized once you are done using it.
  Kokkos::finalize ();
  return 0;
}

View File

@ -1,13 +0,0 @@
# Build script for the tutorial_01_hello_world_lambda example executable.
# Add this example's build and source directories to the include path.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# The example is only built when Kokkos_ENABLE_CXX11 is set.
IF (Kokkos_ENABLE_CXX11)
# This is a tutorial, not a test, so we don't ask CTest to run it.
# The executable is built from the single source file hello_world_lambda.cpp.
TRIBITS_ADD_EXECUTABLE(
tutorial_01_hello_world_lambda
SOURCES hello_world_lambda.cpp
COMM serial mpi
)
ENDIF ()

View File

@ -1,44 +0,0 @@
# Stand-alone Makefile for this tutorial example (lambda variant).
# Location of the Kokkos source tree; Makefile.kokkos is included from here.
KOKKOS_PATH = ../../..
# Every .cpp file in this directory is a source of the example.
SRC = $(wildcard *.cpp)
# Default goal: build the example executable.
default: build
echo "Start Build"
# Select the toolchain from KOKKOS_DEVICES: if it contains "Cuda", compile
# with nvcc_wrapper, name the executable *.cuda and request the
# "enable_lambda" CUDA option; otherwise compile with g++ and name it *.host.
# NOTE(review): the test reads KOKKOS_DEVICES before this file assigns it, so
# the value presumably comes from the make command line or the environment --
# confirm.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
KOKKOS_CUDA_OPTIONS = "enable_lambda"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif
DEPFLAGS = -M
# One object file per source file.
OBJ = $(SRC:.cpp=.o)
LIB =
# Shared Kokkos build settings.
# NOTE(review): presumably supplies the KOKKOS_* variables and the
# kokkos-clean target used below -- confirm against Makefile.kokkos.
include $(KOKKOS_PATH)/Makefile.kokkos
build: $(EXE)
# Link the object files and the Kokkos libraries into the executable.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
# Run kokkos-clean, then remove the objects and executables built here.
clean: kokkos-clean
rm -f *.o *.cuda *.host
# Compilation rules
# Compile each .cpp into a .o with the Kokkos preprocessor and compiler flags.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,109 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <cstdio>
#include <typeinfo>
//
// "Hello world" parallel_for example:
// 1. Start up Kokkos
// 2. Execute a parallel for loop in the default execution space,
// using a C++11 lambda to define the loop body
// 3. Shut down Kokkos
//
// This example only builds if C++11 is enabled. Compare this example
// to 01_hello_world, which uses functors (explicitly defined classes)
// to define the loop body of the parallel_for. Both functors and
// lambdas have their places.
//
int main (int argc, char* argv[]) {
  // Kokkos must be initialized before any other Kokkos call.
  //
  // Called without arguments, initialize() sets up the default execution
  // space (and potentially its host execution space) with default
  // parameters.  Passing argc and argv, as done here, works like
  // MPI_Init(): command-line arguments starting with "--kokkos-" are read
  // and removed.
  Kokkos::initialize (argc, argv);

  // Report which execution space is the default.  The name comes from
  // typeid, so it may be mangled, but it should still be recognizable.
  const char* const space_name = typeid (Kokkos::DefaultExecutionSpace).name ();
  printf ("Hello World on Kokkos execution space %s\n", space_name);

  // Run a lambda in parallel on the default execution space with a loop
  // count of 15.  The lambda's integer argument is the parallel loop index;
  // other kinds of parallelism allow other argument types.
  //
  // For a single level of parallelism, prefer the KOKKOS_LAMBDA macro.
  // Without CUDA it is just [=], i.e. variables from the surrounding scope
  // are captured by value.  Do NOT capture them by reference!  With CUDA
  // the macro may have a special definition that makes the lambda work
  // correctly on the device, much like KOKKOS_INLINE_FUNCTION does for
  // functor methods.
  //
  // The equivalent loop written directly in OpenMP would be:
  //
  //   #pragma omp parallel for
  //   for (int i = 0; i < 15; ++i) {
  //     printf ("Hello from i = %i\n", i);
  //   }
  //
  // The numbers may not be printed in order: iterations of a parallel for
  // loop may execute in any order.
  const int num_iterations = 15;
  Kokkos::parallel_for (num_iterations, KOKKOS_LAMBDA (const int idx) {
    // printf works in a CUDA parallel kernel; std::ostream does not.
    printf ("Hello from i = %i\n", idx);
  });

  // Kokkos must be finalized once you are done using it.
  Kokkos::finalize ();
  return 0;
}

View File

@ -1,10 +0,0 @@
# Build script for the tutorial_02_simple_reduce example executable.
# Add this example's build and source directories to the include path.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
# The executable is built from the single source file simple_reduce.cpp.
TRIBITS_ADD_EXECUTABLE(
tutorial_02_simple_reduce
SOURCES simple_reduce.cpp
COMM serial mpi
)

View File

@ -1,43 +0,0 @@
# Stand-alone Makefile for this tutorial example.
# Location of the Kokkos source tree; Makefile.kokkos is included from here.
KOKKOS_PATH = ../../..
# Every .cpp file in this directory is a source of the example.
SRC = $(wildcard *.cpp)
# Default goal: build the example executable.
default: build
echo "Start Build"
# Select the toolchain from KOKKOS_DEVICES: if it contains "Cuda", compile
# with nvcc_wrapper and name the executable *.cuda; otherwise compile with
# g++ and name it *.host.
# NOTE(review): the test reads KOKKOS_DEVICES before this file assigns it, so
# the value presumably comes from the make command line or the environment --
# confirm.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif
DEPFLAGS = -M
# One object file per source file.
OBJ = $(SRC:.cpp=.o)
LIB =
# Shared Kokkos build settings.
# NOTE(review): presumably supplies the KOKKOS_* variables and the
# kokkos-clean target used below -- confirm against Makefile.kokkos.
include $(KOKKOS_PATH)/Makefile.kokkos
build: $(EXE)
# Link the object files and the Kokkos libraries into the executable.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
# Run kokkos-clean, then remove the objects and executables built here.
clean: kokkos-clean
rm -f *.o *.cuda *.host
# Compilation rules
# Compile each .cpp into a .o with the Kokkos preprocessor and compiler flags.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,101 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <cstdio>
//
// First reduction (parallel_reduce) example:
// 1. Start up Kokkos
// 2. Execute a parallel_reduce loop in the default execution space,
// using a functor to define the loop body
// 3. Shut down Kokkos
//
// Compare this example to 02_simple_reduce_lambda, which uses a C++11
// lambda to define the loop body of the parallel_reduce.
//
// Reduction functor for computing the sum of squares.
//
// More advanced reduction examples will show how to control the
// reduction's "join" operator. If the join operator is not provided,
// it defaults to binary operator+ (adding numbers together).
struct squaresum {
  // A reduction functor names the type of its reduction value through a
  // public "value_type" typedef; here the value is an int.
  typedef int value_type;

  // Unlike a parallel_for functor, a reduction functor's operator()
  // receives two arguments: the loop index and the intermediate reduction
  // value.  The latter MUST be taken by nonconst reference.  (For an array
  // reduction, where the reduction type is an array such as int[], the
  // second argument is simply int[].)
  KOKKOS_INLINE_FUNCTION
  void operator () (const int idx, int& partial) const {
    partial += idx*idx; // accumulate the square of this index
  }
};
int main (int argc, char* argv[]) {
  Kokkos::initialize (argc, argv);
  const int n = 10;

  // Parallel version: let Kokkos reduce the squares of 0 .. n-1 into
  // parallel_total using the squaresum functor.
  int parallel_total = 0;
  Kokkos::parallel_reduce (n, squaresum (), parallel_total);
  printf ("Sum of squares of integers from 0 to %i, "
          "computed in parallel, is %i\n", n - 1, parallel_total);

  // Reference version: the same sum computed by a plain sequential loop.
  int serial_total = 0;
  for (int k = 0; k < n; ++k) {
    serial_total += k*k;
  }
  printf ("Sum of squares of integers from 0 to %i, "
          "computed sequentially, is %i\n", n - 1, serial_total);

  Kokkos::finalize ();

  // Exit status 0 when both results agree, -1 otherwise.
  if (parallel_total == serial_total) {
    return 0;
  }
  return -1;
}

View File

@ -1,12 +0,0 @@
# Build script for the tutorial_02_simple_reduce_lambda example executable.
# Add this example's build and source directories to the include path.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# The example is only built when Kokkos_ENABLE_CXX11 is set.
IF (Kokkos_ENABLE_CXX11)
# This is a tutorial, not a test, so we don't ask CTest to run it.
# The executable is built from the single source file simple_reduce_lambda.cpp.
TRIBITS_ADD_EXECUTABLE(
tutorial_02_simple_reduce_lambda
SOURCES simple_reduce_lambda.cpp
COMM serial mpi
)
ENDIF ()

View File

@ -1,44 +0,0 @@
# Stand-alone Makefile for this tutorial example (lambda variant).
# Location of the Kokkos source tree; Makefile.kokkos is included from here.
KOKKOS_PATH = ../../..
# Every .cpp file in this directory is a source of the example.
SRC = $(wildcard *.cpp)
# Default goal: build the example executable.
default: build
echo "Start Build"
# Select the toolchain from KOKKOS_DEVICES: if it contains "Cuda", compile
# with nvcc_wrapper, name the executable *.cuda and request the
# "enable_lambda" CUDA option; otherwise compile with g++ and name it *.host.
# NOTE(review): the test reads KOKKOS_DEVICES before this file assigns it, so
# the value presumably comes from the make command line or the environment --
# confirm.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
KOKKOS_CUDA_OPTIONS = "enable_lambda"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif
DEPFLAGS = -M
# One object file per source file.
OBJ = $(SRC:.cpp=.o)
LIB =
# Shared Kokkos build settings.
# NOTE(review): presumably supplies the KOKKOS_* variables and the
# kokkos-clean target used below -- confirm against Makefile.kokkos.
include $(KOKKOS_PATH)/Makefile.kokkos
build: $(EXE)
# Link the object files and the Kokkos libraries into the executable.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
# Run kokkos-clean, then remove the objects and executables built here.
clean: kokkos-clean
rm -f *.o *.cuda *.host
# Compilation rules
# Compile each .cpp into a .o with the Kokkos preprocessor and compiler flags.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,86 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_Core.hpp>
#include <cstdio>
//
// First reduction (parallel_reduce) example:
// 1. Start up Kokkos
// 2. Execute a parallel_reduce loop in the default execution space,
// using a C++11 lambda to define the loop body
// 3. Shut down Kokkos
//
// This example only builds if C++11 is enabled. Compare this example
// to 02_simple_reduce, which uses a functor to define the loop body
// of the parallel_reduce.
//
int main (int argc, char* argv[]) {
  Kokkos::initialize (argc, argv);
  const int n = 10;

  // Parallel version: reduce the squares of 0 .. n-1 with Kokkos, this
  // time with a lambda in place of a functor.  The lambda has the same
  // parameters as the functor's operator() would.
  int parallel_total = 0;
  // KOKKOS_LAMBDA stands in for the capture-by-value clause [=] and also
  // takes care of any extra syntax CUDA needs.
  Kokkos::parallel_reduce (n, KOKKOS_LAMBDA (const int idx, int& partial) {
    partial += idx*idx;
  }, parallel_total);
  printf ("Sum of squares of integers from 0 to %i, "
          "computed in parallel, is %i\n", n - 1, parallel_total);

  // Reference version: the same sum computed by a plain sequential loop.
  int serial_total = 0;
  for (int k = 0; k < n; ++k) {
    serial_total += k*k;
  }
  printf ("Sum of squares of integers from 0 to %i, "
          "computed sequentially, is %i\n", n - 1, serial_total);

  Kokkos::finalize ();

  // Exit status 0 when both results agree, -1 otherwise.
  if (parallel_total == serial_total) {
    return 0;
  }
  return -1;
}

View File

@ -1,10 +0,0 @@
# Build script for the tutorial_03_simple_view example executable.
# Add this example's build and source directories to the include path.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# This is a tutorial, not a test, so we don't ask CTest to run it.
# The executable is built from the single source file simple_view.cpp.
TRIBITS_ADD_EXECUTABLE(
tutorial_03_simple_view
SOURCES simple_view.cpp
COMM serial mpi
)

View File

@ -1,43 +0,0 @@
# Stand-alone Makefile for this tutorial example.
# Location of the Kokkos source tree; Makefile.kokkos is included from here.
KOKKOS_PATH = ../../..
# Every .cpp file in this directory is a source of the example.
SRC = $(wildcard *.cpp)
# Default goal: build the example executable.
default: build
echo "Start Build"
# Select the toolchain from KOKKOS_DEVICES: if it contains "Cuda", compile
# with nvcc_wrapper and name the executable *.cuda; otherwise compile with
# g++ and name it *.host.
# NOTE(review): the test reads KOKKOS_DEVICES before this file assigns it, so
# the value presumably comes from the make command line or the environment --
# confirm.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif
DEPFLAGS = -M
# One object file per source file.
OBJ = $(SRC:.cpp=.o)
LIB =
# Shared Kokkos build settings.
# NOTE(review): presumably supplies the KOKKOS_* variables and the
# kokkos-clean target used below -- confirm against Makefile.kokkos.
include $(KOKKOS_PATH)/Makefile.kokkos
build: $(EXE)
# Link the object files and the Kokkos libraries into the executable.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
# Run kokkos-clean, then remove the objects and executables built here.
clean: kokkos-clean
rm -f *.o *.cuda *.host
# Compilation rules
# Compile each .cpp into a .o with the Kokkos preprocessor and compiler flags.
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -1,142 +0,0 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
//
// First Kokkos::View (multidimensional array) example:
// 1. Start up Kokkos
// 2. Allocate a Kokkos::View
// 3. Execute a parallel_for and a parallel_reduce over that View's data
// 4. Shut down Kokkos
//
// Compare this example to 03_simple_view_lambda, which uses C++11
// lambdas to define the loop bodies of the parallel_for and
// parallel_reduce.
//
#include <Kokkos_Core.hpp>
#include <cstdio>
// View type shared by the functors and main() in this example.
//
// A Kokkos::View is an array of zero or more dimensions.  The number
// of dimensions is specified at compile time, as part of the type of
// the View.  This array has two dimensions.  The first one
// (represented by the asterisk) is a run-time dimension, and the
// second (represented by [3]) is a compile-time dimension.  Thus,
// this View type is an N x 3 array of type double, where N is
// specified at run time in the View's constructor.
//
// The first dimension of the View is the dimension over which it is
// efficient for Kokkos to parallelize.
typedef Kokkos::View<double*[3]> view_type;
// parallel_for functor that fills the View given to its constructor.
// The View must already have been allocated.
struct InitView {
view_type a;
// Views have "view semantics." This means that they behave like
// pointers, not like std::vector. Their copy constructor and
// operator= only do shallow copies. Thus, you can pass View
// objects around by "value"; they won't do a deep copy unless you
// explicitly ask for a deep copy.
InitView (view_type a_) :
a (a_)
{}
// Fill the View with some data. The parallel_for loop will iterate
// over the View's first dimension N.
KOKKOS_INLINE_FUNCTION
void operator () (const int i) const {
// Acesss the View just like a Fortran array. The layout depends
// on the View's memory space, so don't rely on the View's
// physical memory layout unless you know what you're doing.
a(i,0) = 1.0*i;
a(i,1) = 1.0*i*i;
a(i,2) = 1.0*i*i*i;
}
};
// Reduction functor that reads the View given to its constructor.
struct ReduceFunctor {
view_type a;
// Constructor takes View by "value"; this does a shallow copy.
ReduceFunctor (view_type a_) : a (a_) {}
// If you write a functor to do a reduction, you must specify the
// type of the reduction result via a public 'value_type' typedef.
typedef double value_type;
KOKKOS_INLINE_FUNCTION
void operator() (int i, double &lsum) const {
lsum += a(i,0)*a(i,1)/(a(i,2)+0.1);
}
};
// Allocate an N x 3 View, fill it in parallel with InitView, reduce over it
// with ReduceFunctor and print the result.
int main (int argc, char* argv[]) {
  Kokkos::initialize (argc, argv);

  // The extra scope makes sure the View is released before
  // Kokkos::finalize() runs, instead of at the end of main().
  {
    const int N = 10;

    // Allocate the View.  The first dimension is a run-time parameter
    // N.  We set N = 10 here.  The second dimension is a compile-time
    // parameter, 3.  We don't specify it here because we already set it
    // by declaring the type of the View.
    //
    // Views get initialized to zero by default.  This happens in
    // parallel, using the View's memory space's default execution
    // space.  Parallel initialization ensures first-touch allocation.
    // There is a way to shut off default initialization.
    //
    // You may NOT allocate a View inside of a parallel_{for, reduce,
    // scan}.  Treat View allocation as a "thread collective."
    //
    // The string "A" is just the label; it only matters for debugging.
    // Different Views may have the same label.
    view_type a ("A", N);

    Kokkos::parallel_for (N, InitView (a));

    double sum = 0;
    Kokkos::parallel_reduce (N, ReduceFunctor (a), sum);
    printf ("Result: %f\n", sum);
  }

  Kokkos::finalize ();
  return 0;
}

View File

@ -1,12 +0,0 @@
# Build rules for the tutorial_03_simple_view_lambda example.
#
# Add the current binary and source directories to the include path.
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# The executable is only declared when Kokkos_ENABLE_CXX11 is set
# (presumably because the lambda-based source requires C++11 -- TODO confirm).
IF (Kokkos_ENABLE_CXX11)
# This is a tutorial, not a test, so we don't ask CTest to run it.
TRIBITS_ADD_EXECUTABLE(
tutorial_03_simple_view_lambda
SOURCES simple_view_lambda.cpp
COMM serial mpi
)
ENDIF ()

View File

@ -1,44 +0,0 @@
# Stand-alone Makefile for this tutorial directory.
#
# KOKKOS_PATH is the directory from which Makefile.kokkos is included
# further down.
KOKKOS_PATH = ../../..
# Every .cpp file in this directory is treated as a source file.
SRC = $(wildcard *.cpp)
# Default goal: build the executable.
default: build
echo "Start Build"
# Toolchain selection: if KOKKOS_DEVICES (as set when make is invoked)
# contains "Cuda", compile with nvcc_wrapper and name the executable
# *.cuda; otherwise compile with g++ and name it *.host.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
KOKKOS_CUDA_OPTIONS = "enable_lambda"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif
DEPFLAGS = -M
# One object file per source file.
OBJ = $(SRC:.cpp=.o)
LIB =
# Makefile.kokkos is included after the settings above; the KOKKOS_*
# variables and the kokkos-clean target used by the rules below are not
# defined in this file (presumably they come from this include -- confirm).
include $(KOKKOS_PATH)/Makefile.kokkos
build: $(EXE)
# Link step: all objects are linked into $(EXE).
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
# Remove local build products in addition to whatever kokkos-clean removes.
clean: kokkos-clean
rm -f *.o *.cuda *.host
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

Some files were not shown because too many files have changed in this diff Show More