Updating kokkos lib
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14918 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -1,20 +0,0 @@
|
||||
|
||||
|
||||
# The subpackage name given here must match the name that appears in
# kokkos/cmake/Dependencies.cmake
TRIBITS_SUBPACKAGE(Example)

# Example directories that are always registered.
TRIBITS_ADD_EXAMPLE_DIRECTORIES(
  query_device
  fixture
  feint
  fenl
  multi_fem
  md_skeleton
  global_2_local_ids
  grow_array
  sort_array
  )

# The tutorial is only registered when Cuda is not enabled.
if(NOT Kokkos_ENABLE_Cuda)
  TRIBITS_ADD_EXAMPLE_DIRECTORIES(tutorial)
endif()

TRIBITS_SUBPACKAGE_POSTPROCESS()
|
||||
|
||||
@ -1,16 +0,0 @@
|
||||
This directory contains example application proxies that use different
|
||||
parts of Kokkos. If you are looking for the FENL ("finite element
|
||||
nonlinear" solve) example, it has moved into the LinAlg subpackage of
|
||||
Tpetra.
|
||||
|
||||
MANIFEST:
|
||||
|
||||
- common: Header files used by different examples
|
||||
- feint: Numerical integration of a function on an unstructured finite-element mesh, with the integrated values projected to nodes
|
||||
- fixture: Box element mesh fixture (BoxElemPart / BoxElemFixture) used by the finite-element examples such as feint
|
||||
- global_2_local_ids: Example of global-to-local index lookup
|
||||
- grow_array: Parallel dynamic memory allocation
|
||||
- md_skeleton: Molecular dynamics
|
||||
- query_device: Kokkos' HWLOC wrapper for querying device topology
|
||||
- sort_array: Parallel sort
|
||||
- tutorial: Kokkos tutorial (START HERE)
|
||||
@ -1,4 +0,0 @@
|
||||
# Dependencies of the example subpackage:
#   - the libraries require the three Kokkos subpackages listed below;
#   - the tests may optionally use the CUSPARSE and MKL TPLs.
TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
  LIB_REQUIRED_DEP_PACKAGES
    KokkosCore
    KokkosContainers
    KokkosAlgorithms
  TEST_OPTIONAL_DEP_TPLS
    CUSPARSE
    MKL
  )
|
||||
@ -1,294 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_VECTORIMPORT_HPP
|
||||
#define KOKKOS_VECTORIMPORT_HPP
|
||||
|
||||
#include <iostream>
#include <limits>
#include <sstream>
#include <stdexcept>
#include <utility>
#include <vector>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <WrapMPI.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
template< class CommMessageType , class CommIdentType , class VectorType >
|
||||
struct VectorImport ;
|
||||
|
||||
} // namespace Example
|
||||
} // namespace Kokkos
|
||||
|
||||
#if ! defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
template< class CommMessageType , class CommIdentType , class VectorType >
|
||||
struct VectorImport {
|
||||
|
||||
const MPI_Comm comm ;
|
||||
const unsigned count_owned ;
|
||||
const unsigned count_receive ;
|
||||
|
||||
VectorImport( MPI_Comm arg_comm ,
|
||||
const CommMessageType & ,
|
||||
const CommMessageType & ,
|
||||
const CommIdentType & ,
|
||||
const unsigned arg_count_owned ,
|
||||
const unsigned arg_count_receive )
|
||||
: comm( arg_comm )
|
||||
, count_owned( arg_count_owned )
|
||||
, count_receive( arg_count_receive )
|
||||
{}
|
||||
|
||||
inline
|
||||
void operator()( const VectorType & ) const {}
|
||||
};
|
||||
|
||||
|
||||
} // namespace Example
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#else /* defined( KOKKOS_HAVE_MPI ) */
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
template< class CommMessageType , class CommIdentType , class VectorType >
|
||||
class VectorImport {
|
||||
private:
|
||||
|
||||
// rank == 1 or array_layout == LayoutRight
|
||||
enum { OK = Kokkos::Impl::StaticAssert<
|
||||
( VectorType::rank == 1 ) ||
|
||||
Kokkos::Impl::is_same< typename VectorType::array_layout , Kokkos::LayoutRight >::value
|
||||
>::value };
|
||||
|
||||
typedef typename VectorType::HostMirror HostVectorType ;
|
||||
|
||||
enum { ReceiveInPlace =
|
||||
Kokkos::Impl::is_same< typename VectorType::memory_space ,
|
||||
typename HostVectorType::memory_space >::value };
|
||||
|
||||
const CommMessageType recv_msg ;
|
||||
const CommMessageType send_msg ;
|
||||
const CommIdentType send_nodeid ;
|
||||
VectorType send_buffer ;
|
||||
HostVectorType host_send_buffer ;
|
||||
HostVectorType host_recv_buffer ;
|
||||
unsigned chunk ;
|
||||
|
||||
public:
|
||||
|
||||
const MPI_Comm comm ;
|
||||
const unsigned count_owned ;
|
||||
const unsigned count_receive ;
|
||||
|
||||
struct Pack {
|
||||
typedef typename VectorType::execution_space execution_space ;
|
||||
const CommIdentType index ;
|
||||
const VectorType source ;
|
||||
const VectorType buffer ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const unsigned i ) const
|
||||
{ buffer( i ) = source( index(i) ); }
|
||||
|
||||
Pack( const CommIdentType & arg_index ,
|
||||
const VectorType & arg_source ,
|
||||
const VectorType & arg_buffer )
|
||||
: index( arg_index )
|
||||
, source( arg_source )
|
||||
, buffer( arg_buffer )
|
||||
{
|
||||
Kokkos::parallel_for( index.dimension_0() , *this );
|
||||
execution_space::fence();
|
||||
}
|
||||
};
|
||||
|
||||
VectorImport( MPI_Comm arg_comm ,
|
||||
const CommMessageType & arg_recv_msg ,
|
||||
const CommMessageType & arg_send_msg ,
|
||||
const CommIdentType & arg_send_nodeid ,
|
||||
const unsigned arg_count_owned ,
|
||||
const unsigned arg_count_receive )
|
||||
: recv_msg( arg_recv_msg )
|
||||
, send_msg( arg_send_msg )
|
||||
, send_nodeid( arg_send_nodeid )
|
||||
, send_buffer()
|
||||
, host_send_buffer()
|
||||
, host_recv_buffer()
|
||||
, comm( arg_comm )
|
||||
, count_owned( arg_count_owned )
|
||||
, count_receive( arg_count_receive )
|
||||
{
|
||||
if ( ! ReceiveInPlace ) {
|
||||
host_recv_buffer = HostVectorType("recv_buffer",count_receive);
|
||||
}
|
||||
|
||||
unsigned send_count = 0 ;
|
||||
for ( unsigned i = 0 ; i < send_msg.dimension_0() ; ++i ) { send_count += send_msg(i,1); }
|
||||
send_buffer = VectorType("send_buffer",send_count);
|
||||
host_send_buffer = Kokkos::create_mirror_view( send_buffer );
|
||||
}
|
||||
|
||||
inline
|
||||
void operator()( const VectorType & v ) const
|
||||
{
|
||||
typedef typename VectorType::value_type scalar_type ;
|
||||
|
||||
const int mpi_tag = 42 ;
|
||||
const unsigned chunk = v.dimension_1();
|
||||
|
||||
// Subvector for receives
|
||||
const std::pair<unsigned,unsigned> recv_range( count_owned , count_owned + count_receive );
|
||||
const VectorType recv_vector = Kokkos::subview( v , recv_range );
|
||||
|
||||
std::vector< MPI_Request > recv_request( recv_msg.dimension_0() , MPI_REQUEST_NULL );
|
||||
|
||||
{ // Post receives
|
||||
scalar_type * ptr =
|
||||
ReceiveInPlace ? recv_vector.ptr_on_device() : host_recv_buffer.ptr_on_device();
|
||||
|
||||
for ( size_t i = 0 ; i < recv_msg.dimension_0() ; ++i ) {
|
||||
const int proc = recv_msg(i,0);
|
||||
const int count = recv_msg(i,1) * chunk ;
|
||||
|
||||
MPI_Irecv( ptr , count * sizeof(scalar_type) , MPI_BYTE ,
|
||||
proc , mpi_tag , comm , & recv_request[i] );
|
||||
|
||||
ptr += count ;
|
||||
}
|
||||
}
|
||||
|
||||
MPI_Barrier( comm );
|
||||
|
||||
{ // Pack and send
|
||||
const Pack pack( send_nodeid , v , send_buffer );
|
||||
|
||||
Kokkos::deep_copy( host_send_buffer , send_buffer );
|
||||
|
||||
scalar_type * ptr = host_send_buffer.ptr_on_device();
|
||||
|
||||
for ( size_t i = 0 ; i < send_msg.dimension_0() ; ++i ) {
|
||||
const int proc = send_msg(i,0);
|
||||
const int count = send_msg(i,1) * chunk ;
|
||||
|
||||
// MPI_Ssend blocks until
|
||||
// (1) a receive is matched for the message and
|
||||
// (2) the send buffer can be re-used.
|
||||
//
|
||||
// It is suggested that MPI_Ssend will have the best performance:
|
||||
// http://www.mcs.anl.gov/research/projects/mpi/sendmode.html .
|
||||
|
||||
MPI_Ssend( ptr ,
|
||||
count * sizeof(scalar_type) , MPI_BYTE ,
|
||||
proc , mpi_tag , comm );
|
||||
|
||||
ptr += count ;
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for receives and verify:
|
||||
|
||||
for ( size_t i = 0 ; i < recv_msg.dimension_0() ; ++i ) {
|
||||
MPI_Status recv_status ;
|
||||
int recv_which = 0 ;
|
||||
int recv_size = 0 ;
|
||||
|
||||
MPI_Waitany( recv_msg.dimension_0() , & recv_request[0] , & recv_which , & recv_status );
|
||||
|
||||
const int recv_proc = recv_status.MPI_SOURCE ;
|
||||
|
||||
MPI_Get_count( & recv_status , MPI_BYTE , & recv_size );
|
||||
|
||||
// Verify message properly received:
|
||||
|
||||
const int expected_proc = recv_msg(recv_which,0);
|
||||
const int expected_size = recv_msg(recv_which,1) * chunk * sizeof(scalar_type);
|
||||
|
||||
if ( ( expected_proc != recv_proc ) ||
|
||||
( expected_size != recv_size ) ) {
|
||||
|
||||
int local_rank = 0 ;
|
||||
|
||||
MPI_Comm_rank( comm , & local_rank );
|
||||
|
||||
std::ostringstream msg ;
|
||||
msg << "VectorImport error:"
|
||||
<< " P" << local_rank
|
||||
<< " received from P" << recv_proc
|
||||
<< " size " << recv_size
|
||||
<< " expected " << expected_size
|
||||
<< " from P" << expected_proc ;
|
||||
throw std::runtime_error( msg.str() );
|
||||
}
|
||||
}
|
||||
|
||||
// Copy received data to device memory.
|
||||
|
||||
if ( ! ReceiveInPlace ) { Kokkos::deep_copy( recv_vector , host_recv_buffer ); }
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Example
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_VECTORIMPORT_HPP */
|
||||
|
||||
|
||||
@ -1,103 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXAMPLE_WRAP_MPI
|
||||
#define KOKKOS_EXAMPLE_WRAP_MPI
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
#include <string>
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
inline
|
||||
double all_reduce( double value , MPI_Comm comm )
|
||||
{
|
||||
double local = value ;
|
||||
MPI_Allreduce( & local , & value , 1 , MPI_DOUBLE , MPI_SUM , comm );
|
||||
return value ;
|
||||
}
|
||||
|
||||
inline
|
||||
double all_reduce_max( double value , MPI_Comm comm )
|
||||
{
|
||||
double local = value ;
|
||||
MPI_Allreduce( & local , & value , 1 , MPI_DOUBLE , MPI_MAX , comm );
|
||||
return value ;
|
||||
}
|
||||
|
||||
} // namespace Example
|
||||
} // namespace Kokkos
|
||||
|
||||
#elif ! defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
/* Wrap the MPI_Comm type and heavily used MPI functions
|
||||
* to reduce the number of '#if defined( KOKKOS_HAVE_MPI )'
|
||||
* blocks which have to be sprinkled throughout the examples.
|
||||
*/
|
||||
|
||||
// Serial stand-ins: a single process with rank 0 in a communicator of size 1.

typedef int MPI_Comm ;

/** Report a communicator size of one; always returns 0. */
inline int MPI_Comm_size( MPI_Comm , int * size )
{
  *size = 1 ;
  return 0 ;
}

/** Report rank zero; always returns 0. */
inline int MPI_Comm_rank( MPI_Comm , int * rank )
{
  *rank = 0 ;
  return 0 ;
}

/** Nothing to synchronize; always returns 0. */
inline int MPI_Barrier( MPI_Comm )
{
  return 0 ;
}

namespace Kokkos {
namespace Example {

/** With a single process the reduced sum is the input value itself. */
inline
double all_reduce( double value , MPI_Comm )
{
  return value ;
}

/** With a single process the reduced maximum is the input value itself. */
inline
double all_reduce_max( double value , MPI_Comm )
{
  return value ;
}

} // namespace Example
} // namespace Kokkos
|
||||
|
||||
#endif /* ! defined( KOKKOS_HAVE_MPI ) */
|
||||
#endif /* #ifndef KOKKOS_EXAMPLE_WRAP_MPI */
|
||||
|
||||
@ -1,18 +0,0 @@
|
||||
|
||||
# Header search path: build tree, this directory, and the sibling
# 'common' and 'fixture' example directories.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/../common
  ${CMAKE_CURRENT_SOURCE_DIR}/../fixture
  )

# All sources in this directory plus the mesh partitioning source
# that lives in the fixture example.
FILE(GLOB SOURCES *.cpp)
LIST(APPEND SOURCES ../fixture/BoxElemPart.cpp)

TRIBITS_ADD_EXECUTABLE(
  feint
  SOURCES ${SOURCES}
  COMM serial mpi
  )
|
||||
|
||||
@ -1,489 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXAMPLE_FEINT_FUNCTORS_HPP
|
||||
#define KOKKOS_EXAMPLE_FEINT_FUNCTORS_HPP
|
||||
|
||||
#include <stdio.h>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <BoxElemFixture.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
/** \brief Numerically integrate a function on a finite element mesh and
|
||||
* project the integrated values to nodes.
|
||||
*/
|
||||
template< class FixtureType ,
|
||||
class FunctionType ,
|
||||
bool PerformScatterAddWithAtomic >
|
||||
struct FiniteElementIntegration ;
|
||||
|
||||
// Specialized for an 'Example::BoxElemFixture' finite element mesh
|
||||
template< class Device , BoxElemPart::ElemOrder ElemOrder , class GridMap ,
|
||||
class FunctionType ,
|
||||
bool PerformScatterAddWithAtomic >
|
||||
struct FiniteElementIntegration<
|
||||
Kokkos::Example::BoxElemFixture< Device , ElemOrder , GridMap > ,
|
||||
FunctionType ,
|
||||
PerformScatterAddWithAtomic >
|
||||
{
|
||||
// Element mesh types:
|
||||
typedef Kokkos::Example::BoxElemFixture< Device , ElemOrder >
|
||||
BoxFixtureType ;
|
||||
|
||||
typedef Kokkos::Example::HexElement_Data< BoxFixtureType::ElemNode >
|
||||
HexElemDataType ;
|
||||
|
||||
enum { ElemNodeCount = HexElemDataType::element_node_count };
|
||||
enum { IntegrationCount = HexElemDataType::integration_count };
|
||||
enum { ValueCount = FunctionType::value_count };
|
||||
|
||||
// Dictionary of view types:
|
||||
typedef View<int*, Device> ElemErrorType ;
|
||||
typedef View<double*[ElemNodeCount][ValueCount],Device> ElemValueType ;
|
||||
typedef View<double*[ValueCount], Device> NodeValueType ;
|
||||
|
||||
// Data members for this Functor:
|
||||
const HexElemDataType m_hex_elem_data ; ///< Master element
|
||||
const BoxFixtureType m_box_fixture ; ///< Unstructured mesh data
|
||||
const FunctionType m_function ; ///< Function to integrate
|
||||
const ElemErrorType m_elem_error ; ///< Flags for element errors
|
||||
const ElemValueType m_elem_integral ; ///< Per-element quantities
|
||||
const NodeValueType m_node_lumped ; ///< Quantities lumped to nodes
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
FiniteElementIntegration(
|
||||
const BoxFixtureType & box_fixture ,
|
||||
const FunctionType & function )
|
||||
: m_hex_elem_data()
|
||||
, m_box_fixture( box_fixture ) // Shallow copy of the mesh fixture
|
||||
, m_function( function )
|
||||
, m_elem_error( "elem_error" , box_fixture.elem_count() )
|
||||
, m_elem_integral( "elem_integral" , box_fixture.elem_count() )
|
||||
, m_node_lumped( "node_lumped" , box_fixture.node_count() )
|
||||
{}
|
||||
|
||||
//----------------------------------------
|
||||
// Device for parallel dispatch.
|
||||
typedef typename Device::execution_space execution_space;
|
||||
|
||||
// Value type for global parallel reduction.
|
||||
struct value_type {
|
||||
double value[ ValueCount ]; ///< Integrated quantitie
|
||||
int error ; ///< Element inversion flag
|
||||
};
|
||||
|
||||
//----------------------------------------
|
||||
// Transform element interpolation function gradients and
|
||||
// compute determinant of spatial jacobian.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
float transform_gradients(
|
||||
const float grad[][ ElemNodeCount ] , // Gradient of bases master element
|
||||
const double coord[][ ElemNodeCount ] ,
|
||||
float dpsi[][ ElemNodeCount ] ) const
|
||||
{
|
||||
enum { TensorDim = 9 };
|
||||
enum { j11 = 0 , j12 = 1 , j13 = 2 ,
|
||||
j21 = 3 , j22 = 4 , j23 = 5 ,
|
||||
j31 = 6 , j32 = 7 , j33 = 8 };
|
||||
|
||||
// Temporary for jacobian accumulation is double for summation accuracy.
|
||||
double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
|
||||
for( int i = 0; i < ElemNodeCount ; ++i ) {
|
||||
J[j11] += grad[0][i] * coord[0][i] ;
|
||||
J[j12] += grad[0][i] * coord[1][i] ;
|
||||
J[j13] += grad[0][i] * coord[2][i] ;
|
||||
|
||||
J[j21] += grad[1][i] * coord[0][i] ;
|
||||
J[j22] += grad[1][i] * coord[1][i] ;
|
||||
J[j23] += grad[1][i] * coord[2][i] ;
|
||||
|
||||
J[j31] += grad[2][i] * coord[0][i] ;
|
||||
J[j32] += grad[2][i] * coord[1][i] ;
|
||||
J[j33] += grad[2][i] * coord[2][i] ;
|
||||
}
|
||||
|
||||
// Inverse jacobian, compute as double and store as float.
|
||||
float invJ[ TensorDim ] = {
|
||||
float( J[j22] * J[j33] - J[j23] * J[j32] ) ,
|
||||
float( J[j13] * J[j32] - J[j12] * J[j33] ) ,
|
||||
float( J[j12] * J[j23] - J[j13] * J[j22] ) ,
|
||||
|
||||
float( J[j23] * J[j31] - J[j21] * J[j33] ) ,
|
||||
float( J[j11] * J[j33] - J[j13] * J[j31] ) ,
|
||||
float( J[j13] * J[j21] - J[j11] * J[j23] ) ,
|
||||
|
||||
float( J[j21] * J[j32] - J[j22] * J[j31] ) ,
|
||||
float( J[j12] * J[j31] - J[j11] * J[j32] ) ,
|
||||
float( J[j11] * J[j22] - J[j12] * J[j21] ) };
|
||||
|
||||
const float detJ = J[j11] * invJ[j11] +
|
||||
J[j21] * invJ[j12] +
|
||||
J[j31] * invJ[j13] ;
|
||||
|
||||
{
|
||||
const float detJinv = 1.0 / detJ ;
|
||||
for ( int i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
|
||||
}
|
||||
|
||||
// Transform gradients:
|
||||
for ( int i = 0; i < ElemNodeCount ; ++i ) {
|
||||
dpsi[0][i] = grad[0][i] * invJ[j11] +
|
||||
grad[1][i] * invJ[j12] +
|
||||
grad[2][i] * invJ[j13];
|
||||
dpsi[1][i] = grad[0][i] * invJ[j21] +
|
||||
grad[1][i] * invJ[j22] +
|
||||
grad[2][i] * invJ[j23];
|
||||
dpsi[2][i] = grad[0][i] * invJ[j31] +
|
||||
grad[1][i] * invJ[j32] +
|
||||
grad[2][i] * invJ[j33];
|
||||
}
|
||||
|
||||
return detJ ;
|
||||
}
|
||||
|
||||
// Functor's function called for each element in the mesh
|
||||
// to numerically integrate the function and add element quantities
|
||||
// to the global integral.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const int ielem , value_type & update ) const
|
||||
{
|
||||
// Local temporaries for gathering nodal data.
|
||||
double node_coord[3][ ElemNodeCount ];
|
||||
|
||||
int inode[ ElemNodeCount ] ;
|
||||
|
||||
// Gather indices of element's node from global memory to local memory.
|
||||
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
|
||||
inode[i] = m_box_fixture.elem_node( ielem , i );
|
||||
}
|
||||
|
||||
// Gather coordinates of element's nodes from global memory to local memory.
|
||||
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
|
||||
node_coord[0][i] = m_box_fixture.node_coord( inode[i] , 0 );
|
||||
node_coord[1][i] = m_box_fixture.node_coord( inode[i] , 1 );
|
||||
node_coord[2][i] = m_box_fixture.node_coord( inode[i] , 2 );
|
||||
}
|
||||
|
||||
// Local temporary to accumulate numerical integration
|
||||
// of vector valued function.
|
||||
double accum[ ValueCount ];
|
||||
|
||||
for ( int j = 0 ; j < ValueCount ; ++j ) { accum[j] = 0 ; }
|
||||
|
||||
int error = 0 ;
|
||||
|
||||
// Numerical integration loop for this element:
|
||||
for ( int k = 0 ; k < IntegrationCount ; ++k ) {
|
||||
|
||||
// Integration point in space as interpolated from nodal coordinates:
|
||||
double point[3] = { 0 , 0 , 0 };
|
||||
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
|
||||
point[0] += node_coord[0][i] * m_hex_elem_data.values[k][i] ;
|
||||
point[1] += node_coord[1][i] * m_hex_elem_data.values[k][i] ;
|
||||
point[2] += node_coord[2][i] * m_hex_elem_data.values[k][i] ;
|
||||
}
|
||||
|
||||
// Example function vector value at cubature point:
|
||||
double val_at_pt[ ValueCount ];
|
||||
m_function( point , val_at_pt );
|
||||
|
||||
// Temporary array for transformed element basis functions' gradient.
|
||||
// Not used in this example, but computed anyway by the more general
|
||||
// deformation function.
|
||||
float dpsi[3][ ElemNodeCount ];
|
||||
|
||||
// Compute deformation jacobian, transform basis function gradient,
|
||||
// and return determinant of deformation jacobian.
|
||||
float detJ = transform_gradients( m_hex_elem_data.gradients[k] ,
|
||||
node_coord , dpsi );
|
||||
|
||||
// Check for inverted spatial jacobian
|
||||
if ( detJ <= 0 ) { error = 1 ; detJ = 0 ; }
|
||||
|
||||
// Integration weight.
|
||||
const float w = m_hex_elem_data.weights[k] * detJ ;
|
||||
|
||||
// Cubature of function.
|
||||
for ( int j = 0 ; j < ValueCount ; ++j ) {
|
||||
accum[j] += val_at_pt[j] * w ;
|
||||
}
|
||||
}
|
||||
|
||||
m_elem_error(ielem) = error ;
|
||||
|
||||
|
||||
// Element contribution to global integral:
|
||||
|
||||
if ( error ) { update.error = 1 ; }
|
||||
|
||||
for ( int j = 0 ; j < ValueCount ; ++j ) { update.value[j] += accum[j] ; }
|
||||
|
||||
// Element-node quantity for lumping to nodes:
|
||||
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
|
||||
for ( int j = 0 ; j < ValueCount ; ++j ) {
|
||||
// Save element's integral apportionment to nodes to global memory
|
||||
m_elem_integral( ielem , i , j ) = accum[j] / ElemNodeCount ;
|
||||
}
|
||||
}
|
||||
|
||||
if ( PerformScatterAddWithAtomic ) {
|
||||
// Option to immediately scatter-add the integrated quantities to nodes.
|
||||
// This is a race condition as two or more threads could attempt
|
||||
// concurrent update of nodal values. The atomic_fetch_add (+=)
|
||||
// function guarantees that the summation will occur correctly;
|
||||
// however, there can be no guarantee for the order of summation.
|
||||
// Due to non-associativity of floating point arithmetic the result
|
||||
// is non-deterministic within bounds of floating point round-off.
|
||||
|
||||
for ( int i = 0 ; i < ElemNodeCount ; ++i ) {
|
||||
for ( int j = 0 ; j < ValueCount ; ++j ) {
|
||||
Kokkos::atomic_fetch_add( & m_node_lumped( inode[i] , j ) ,
|
||||
m_elem_integral( ielem , i , j ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
// Initialization of the global reduction value.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void init( value_type & update ) const
|
||||
{
|
||||
for ( int j = 0 ; j < ValueCount ; ++j ) update.value[j] = 0 ;
|
||||
update.error = 0 ;
|
||||
}
|
||||
|
||||
// Join two contributions to global reduction value.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void join( volatile value_type & update ,
|
||||
volatile const value_type & input ) const
|
||||
{
|
||||
for ( int j = 0 ; j < ValueCount ; ++j ) update.value[j] += input.value[j] ;
|
||||
if ( input.error ) update.error = 1 ;
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
template< class ViewElemNode ,
|
||||
class ViewNodeScan ,
|
||||
class ViewNodeElem >
|
||||
void map_node_to_elem( const ViewElemNode & elem_node ,
|
||||
const ViewNodeScan & node_scan ,
|
||||
const ViewNodeElem & node_elem );
|
||||
|
||||
/** \brief Functor to gather-sum elements' per-node quantities
|
||||
* to element nodes. Gather-sum is thread safe and
|
||||
* does not require atomic updates.
|
||||
*/
|
||||
template< class ViewNodeValue ,
|
||||
class ViewElemValue ,
|
||||
bool AlreadyUsedAtomic >
|
||||
struct LumpElemToNode {
|
||||
|
||||
typedef typename ViewElemValue::execution_space execution_space ;
|
||||
|
||||
// In this example we know that the ViewElemValue
|
||||
// array specification is < double*[nNode][nValue] >
|
||||
|
||||
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
|
||||
enum { value_count = ViewElemValue::dimension::N2 };
|
||||
#else
|
||||
enum { value_count = ViewElemValue::shape_type::N2 };
|
||||
#endif
|
||||
|
||||
ViewNodeValue m_node_value ; ///< Integrated values at nodes
|
||||
ViewElemValue m_elem_value ; ///< Values apportioned to nodes
|
||||
View<int*, execution_space> m_node_scan ; ///< Offsets for nodes->element
|
||||
View<int*[2],execution_space> m_node_elem ; ///< Node->element connectivity
|
||||
|
||||
// Only allocate node->element connectivity if have
|
||||
// not already used atomic updates for the nodes.
|
||||
template< class ViewElemNode >
|
||||
LumpElemToNode( const ViewNodeValue & node_value ,
|
||||
const ViewElemValue & elem_value ,
|
||||
const ViewElemNode & elem_node )
|
||||
: m_node_value( node_value )
|
||||
, m_elem_value( elem_value )
|
||||
, m_node_scan( "node_scan" ,
|
||||
AlreadyUsedAtomic ? 0 : node_value.dimension_0() + 1 )
|
||||
, m_node_elem( "node_elem" ,
|
||||
AlreadyUsedAtomic ? 0 : elem_node.dimension_0() *
|
||||
elem_node.dimension_1() )
|
||||
{
|
||||
if ( ! AlreadyUsedAtomic ) {
|
||||
map_node_to_elem( elem_node , m_node_scan , m_node_elem );
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
struct value_type { double value[ value_count ]; };
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const int inode , value_type & update ) const
|
||||
{
|
||||
if ( ! AlreadyUsedAtomic ) {
|
||||
// Sum element quantities to a local variable.
|
||||
value_type local ;
|
||||
for ( int j = 0 ; j < value_count ; ++j ) { local.value[j] = 0 ; }
|
||||
|
||||
{
|
||||
// nodes' element ids span [i,end)
|
||||
int i = m_node_scan(inode);
|
||||
const int end = m_node_scan(inode+1);
|
||||
|
||||
for ( ; i < end ; ++i ) {
|
||||
// element #ielem , local node #ielem_node is this node:
|
||||
const int ielem = m_node_elem(i,0);
|
||||
const int ielem_node = m_node_elem(i,1);
|
||||
// Sum the vector-values quantity
|
||||
for ( int j = 0 ; j < value_count ; ++j ) {
|
||||
local.value[j] += m_elem_value( ielem , ielem_node , j );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Assign nodal quantity (no race condition).
|
||||
// Sum global value.
|
||||
for ( int j = 0 ; j < value_count ; ++j ) {
|
||||
m_node_value( inode , j ) = local.value[j] ;
|
||||
update.value[j] += local.value[j] ;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Already used atomic update of the nodal quantity,
|
||||
// query and sum the value.
|
||||
for ( int j = 0 ; j < value_count ; ++j ) {
|
||||
update.value[j] += m_node_value( inode , j );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void init( value_type & update ) const
|
||||
{ for ( int j = 0 ; j < value_count ; ++j ) { update.value[j] = 0 ; } }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void join( volatile value_type & update ,
|
||||
volatile const value_type & input ) const
|
||||
{
|
||||
for ( int j = 0 ; j < value_count ; ++j ) {
|
||||
update.value[j] += input.value[j] ;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class ViewElemNode ,
|
||||
class ViewNodeScan ,
|
||||
class ViewNodeElem >
|
||||
void map_node_to_elem( const ViewElemNode & elem_node ,
|
||||
const ViewNodeScan & node_scan ,
|
||||
const ViewNodeElem & node_elem )
|
||||
{
|
||||
typedef typename ViewElemNode::host_mirror_space host_mirror_space ;
|
||||
|
||||
const typename ViewElemNode::HostMirror host_elem_node =
|
||||
Kokkos::create_mirror_view(elem_node);
|
||||
|
||||
const typename ViewNodeScan::HostMirror host_node_scan =
|
||||
Kokkos::create_mirror_view(node_scan);
|
||||
|
||||
const typename ViewNodeElem::HostMirror host_node_elem =
|
||||
Kokkos::create_mirror_view(node_elem);
|
||||
|
||||
const int elem_count = host_elem_node.dimension_0();
|
||||
const int elem_node_count = host_elem_node.dimension_1();
|
||||
const int node_count = host_node_scan.dimension_0() - 1 ;
|
||||
|
||||
const View<int*, host_mirror_space >
|
||||
node_elem_count( "node_elem_count" , node_count );
|
||||
|
||||
Kokkos::deep_copy( host_elem_node , elem_node );
|
||||
|
||||
for ( int i = 0 ; i < elem_count ; ++i ) {
|
||||
for ( int j = 0 ; j < elem_node_count ; ++j ) {
|
||||
++node_elem_count( host_elem_node(i,j) );
|
||||
}
|
||||
}
|
||||
|
||||
for ( int i = 0 ; i < node_count ; ++i ) {
|
||||
host_node_scan(i+1) += host_node_scan(i) + node_elem_count(i);
|
||||
node_elem_count(i) = 0 ;
|
||||
}
|
||||
|
||||
for ( int i = 0 ; i < elem_count ; ++i ) {
|
||||
for ( int j = 0 ; j < elem_node_count ; ++j ) {
|
||||
const int inode = host_elem_node(i,j);
|
||||
const int offset = host_node_scan(inode) + node_elem_count(inode);
|
||||
|
||||
host_node_elem( offset , 0 ) = i ;
|
||||
host_node_elem( offset , 1 ) = j ;
|
||||
|
||||
++node_elem_count(inode);
|
||||
}
|
||||
}
|
||||
|
||||
Kokkos::deep_copy( node_scan , host_node_scan );
|
||||
Kokkos::deep_copy( node_elem , host_node_elem );
|
||||
}
|
||||
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #ifndef KOKKOS_EXAMPLE_FEINT_FUNCTORS_HPP */
|
||||
|
||||
@ -1,62 +0,0 @@
|
||||
# Builds the Kokkos "feint" example executable.
# Sources are found through vpath in the fixture and feint example
# directories; build settings come from Makefile.kokkos.

KOKKOS_PATH = ../..

vpath %.cpp ${KOKKOS_PATH}/example/fixture ${KOKKOS_PATH}/example/feint

# Every example header is a prerequisite of every object file (see the
# pattern rule at the bottom), so touching any header rebuilds everything.
EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/common/*.hpp ${KOKKOS_PATH}/example/fixture/*.hpp ${KOKKOS_PATH}/example/feint/*.hpp)

default: build_all
	echo "End Build"

# These targets name actions, not files.  Declaring them phony keeps a stray
# file called "default", "build_all" or "test" from suppressing the target.
.PHONY: default build_all test

include $(KOKKOS_PATH)/Makefile.kokkos

# With CUDA enabled the compiler and linker are forced to nvcc_wrapper;
# otherwise the user's CXX / LINK are honored.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  CXX = nvcc_wrapper
  CXXFLAGS ?= -O3
  LINK = $(CXX)
  LDFLAGS ?= -lpthread
else
  CXX ?= g++
  CXXFLAGS ?= -O3
  LINK ?= $(CXX)
  LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += \
  -I${KOKKOS_PATH}/example/common \
  -I${KOKKOS_PATH}/example/fixture \
  -I${KOKKOS_PATH}/example/feint

EXE_EXAMPLE_FEINT = KokkosExample_Feint
OBJ_EXAMPLE_FEINT = BoxElemPart.o main.o

# One explicit-instantiation object per enabled backend.
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  OBJ_EXAMPLE_FEINT += feint_cuda.o
endif

ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
  OBJ_EXAMPLE_FEINT += feint_threads.o
endif

ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  OBJ_EXAMPLE_FEINT += feint_openmp.o
endif

TARGETS = $(EXE_EXAMPLE_FEINT)

#TEST_TARGETS =

$(EXE_EXAMPLE_FEINT) : $(OBJ_EXAMPLE_FEINT) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_EXAMPLE_FEINT) $(KOKKOS_LIBS) $(LIB) -o $(EXE_EXAMPLE_FEINT)

build_all : $(TARGETS)

# "test" currently only builds; no test executables are run.
test : build_all

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
@ -1,165 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXAMPLE_FEINT_HPP
|
||||
#define KOKKOS_EXAMPLE_FEINT_HPP
|
||||
|
||||
#include <iostream>
|
||||
#include <BoxElemFixture.hpp>
|
||||
#include <ElemFunctor.hpp>
|
||||
#include <feint_fwd.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
/** \brief Vector valued function to numerically integrate.
|
||||
*
|
||||
* F(X) = { 1 , x , y , z , x*y , y*z , z*x , x*y*z }
|
||||
*
|
||||
* Integrates on a unit cube to:
|
||||
* { 1 , 1/2 , 1/2 , 1/2 , 1/4 , 1/4 , 1/4 , 1/8 }
|
||||
*/
|
||||
struct MyFunctionType {
|
||||
|
||||
enum { value_count = 8 };
|
||||
|
||||
// Evaluate function at coordinate.
|
||||
template< typename CoordType , typename ValueType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const CoordType point[] , ValueType value[] ) const
|
||||
{
|
||||
value[0] = 1 ;
|
||||
value[1] = point[0] ;
|
||||
value[2] = point[1] ;
|
||||
value[3] = point[2] ;
|
||||
value[4] = point[0] * point[1] ;
|
||||
value[5] = point[1] * point[2] ;
|
||||
value[6] = point[2] * point[0] ;
|
||||
value[7] = point[0] * point[1] * point[2] ;
|
||||
}
|
||||
};
|
||||
|
||||
template < class Device , bool UseAtomic >
|
||||
void feint(
|
||||
const unsigned global_elem_nx ,
|
||||
const unsigned global_elem_ny ,
|
||||
const unsigned global_elem_nz )
|
||||
{
|
||||
//----------------------------------------
|
||||
// Create the unstructured finite element mesh box fixture on the device:
|
||||
|
||||
typedef Kokkos::Example::
|
||||
BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemLinear >
|
||||
// BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemQuadratic >
|
||||
BoxFixtureType ;
|
||||
|
||||
// MPI distributed parallel domain decomposition of the fixture.
|
||||
// Either by element (DecomposeElem) or by node (DecomposeNode)
|
||||
// with ghosted elements.
|
||||
|
||||
static const Kokkos::Example::BoxElemPart::Decompose
|
||||
decompose = Kokkos::Example::BoxElemPart:: DecomposeElem ;
|
||||
// decompose = Kokkos::Example::BoxElemPart:: DecomposeNode ;
|
||||
|
||||
// Not using MPI in this example.
|
||||
const unsigned mpi_rank = 0 ;
|
||||
const unsigned mpi_size = 1 ;
|
||||
|
||||
const BoxFixtureType fixture( decompose , mpi_size , mpi_rank ,
|
||||
global_elem_nx ,
|
||||
global_elem_ny ,
|
||||
global_elem_nz );
|
||||
|
||||
//----------------------------------------
|
||||
// Create and execute the numerical integration functor on the device:
|
||||
|
||||
typedef Kokkos::Example::
|
||||
FiniteElementIntegration< BoxFixtureType , MyFunctionType , UseAtomic >
|
||||
FeintType ;
|
||||
|
||||
const FeintType feint( fixture , MyFunctionType() );
|
||||
|
||||
typename FeintType::value_type elem_integral ;
|
||||
|
||||
// A reduction for the global integral:
|
||||
Kokkos::parallel_reduce( fixture.elem_count() , feint , elem_integral );
|
||||
|
||||
if ( elem_integral.error ) {
|
||||
std::cout << "An element had a spatial jacobian error" << std::endl ;
|
||||
return ;
|
||||
}
|
||||
|
||||
std::cout << "Elem integral =" ;
|
||||
for ( int i = 0 ; i < MyFunctionType::value_count ; ++i ) {
|
||||
std::cout << " " << elem_integral.value[i] ;
|
||||
}
|
||||
std::cout << std::endl ;
|
||||
|
||||
//----------------------------------------
|
||||
// Create and execute the nodal lumped value projection and reduction functor:
|
||||
|
||||
typedef Kokkos::Example::
|
||||
LumpElemToNode< typename FeintType::NodeValueType ,
|
||||
typename FeintType::ElemValueType ,
|
||||
UseAtomic > LumpType ;
|
||||
|
||||
const LumpType lump( feint.m_node_lumped ,
|
||||
feint.m_elem_integral ,
|
||||
fixture.elem_node() );
|
||||
|
||||
typename LumpType ::value_type node_sum ;
|
||||
|
||||
Kokkos::parallel_reduce( fixture.node_count() , lump , node_sum );
|
||||
|
||||
std::cout << "Node lumped sum =" ;
|
||||
for ( int i = 0 ; i < MyFunctionType::value_count ; ++i ) {
|
||||
std::cout << " " << node_sum.value[i] ;
|
||||
}
|
||||
std::cout << std::endl ;
|
||||
}
|
||||
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #ifndef KOKKOS_EXAMPLE_FEINT_HPP */
|
||||
|
||||
@ -1,67 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
|
||||
#include <feint.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
template void feint<Kokkos::Cuda,false>(
|
||||
const unsigned global_elem_nx ,
|
||||
const unsigned global_elem_ny ,
|
||||
const unsigned global_elem_nz );
|
||||
|
||||
template void feint<Kokkos::Cuda,true>(
|
||||
const unsigned global_elem_nx ,
|
||||
const unsigned global_elem_ny ,
|
||||
const unsigned global_elem_nz );
|
||||
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,60 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXAMPLE_FEINT_FWD_HPP
|
||||
#define KOKKOS_EXAMPLE_FEINT_FWD_HPP
|
||||
|
||||
namespace Kokkos {
namespace Example {

/** \brief  Forward declaration of the finite element integration driver.
 *
 *  The three arguments are the global element counts along x, y and z;
 *  when omitted the box is 100 x 115 x 130 elements.
 */
template < class Device , bool UseAtomic >
void feint( const unsigned global_elem_nx = 100 ,
            const unsigned global_elem_ny = 115 ,
            const unsigned global_elem_nz = 130 );

} /* namespace Example */
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #ifndef KOKKOS_EXAMPLE_FEINT_FWD_HPP */
|
||||
|
||||
@ -1,67 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
|
||||
#include <feint.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
template void feint<Kokkos::OpenMP,false>(
|
||||
const unsigned global_elem_nx ,
|
||||
const unsigned global_elem_ny ,
|
||||
const unsigned global_elem_nz );
|
||||
|
||||
template void feint<Kokkos::OpenMP,true>(
|
||||
const unsigned global_elem_nx ,
|
||||
const unsigned global_elem_ny ,
|
||||
const unsigned global_elem_nz );
|
||||
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,66 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
|
||||
#include <feint.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
template void feint< Kokkos::Threads ,false>(
|
||||
const unsigned global_elem_nx ,
|
||||
const unsigned global_elem_ny ,
|
||||
const unsigned global_elem_nz );
|
||||
|
||||
template void feint< Kokkos::Threads ,true>(
|
||||
const unsigned global_elem_nx ,
|
||||
const unsigned global_elem_ny ,
|
||||
const unsigned global_elem_nz );
|
||||
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_PTHREAD ) */
|
||||
@ -1,110 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
#include <algorithm>
#include <iostream>
#include <utility>

#include <Kokkos_Core.hpp>

#include <feint_fwd.hpp>
|
||||
|
||||
/** \brief  Run the feint example on every enabled Kokkos backend.
 *
 *  For each of Threads, OpenMP and Cuda that is compiled in, the backend is
 *  initialized, feint is run without and then with atomic updates (using
 *  the default mesh size), and the backend is finalized.
 *
 *  \return 0 always.
 */
int main()
{
#if defined( KOKKOS_HAVE_PTHREAD )
  {
    // Use 4 cores per NUMA region, unless fewer available

    const unsigned use_numa_count     = Kokkos::hwloc::get_available_numa_count();
    const unsigned use_cores_per_numa = std::min( 4u , Kokkos::hwloc::get_available_cores_per_numa() );

    Kokkos::Threads::initialize( use_numa_count * use_cores_per_numa );

    std::cout << "feint< Threads , NotUsingAtomic >" << std::endl ;
    Kokkos::Example::feint< Kokkos::Threads , false >();

    std::cout << "feint< Threads , UsingAtomic >" << std::endl ;
    Kokkos::Example::feint< Kokkos::Threads , true >();

    Kokkos::Threads::finalize();
  }
#endif

#if defined( KOKKOS_HAVE_OPENMP )
  {
    // Use 4 cores per NUMA region, unless fewer available

    const unsigned use_numa_count     = Kokkos::hwloc::get_available_numa_count();
    const unsigned use_cores_per_numa = std::min( 4u , Kokkos::hwloc::get_available_cores_per_numa() );

    Kokkos::OpenMP::initialize( use_numa_count * use_cores_per_numa );

    std::cout << "feint< OpenMP , NotUsingAtomic >" << std::endl ;
    Kokkos::Example::feint< Kokkos::OpenMP , false >();

    std::cout << "feint< OpenMP , UsingAtomic >" << std::endl ;
    Kokkos::Example::feint< Kokkos::OpenMP , true >();

    Kokkos::OpenMP::finalize();
  }
#endif

#if defined( KOKKOS_HAVE_CUDA )
  {
    // Initialize Host mirror device
    Kokkos::HostSpace::execution_space::initialize(1);
    const unsigned device_count = Kokkos::Cuda::detect_device_count();

    if ( 0 == device_count ) {
      // device_count - 1 would wrap around for an unsigned zero and select
      // a nonexistent device, so skip the Cuda run instead.
      std::cout << "feint< Cuda > skipped: no CUDA device detected" << std::endl ;
    }
    else {
      // Use the last device:
      Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(device_count-1) );

      std::cout << "feint< Cuda , NotUsingAtomic >" << std::endl ;
      Kokkos::Example::feint< Kokkos::Cuda , false >();

      std::cout << "feint< Cuda , UsingAtomic >" << std::endl ;
      Kokkos::Example::feint< Kokkos::Cuda , true >();

      Kokkos::Cuda::finalize();
    }

    Kokkos::HostSpace::execution_space::finalize();
  }
#endif

  return 0 ;
}
|
||||
|
||||
@ -1,296 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXAMPLE_CG_SOLVE
|
||||
#define KOKKOS_EXAMPLE_CG_SOLVE
|
||||
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
#include <WrapMPI.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
template< typename ValueType , class Space >
|
||||
struct CrsMatrix {
|
||||
typedef Kokkos::StaticCrsGraph< unsigned , Space , void , unsigned > StaticCrsGraphType ;
|
||||
typedef View< ValueType * , Space > coeff_type ;
|
||||
|
||||
StaticCrsGraphType graph ;
|
||||
coeff_type coeff ;
|
||||
|
||||
CrsMatrix() : graph(), coeff() {}
|
||||
|
||||
CrsMatrix( const StaticCrsGraphType & arg_graph )
|
||||
: graph( arg_graph )
|
||||
, coeff( "crs_matrix_coeff" , arg_graph.entries.dimension_0() )
|
||||
{}
|
||||
};
|
||||
|
||||
template< typename MScalar
|
||||
, typename VScalar
|
||||
, class Space >
|
||||
struct Multiply {
|
||||
|
||||
const Example::CrsMatrix< MScalar , Space > m_A ;
|
||||
const Kokkos::View< const VScalar * , Space > m_x ;
|
||||
const Kokkos::View< VScalar * , Space > m_y ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const int iRow ) const
|
||||
{
|
||||
const int iEntryBegin = m_A.graph.row_map[iRow];
|
||||
const int iEntryEnd = m_A.graph.row_map[iRow+1];
|
||||
|
||||
double sum = 0 ;
|
||||
|
||||
for ( int iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
|
||||
sum += m_A.coeff(iEntry) * m_x( m_A.graph.entries(iEntry) );
|
||||
}
|
||||
|
||||
m_y(iRow) = sum ;
|
||||
}
|
||||
|
||||
Multiply( const View< VScalar * , Space > & y
|
||||
, const CrsMatrix< MScalar , Space > & A
|
||||
, const View< const VScalar * , Space > & x
|
||||
)
|
||||
: m_A( A ), m_x( x ), m_y( y )
|
||||
{}
|
||||
};
|
||||
|
||||
template< typename MScalar
|
||||
, typename VScalar
|
||||
, class Space >
|
||||
inline
|
||||
void multiply( const int nrow
|
||||
, const Kokkos::View< VScalar * , Space > & y
|
||||
, const Example::CrsMatrix< MScalar , Space > & A
|
||||
, const Kokkos::View< VScalar * , Space > & x
|
||||
)
|
||||
{
|
||||
Kokkos::parallel_for( Kokkos::RangePolicy<Space>(0,nrow), Multiply<MScalar,VScalar,Space>( y , A , x ) );
|
||||
}
|
||||
|
||||
template< typename ValueType , class Space >
|
||||
struct WAXPBY {
|
||||
const Kokkos::View< const ValueType * , Space > m_x ;
|
||||
const Kokkos::View< const ValueType * , Space > m_y ;
|
||||
const Kokkos::View< ValueType * , Space > m_w ;
|
||||
const double m_alpha ;
|
||||
const double m_beta ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const int i ) const
|
||||
{ m_w(i) = m_alpha * m_x(i) + m_beta * m_y(i); }
|
||||
|
||||
WAXPBY( const View< ValueType * , Space > & arg_w
|
||||
, const double arg_alpha
|
||||
, const View< ValueType * , Space > & arg_x
|
||||
, const double arg_beta
|
||||
, const View< ValueType * , Space > & arg_y
|
||||
)
|
||||
: m_x( arg_x )
|
||||
, m_y( arg_y )
|
||||
, m_w( arg_w )
|
||||
, m_alpha( arg_alpha )
|
||||
, m_beta( arg_beta )
|
||||
{}
|
||||
};
|
||||
|
||||
template< typename VScalar , class Space >
|
||||
void waxpby( const int n
|
||||
, const Kokkos::View< VScalar * , Space > & arg_w
|
||||
, const double arg_alpha
|
||||
, const Kokkos::View< VScalar * , Space > & arg_x
|
||||
, const double arg_beta
|
||||
, const Kokkos::View< VScalar * , Space > & arg_y
|
||||
)
|
||||
{
|
||||
Kokkos::parallel_for( Kokkos::RangePolicy<Space>(0,n), WAXPBY<VScalar,Space>(arg_w,arg_alpha,arg_x,arg_beta,arg_y) );
|
||||
}
|
||||
|
||||
template< typename VScalar , class Space >
|
||||
struct Dot {
|
||||
typedef double value_type ;
|
||||
|
||||
const Kokkos::View< const VScalar * , Space > m_x ;
|
||||
const Kokkos::View< const VScalar * , Space > m_y ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const int i , value_type & update ) const
|
||||
{ update += m_x(i) * m_y(i); }
|
||||
|
||||
Dot( const Kokkos::View< VScalar * , Space > & arg_x
|
||||
, const Kokkos::View< VScalar * , Space > & arg_y
|
||||
)
|
||||
: m_x(arg_x), m_y(arg_y) {}
|
||||
};
|
||||
|
||||
template< typename VScalar , class Space >
|
||||
double dot( const int n
|
||||
, const Kokkos::View< VScalar * , Space > & arg_x
|
||||
, const Kokkos::View< VScalar * , Space > & arg_y
|
||||
)
|
||||
{
|
||||
double result = 0 ;
|
||||
Kokkos::parallel_reduce( Kokkos::RangePolicy<Space>(0,n) , Dot<VScalar,Space>( arg_x , arg_y ) , result );
|
||||
return result ;
|
||||
}
|
||||
|
||||
} // namespace Example
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
/// Summary statistics optionally reported by cgsolve().
struct CGSolveResult {
  size_t  iteration ;    ///< Number of CG iterations performed.
  double  iter_time ;    ///< Wall-clock seconds spent in the iteration loop.
  double  matvec_time ;  ///< Accumulated seconds of import + matrix-vector product.
  double  norm_res ;     ///< Final residual norm, sqrt( dot(r,r) ).
};
|
||||
|
||||
template< class ImportType
|
||||
, typename MScalar
|
||||
, typename VScalar
|
||||
, class Space
|
||||
>
|
||||
inline
|
||||
void cgsolve( const ImportType & import
|
||||
, const CrsMatrix< MScalar , Space > & A
|
||||
, const Kokkos::View< VScalar * , Space > & b
|
||||
, const Kokkos::View< VScalar * , Space > & x
|
||||
, const size_t maximum_iteration = 200
|
||||
, const double tolerance = std::numeric_limits<double>::epsilon()
|
||||
, CGSolveResult * result = 0
|
||||
)
|
||||
{
|
||||
typedef View< VScalar * , Space > VectorType ;
|
||||
|
||||
const size_t count_owned = import.count_owned ;
|
||||
const size_t count_total = import.count_owned + import.count_receive;
|
||||
|
||||
size_t iteration = 0 ;
|
||||
double iter_time = 0 ;
|
||||
double matvec_time = 0 ;
|
||||
double norm_res = 0 ;
|
||||
|
||||
// Need input vector to matvec to be owned + received
|
||||
VectorType pAll ( "cg::p" , count_total );
|
||||
|
||||
VectorType p = Kokkos::subview( pAll , std::pair<size_t,size_t>(0,count_owned) );
|
||||
VectorType r ( "cg::r" , count_owned );
|
||||
VectorType Ap( "cg::Ap", count_owned );
|
||||
|
||||
/* r = b - A * x ; */
|
||||
|
||||
/* p = x */ Kokkos::deep_copy( p , x );
|
||||
/* import p */ import( pAll );
|
||||
/* Ap = A * p */ multiply( count_owned , Ap , A , pAll );
|
||||
/* r = b - Ap */ waxpby( count_owned , r , 1.0 , b , -1.0 , Ap );
|
||||
/* p = r */ Kokkos::deep_copy( p , r );
|
||||
|
||||
double old_rdot = Kokkos::Example::all_reduce( dot( count_owned , r , r ) , import.comm );
|
||||
|
||||
norm_res = sqrt( old_rdot );
|
||||
iteration = 0 ;
|
||||
|
||||
Kokkos::Impl::Timer wall_clock ;
|
||||
Kokkos::Impl::Timer timer;
|
||||
|
||||
while ( tolerance < norm_res && iteration < maximum_iteration ) {
|
||||
|
||||
/* pAp_dot = dot( p , Ap = A * p ) */
|
||||
|
||||
timer.reset();
|
||||
/* import p */ import( pAll );
|
||||
/* Ap = A * p */ multiply( count_owned , Ap , A , pAll );
|
||||
Space::fence();
|
||||
matvec_time += timer.seconds();
|
||||
|
||||
const double pAp_dot = Kokkos::Example::all_reduce( dot( count_owned , p , Ap ) , import.comm );
|
||||
const double alpha = old_rdot / pAp_dot ;
|
||||
|
||||
/* x += alpha * p ; */ waxpby( count_owned , x , alpha, p , 1.0 , x );
|
||||
/* r += -alpha * Ap ; */ waxpby( count_owned , r , -alpha, Ap , 1.0 , r );
|
||||
|
||||
const double r_dot = Kokkos::Example::all_reduce( dot( count_owned , r , r ) , import.comm );
|
||||
const double beta = r_dot / old_rdot ;
|
||||
|
||||
/* p = r + beta * p ; */ waxpby( count_owned , p , 1.0 , r , beta , p );
|
||||
|
||||
norm_res = sqrt( old_rdot = r_dot );
|
||||
|
||||
++iteration ;
|
||||
}
|
||||
|
||||
Space::fence();
|
||||
iter_time = wall_clock.seconds();
|
||||
|
||||
if ( 0 != result ) {
|
||||
result->iteration = iteration ;
|
||||
result->iter_time = iter_time ;
|
||||
result->matvec_time = matvec_time ;
|
||||
result->norm_res = norm_res ;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Example
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_EXAMPLE_CG_SOLVE */
|
||||
|
||||
|
||||
@ -1,17 +0,0 @@
|
||||
|
||||
# Header search path: build directory, this example's sources, and the
# shared 'common' and 'fixture' example directories.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/../common
  ${CMAKE_CURRENT_SOURCE_DIR}/../fixture
)

# Sources: every .cpp in this directory plus the fixture's box partitioning.
SET(SOURCES "")
FILE( GLOB SOURCES *.cpp )
LIST( APPEND SOURCES ../fixture/BoxElemPart.cpp )

# The 'fenl' executable, built for both serial and MPI configurations.
TRIBITS_ADD_EXECUTABLE(
  fenl
  SOURCES ${SOURCES}
  COMM serial mpi
)
|
||||
@ -1,57 +0,0 @@
|
||||
# Stand-alone build of the FENL example against a Kokkos source or install
# tree located at KOKKOS_PATH.
KOKKOS_PATH ?= ../..

# Directory containing this Makefile, so the example can be built out of tree.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
SRC_DIR := $(dir $(MAKEFILE_PATH))

# BoxElemPart.cpp lives in the sibling fixture directory.
vpath %.cpp ${SRC_DIR}/../fixture ${SRC_DIR}

EXAMPLE_HEADERS = $(wildcard $(SRC_DIR)/../common/*.hpp ${SRC_DIR}/../fixture/*.hpp ${SRC_DIR}/*.hpp)

# Must remain the first rule so that it is the default goal.
default: build_all
	echo "End Build"

# These targets name actions, not files; declare them phony so that a stray
# file called 'clean' or 'test' cannot prevent them from running.
.PHONY: default build_all test clean

include $(KOKKOS_PATH)/Makefile.kokkos

# KOKKOS_INTERNAL_USE_CUDA is not exported to installed Makefile.kokkos
# use KOKKOS_DEVICES here
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS +=	\
	-I${SRC_DIR}/../common	\
	-I${SRC_DIR}/../fixture	\
	-I${SRC_DIR}

EXE_EXAMPLE_FENL = KokkosExample_Fenl
OBJ_EXAMPLE_FENL = BoxElemPart.o main.o fenl.o

TARGETS = $(EXE_EXAMPLE_FENL)

#TEST_TARGETS =

$(EXE_EXAMPLE_FENL) : $(OBJ_EXAMPLE_FENL) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_EXAMPLE_FENL) $(KOKKOS_LIBS) $(LIB) -o $(EXE_EXAMPLE_FENL)

build_all : $(TARGETS)

test : build_all

clean:
	rm -f *.o $(EXE_EXAMPLE_FENL) KokkosCore_config.*

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
@ -1,117 +0,0 @@
|
||||
/*
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos: Manycore Performance-Portable Multidimensional Arrays
|
||||
// Copyright (2012) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
*/
|
||||
|
||||
#include <HexElement.hpp>
|
||||
#include <fenl_impl.hpp>
|
||||
|
||||
namespace Kokkos {
namespace Example {
namespace FENL {

// Explicit instantiations of fenl<>() for every enabled execution space,
// each with linear and quadratic elements.  The argument list is
// ( comm , use_print , use_trials , use_atomic , global_elems[] ).

#if defined( KOKKOS_HAVE_PTHREAD )

template Perf fenl< Kokkos::Threads , BoxElemPart::ElemLinear >(
  MPI_Comm , const int , const int , const int , const int [] );

template Perf fenl< Kokkos::Threads , BoxElemPart::ElemQuadratic >(
  MPI_Comm , const int , const int , const int , const int [] );

#endif

#if defined (KOKKOS_HAVE_OPENMP)

template Perf fenl< Kokkos::OpenMP , BoxElemPart::ElemLinear >(
  MPI_Comm , const int , const int , const int , const int [] );

template Perf fenl< Kokkos::OpenMP , BoxElemPart::ElemQuadratic >(
  MPI_Comm , const int , const int , const int , const int [] );

#endif

#if defined( KOKKOS_HAVE_CUDA )

template Perf fenl< Kokkos::Cuda , BoxElemPart::ElemLinear >(
  MPI_Comm , const int , const int , const int , const int [] );

template Perf fenl< Kokkos::Cuda , BoxElemPart::ElemQuadratic >(
  MPI_Comm , const int , const int , const int , const int [] );

#endif

} /* namespace FENL */
} /* namespace Example */
} /* namespace Kokkos */
|
||||
|
||||
@ -1,89 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXAMPLE_FENL_HPP
|
||||
#define KOKKOS_EXAMPLE_FENL_HPP
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <BoxElemPart.hpp>
|
||||
#include <WrapMPI.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
namespace FENL {
|
||||
|
||||
/// Performance and accuracy statistics returned by fenl().
struct Perf {
  // Problem size and iteration counts.
  size_t  global_elem_count ;
  size_t  global_node_count ;
  size_t  newton_iter_count ;
  size_t  cg_iter_count ;

  // Node-to-node graph construction measurements.
  double  map_ratio ;
  double  fill_node_set ;
  double  scan_node_count ;
  double  fill_graph_entries ;
  double  sort_graph_entries ;
  double  fill_element_graph ;

  // Matrix creation, assembly, boundary-condition and solver timings.
  double  create_sparse_matrix ;
  double  fill_time ;
  double  bc_time ;
  double  matvec_time ;
  double  cg_time ;

  // Solution quality.
  double  newton_residual ;
  double  error_max ;
};
|
||||
|
||||
template < class Device , BoxElemPart::ElemOrder ElemOrder >
|
||||
Perf fenl(
|
||||
MPI_Comm comm ,
|
||||
const int use_print ,
|
||||
const int use_trials ,
|
||||
const int use_atomic ,
|
||||
const int global_elems[] );
|
||||
|
||||
} /* namespace FENL */
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #ifndef KOKKOS_EXAMPLE_FENL_HPP */
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,598 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXAMPLE_FENL_IMPL_HPP
|
||||
#define KOKKOS_EXAMPLE_FENL_IMPL_HPP
|
||||
|
||||
#include <math.h>
|
||||
|
||||
// Kokkos libraries' headers:
|
||||
|
||||
#include <Kokkos_UnorderedMap.hpp>
|
||||
#include <Kokkos_StaticCrsGraph.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
// Examples headers:
|
||||
|
||||
#include <BoxElemFixture.hpp>
|
||||
#include <VectorImport.hpp>
|
||||
#include <CGSolve.hpp>
|
||||
|
||||
#include <fenl.hpp>
|
||||
#include <fenl_functors.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
namespace FENL {
|
||||
|
||||
inline
|
||||
double maximum( MPI_Comm comm , double local )
|
||||
{
|
||||
double global = local ;
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
MPI_Allreduce( & local , & global , 1 , MPI_DOUBLE , MPI_MAX , comm );
|
||||
#endif
|
||||
return global ;
|
||||
}
|
||||
|
||||
} /* namespace FENL */
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
namespace FENL {
|
||||
|
||||
// Manufactured solution for the one dimensional nonlinear PDE
//
//   -K T_zz + T^2 = 0 ; T(zmin) = T_zmin ; T(zmax) = T_zmax
//
// which has an analytic solution of the form
//
//   T(z) = ( a ( z - zmin ) + b )^(-2)   where K = 1 / ( 6 a^2 )
//
// Given the boundary values T_zmin and T_zmax, compute the K belonging to
// this analytic solution.
//
// There are two analytic solutions:
//
//   with a singularity:
//     a = ( 1.0 / sqrt(T_zmax) + 1.0 / sqrt(T_zmin) ) / ( zmax - zmin )
//     b = -1.0 / sqrt(T_zmin)
//
//   without a singularity (the one implemented here):
//     a = ( 1.0 / sqrt(T_zmax) - 1.0 / sqrt(T_zmin) ) / ( zmax - zmin )
//     b = 1.0 / sqrt(T_zmin)
class ManufacturedSolution {
public:

  const double zmin ;    // lower end of the z interval
  const double zmax ;    // upper end of the z interval
  const double T_zmin ;  // boundary value at zmin
  const double T_zmax ;  // boundary value at zmax
  const double a ;       // slope coefficient of the analytic solution
  const double b ;       // offset coefficient of the analytic solution
  const double K ;       // PDE coefficient, 1 / ( 6 a^2 )

  ManufacturedSolution( const double arg_zmin ,
                        const double arg_zmax ,
                        const double arg_T_zmin ,
                        const double arg_T_zmax )
    : zmin( arg_zmin )
    , zmax( arg_zmax )
    , T_zmin( arg_T_zmin )
    , T_zmax( arg_T_zmax )
    , a( ( 1.0 / sqrt(arg_T_zmax) - 1.0 / sqrt(arg_T_zmin) ) / ( arg_zmax - arg_zmin ) )
    , b( 1.0 / sqrt(arg_T_zmin) )
    , K( 1.0 / ( 6.0 * a * a ) )  // 'a' is declared before 'K', so it is already set
    {}

  // Evaluate the analytic solution T(z).
  double operator()( const double z ) const
    {
      const double base = a * ( z - zmin ) + b ;
      return 1.0 / ( base * base );
    }
};
|
||||
|
||||
} /* namespace FENL */
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
namespace FENL {
|
||||
|
||||
template < class Space , BoxElemPart::ElemOrder ElemOrder >
|
||||
Perf fenl(
|
||||
MPI_Comm comm ,
|
||||
const int use_print ,
|
||||
const int use_trials ,
|
||||
const int use_atomic ,
|
||||
const int use_elems[] )
|
||||
{
|
||||
typedef Kokkos::Example::BoxElemFixture< Space , ElemOrder > FixtureType ;
|
||||
|
||||
typedef Kokkos::Example::CrsMatrix< double , Space >
|
||||
SparseMatrixType ;
|
||||
|
||||
typedef typename SparseMatrixType::StaticCrsGraphType
|
||||
SparseGraphType ;
|
||||
|
||||
typedef Kokkos::Example::FENL::NodeNodeGraph< typename FixtureType::elem_node_type , SparseGraphType , FixtureType::ElemNode >
|
||||
NodeNodeGraphType ;
|
||||
|
||||
typedef Kokkos::Example::FENL::ElementComputation< FixtureType , SparseMatrixType >
|
||||
ElementComputationType ;
|
||||
|
||||
typedef Kokkos::Example::FENL::DirichletComputation< FixtureType , SparseMatrixType >
|
||||
DirichletComputationType ;
|
||||
|
||||
typedef NodeElemGatherFill< ElementComputationType >
|
||||
NodeElemGatherFillType ;
|
||||
|
||||
typedef typename ElementComputationType::vector_type VectorType ;
|
||||
|
||||
typedef Kokkos::Example::VectorImport<
|
||||
typename FixtureType::comm_list_type ,
|
||||
typename FixtureType::send_nodeid_type ,
|
||||
VectorType > ImportType ;
|
||||
|
||||
//------------------------------------
|
||||
|
||||
const unsigned newton_iteration_limit = 10 ;
|
||||
const double newton_iteration_tolerance = 1e-7 ;
|
||||
const unsigned cg_iteration_limit = 200 ;
|
||||
const double cg_iteration_tolerance = 1e-7 ;
|
||||
|
||||
//------------------------------------
|
||||
|
||||
const int print_flag = use_print && Kokkos::Impl::is_same< Kokkos::HostSpace , typename Space::memory_space >::value ;
|
||||
|
||||
int comm_rank ;
|
||||
int comm_size ;
|
||||
|
||||
MPI_Comm_rank( comm , & comm_rank );
|
||||
MPI_Comm_size( comm , & comm_size );
|
||||
|
||||
// Decompose by node to avoid mpi-communication for assembly
|
||||
|
||||
const float bubble_x = 1.0 ;
|
||||
const float bubble_y = 1.0 ;
|
||||
const float bubble_z = 1.0 ;
|
||||
|
||||
const FixtureType fixture( BoxElemPart::DecomposeNode , comm_size , comm_rank ,
|
||||
use_elems[0] , use_elems[1] , use_elems[2] ,
|
||||
bubble_x , bubble_y , bubble_z );
|
||||
|
||||
|
||||
{
|
||||
int global_error = ! fixture.ok();
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
int local_error = global_error ;
|
||||
global_error = 0 ;
|
||||
MPI_Allreduce( & local_error , & global_error , 1 , MPI_INT , MPI_SUM , comm );
|
||||
#endif
|
||||
|
||||
if ( global_error ) {
|
||||
throw std::runtime_error(std::string("Error generating finite element fixture"));
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
const ImportType comm_nodal_import(
|
||||
comm ,
|
||||
fixture.recv_node() ,
|
||||
fixture.send_node() ,
|
||||
fixture.send_nodeid() ,
|
||||
fixture.node_count_owned() ,
|
||||
fixture.node_count() - fixture.node_count_owned() );
|
||||
|
||||
//------------------------------------
|
||||
|
||||
const double bc_lower_value = 1 ;
|
||||
const double bc_upper_value = 2 ;
|
||||
|
||||
const Kokkos::Example::FENL::ManufacturedSolution
|
||||
manufactured_solution( 0 , 1 , bc_lower_value , bc_upper_value );
|
||||
|
||||
//------------------------------------
|
||||
|
||||
for ( int k = 0 ; k < comm_size && use_print ; ++k ) {
|
||||
if ( k == comm_rank ) {
|
||||
typename FixtureType::node_grid_type::HostMirror
|
||||
h_node_grid = Kokkos::create_mirror_view( fixture.node_grid() );
|
||||
|
||||
typename FixtureType::node_coord_type::HostMirror
|
||||
h_node_coord = Kokkos::create_mirror_view( fixture.node_coord() );
|
||||
|
||||
typename FixtureType::elem_node_type::HostMirror
|
||||
h_elem_node = Kokkos::create_mirror_view( fixture.elem_node() );
|
||||
|
||||
Kokkos::deep_copy( h_node_grid , fixture.node_grid() );
|
||||
Kokkos::deep_copy( h_node_coord , fixture.node_coord() );
|
||||
Kokkos::deep_copy( h_elem_node , fixture.elem_node() );
|
||||
|
||||
std::cout << "MPI[" << comm_rank << "]" << std::endl ;
|
||||
std::cout << "Node grid {" ;
|
||||
for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) {
|
||||
std::cout << " (" << h_node_grid(inode,0)
|
||||
<< "," << h_node_grid(inode,1)
|
||||
<< "," << h_node_grid(inode,2)
|
||||
<< ")" ;
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
|
||||
std::cout << "Node coord {" ;
|
||||
for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) {
|
||||
std::cout << " (" << h_node_coord(inode,0)
|
||||
<< "," << h_node_coord(inode,1)
|
||||
<< "," << h_node_coord(inode,2)
|
||||
<< ")" ;
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
|
||||
std::cout << "Manufactured solution"
|
||||
<< " a[" << manufactured_solution.a << "]"
|
||||
<< " b[" << manufactured_solution.b << "]"
|
||||
<< " K[" << manufactured_solution.K << "]"
|
||||
<< " {" ;
|
||||
for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) {
|
||||
std::cout << " " << manufactured_solution( h_node_coord( inode , 2 ) );
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
|
||||
std::cout << "ElemNode {" << std::endl ;
|
||||
for ( unsigned ielem = 0 ; ielem < fixture.elem_count() ; ++ielem ) {
|
||||
std::cout << " elem[" << ielem << "]{" ;
|
||||
for ( unsigned inode = 0 ; inode < FixtureType::ElemNode ; ++inode ) {
|
||||
std::cout << " " << h_elem_node(ielem,inode);
|
||||
}
|
||||
std::cout << " }{" ;
|
||||
for ( unsigned inode = 0 ; inode < FixtureType::ElemNode ; ++inode ) {
|
||||
std::cout << " (" << h_node_grid(h_elem_node(ielem,inode),0)
|
||||
<< "," << h_node_grid(h_elem_node(ielem,inode),1)
|
||||
<< "," << h_node_grid(h_elem_node(ielem,inode),2)
|
||||
<< ")" ;
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
}
|
||||
std::cout << "}" << std::endl ;
|
||||
}
|
||||
std::cout.flush();
|
||||
MPI_Barrier( comm );
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
Kokkos::Impl::Timer wall_clock ;
|
||||
|
||||
Perf perf_stats = Perf() ;
|
||||
|
||||
for ( int itrial = 0 ; itrial < use_trials ; ++itrial ) {
|
||||
|
||||
Perf perf = Perf() ;
|
||||
|
||||
perf.global_elem_count = fixture.elem_count_global();
|
||||
perf.global_node_count = fixture.node_count_global();
|
||||
|
||||
//----------------------------------
|
||||
// Create the sparse matrix graph and element-to-graph map
|
||||
// from the element->to->node identifier array.
|
||||
// The graph only has rows for the owned nodes.
|
||||
|
||||
typename NodeNodeGraphType::Times graph_times;
|
||||
|
||||
const NodeNodeGraphType
|
||||
mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(), graph_times );
|
||||
|
||||
perf.map_ratio = maximum(comm, graph_times.ratio);
|
||||
perf.fill_node_set = maximum(comm, graph_times.fill_node_set);
|
||||
perf.scan_node_count = maximum(comm, graph_times.scan_node_count);
|
||||
perf.fill_graph_entries = maximum(comm, graph_times.fill_graph_entries);
|
||||
perf.sort_graph_entries = maximum(comm, graph_times.sort_graph_entries);
|
||||
perf.fill_element_graph = maximum(comm, graph_times.fill_element_graph);
|
||||
|
||||
wall_clock.reset();
|
||||
// Create the sparse matrix from the graph:
|
||||
|
||||
SparseMatrixType jacobian( mesh_to_graph.graph );
|
||||
|
||||
Space::fence();
|
||||
|
||||
perf.create_sparse_matrix = maximum( comm , wall_clock.seconds() );
|
||||
|
||||
//----------------------------------
|
||||
|
||||
for ( int k = 0 ; k < comm_size && print_flag ; ++k ) {
|
||||
if ( k == comm_rank ) {
|
||||
const unsigned nrow = jacobian.graph.numRows();
|
||||
std::cout << "MPI[" << comm_rank << "]" << std::endl ;
|
||||
std::cout << "JacobianGraph {" << std::endl ;
|
||||
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
|
||||
std::cout << " row[" << irow << "]{" ;
|
||||
const unsigned entry_end = jacobian.graph.row_map(irow+1);
|
||||
for ( unsigned entry = jacobian.graph.row_map(irow) ; entry < entry_end ; ++entry ) {
|
||||
std::cout << " " << jacobian.graph.entries(entry);
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
}
|
||||
std::cout << "}" << std::endl ;
|
||||
|
||||
std::cout << "ElemGraph {" << std::endl ;
|
||||
for ( unsigned ielem = 0 ; ielem < mesh_to_graph.elem_graph.dimension_0() ; ++ielem ) {
|
||||
std::cout << " elem[" << ielem << "]{" ;
|
||||
for ( unsigned irow = 0 ; irow < mesh_to_graph.elem_graph.dimension_1() ; ++irow ) {
|
||||
std::cout << " {" ;
|
||||
for ( unsigned icol = 0 ; icol < mesh_to_graph.elem_graph.dimension_2() ; ++icol ) {
|
||||
std::cout << " " << mesh_to_graph.elem_graph(ielem,irow,icol);
|
||||
}
|
||||
std::cout << " }" ;
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
}
|
||||
std::cout << "}" << std::endl ;
|
||||
}
|
||||
std::cout.flush();
|
||||
MPI_Barrier( comm );
|
||||
}
|
||||
|
||||
//----------------------------------
|
||||
|
||||
// Allocate solution vector for each node in the mesh and residual vector for each owned node
|
||||
const VectorType nodal_solution( "nodal_solution" , fixture.node_count() );
|
||||
const VectorType nodal_residual( "nodal_residual" , fixture.node_count_owned() );
|
||||
const VectorType nodal_delta( "nodal_delta" , fixture.node_count_owned() );
|
||||
|
||||
// Create element computation functor
|
||||
const ElementComputationType elemcomp(
|
||||
use_atomic ? ElementComputationType( fixture , manufactured_solution.K , nodal_solution ,
|
||||
mesh_to_graph.elem_graph , jacobian , nodal_residual )
|
||||
: ElementComputationType( fixture , manufactured_solution.K , nodal_solution ) );
|
||||
|
||||
const NodeElemGatherFillType gatherfill(
|
||||
use_atomic ? NodeElemGatherFillType()
|
||||
: NodeElemGatherFillType( fixture.elem_node() ,
|
||||
mesh_to_graph.elem_graph ,
|
||||
nodal_residual ,
|
||||
jacobian ,
|
||||
elemcomp.elem_residuals ,
|
||||
elemcomp.elem_jacobians ) );
|
||||
|
||||
// Create boundary condition functor
|
||||
const DirichletComputationType dirichlet(
|
||||
fixture , nodal_solution , jacobian , nodal_residual ,
|
||||
2 /* apply at 'z' ends */ ,
|
||||
manufactured_solution.T_zmin ,
|
||||
manufactured_solution.T_zmax );
|
||||
|
||||
//----------------------------------
|
||||
// Nonlinear Newton iteration:
|
||||
|
||||
double residual_norm_init = 0 ;
|
||||
|
||||
for ( perf.newton_iter_count = 0 ;
|
||||
perf.newton_iter_count < newton_iteration_limit ;
|
||||
++perf.newton_iter_count ) {
|
||||
|
||||
//--------------------------------
|
||||
|
||||
comm_nodal_import( nodal_solution );
|
||||
|
||||
//--------------------------------
|
||||
// Element contributions to residual and jacobian
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
Kokkos::deep_copy( nodal_residual , double(0) );
|
||||
Kokkos::deep_copy( jacobian.coeff , double(0) );
|
||||
|
||||
elemcomp.apply();
|
||||
|
||||
if ( ! use_atomic ) {
|
||||
gatherfill.apply();
|
||||
}
|
||||
|
||||
Space::fence();
|
||||
perf.fill_time = maximum( comm , wall_clock.seconds() );
|
||||
|
||||
//--------------------------------
|
||||
// Apply boundary conditions
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
dirichlet.apply();
|
||||
|
||||
Space::fence();
|
||||
perf.bc_time = maximum( comm , wall_clock.seconds() );
|
||||
|
||||
//--------------------------------
|
||||
// Evaluate convergence
|
||||
|
||||
const double residual_norm =
|
||||
std::sqrt(
|
||||
Kokkos::Example::all_reduce(
|
||||
Kokkos::Example::dot( fixture.node_count_owned() , nodal_residual, nodal_residual ) , comm ) );
|
||||
|
||||
perf.newton_residual = residual_norm ;
|
||||
|
||||
if ( 0 == perf.newton_iter_count ) { residual_norm_init = residual_norm ; }
|
||||
|
||||
if ( residual_norm < residual_norm_init * newton_iteration_tolerance ) { break ; }
|
||||
|
||||
//--------------------------------
|
||||
// Solve for nonlinear update
|
||||
|
||||
CGSolveResult cg_result ;
|
||||
|
||||
Kokkos::Example::cgsolve( comm_nodal_import
|
||||
, jacobian
|
||||
, nodal_residual
|
||||
, nodal_delta
|
||||
, cg_iteration_limit
|
||||
, cg_iteration_tolerance
|
||||
, & cg_result
|
||||
);
|
||||
|
||||
// Update solution vector
|
||||
|
||||
Kokkos::Example::waxpby( fixture.node_count_owned() , nodal_solution , -1.0 , nodal_delta , 1.0 , nodal_solution );
|
||||
|
||||
perf.cg_iter_count += cg_result.iteration ;
|
||||
perf.matvec_time += cg_result.matvec_time ;
|
||||
perf.cg_time += cg_result.iter_time ;
|
||||
|
||||
//--------------------------------
|
||||
|
||||
if ( print_flag ) {
|
||||
const double delta_norm =
|
||||
std::sqrt(
|
||||
Kokkos::Example::all_reduce(
|
||||
Kokkos::Example::dot( fixture.node_count_owned() , nodal_delta, nodal_delta ) , comm ) );
|
||||
|
||||
if ( 0 == comm_rank ) {
|
||||
std::cout << "Newton iteration[" << perf.newton_iter_count << "]"
|
||||
<< " residual[" << perf.newton_residual << "]"
|
||||
<< " update[" << delta_norm << "]"
|
||||
<< " cg_iteration[" << cg_result.iteration << "]"
|
||||
<< " cg_residual[" << cg_result.norm_res << "]"
|
||||
<< std::endl ;
|
||||
}
|
||||
|
||||
for ( int k = 0 ; k < comm_size ; ++k ) {
|
||||
if ( k == comm_rank ) {
|
||||
const unsigned nrow = jacobian.graph.numRows();
|
||||
|
||||
std::cout << "MPI[" << comm_rank << "]" << std::endl ;
|
||||
std::cout << "Residual {" ;
|
||||
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
|
||||
std::cout << " " << nodal_residual(irow);
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
|
||||
std::cout << "Delta {" ;
|
||||
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
|
||||
std::cout << " " << nodal_delta(irow);
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
|
||||
std::cout << "Solution {" ;
|
||||
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
|
||||
std::cout << " " << nodal_solution(irow);
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
|
||||
std::cout << "Jacobian[ "
|
||||
<< jacobian.graph.numRows() << " x " << Kokkos::maximum_entry( jacobian.graph )
|
||||
<< " ] {" << std::endl ;
|
||||
for ( unsigned irow = 0 ; irow < nrow ; ++irow ) {
|
||||
std::cout << " {" ;
|
||||
const unsigned entry_end = jacobian.graph.row_map(irow+1);
|
||||
for ( unsigned entry = jacobian.graph.row_map(irow) ; entry < entry_end ; ++entry ) {
|
||||
std::cout << " (" << jacobian.graph.entries(entry)
|
||||
<< "," << jacobian.coeff(entry)
|
||||
<< ")" ;
|
||||
}
|
||||
std::cout << " }" << std::endl ;
|
||||
}
|
||||
std::cout << "}" << std::endl ;
|
||||
}
|
||||
std::cout.flush();
|
||||
MPI_Barrier( comm );
|
||||
}
|
||||
}
|
||||
//--------------------------------
|
||||
}
|
||||
|
||||
// Evaluate solution error
|
||||
|
||||
if ( 0 == itrial ) {
|
||||
const typename FixtureType::node_coord_type::HostMirror
|
||||
h_node_coord = Kokkos::create_mirror_view( fixture.node_coord() );
|
||||
|
||||
const typename VectorType::HostMirror
|
||||
h_nodal_solution = Kokkos::create_mirror_view( nodal_solution );
|
||||
|
||||
Kokkos::deep_copy( h_node_coord , fixture.node_coord() );
|
||||
Kokkos::deep_copy( h_nodal_solution , nodal_solution );
|
||||
|
||||
double error_max = 0 ;
|
||||
for ( unsigned inode = 0 ; inode < fixture.node_count_owned() ; ++inode ) {
|
||||
const double answer = manufactured_solution( h_node_coord( inode , 2 ) );
|
||||
const double error = ( h_nodal_solution(inode) - answer ) / answer ;
|
||||
if ( error_max < fabs( error ) ) { error_max = fabs( error ); }
|
||||
}
|
||||
|
||||
perf.error_max = std::sqrt( Kokkos::Example::all_reduce_max( error_max , comm ) );
|
||||
|
||||
perf_stats = perf ;
|
||||
}
|
||||
else {
|
||||
perf_stats.fill_node_set = std::min( perf_stats.fill_node_set , perf.fill_node_set );
|
||||
perf_stats.scan_node_count = std::min( perf_stats.scan_node_count , perf.scan_node_count );
|
||||
perf_stats.fill_graph_entries = std::min( perf_stats.fill_graph_entries , perf.fill_graph_entries );
|
||||
perf_stats.sort_graph_entries = std::min( perf_stats.sort_graph_entries , perf.sort_graph_entries );
|
||||
perf_stats.fill_element_graph = std::min( perf_stats.fill_element_graph , perf.fill_element_graph );
|
||||
perf_stats.create_sparse_matrix = std::min( perf_stats.create_sparse_matrix , perf.create_sparse_matrix );
|
||||
perf_stats.fill_time = std::min( perf_stats.fill_time , perf.fill_time );
|
||||
perf_stats.bc_time = std::min( perf_stats.bc_time , perf.bc_time );
|
||||
perf_stats.cg_time = std::min( perf_stats.cg_time , perf.cg_time );
|
||||
}
|
||||
}
|
||||
|
||||
return perf_stats ;
|
||||
}
|
||||
|
||||
} /* namespace FENL */
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #ifndef KOKKOS_EXAMPLE_FENL_IMPL_HPP */
|
||||
|
||||
@ -1,422 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <strings.h>
|
||||
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <WrapMPI.hpp>
|
||||
#include <fenl.hpp>
|
||||
|
||||
// For vtune
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** \brief  Slots of the integer command-line array filled in by main()
 *          and consumed by print_cmdline() and run().
 *
 *  The enumerators are consecutive indices starting at zero;
 *  CMD_COUNT is therefore the required array length.
 */
enum {
  CMD_USE_THREADS = 0 ,       // "threads N"            : thread count for Kokkos::Threads
  CMD_USE_NUMA ,              // "cores AxB"            : A
  CMD_USE_CORE_PER_NUMA ,     // "cores AxB"            : B
  CMD_USE_CUDA ,              // "cuda" / "cuda-dev N"  : flag
  CMD_USE_OPENMP ,            // "openmp N"             : thread count for Kokkos::OpenMP
  CMD_USE_CUDA_DEV ,          // "cuda-dev N"           : device id
  CMD_USE_FIXTURE_X ,         // "fixture XxYxZ"        : X
  CMD_USE_FIXTURE_Y ,         // "fixture XxYxZ"        : Y
  CMD_USE_FIXTURE_Z ,         // "fixture XxYxZ"        : Z
  CMD_USE_FIXTURE_BEGIN ,     // "fixture-range B..E"   : B
  CMD_USE_FIXTURE_END ,       // "fixture-range B..E"   : E
  CMD_USE_FIXTURE_QUADRATIC , // "fixture-quadratic"    : flag
  CMD_USE_ATOMIC ,            // "atomic"               : flag
  CMD_USE_TRIALS ,            // "trials N"             : trial count
  CMD_VTUNE ,                 // "vtune"                : flag
  CMD_PRINT ,                 // "print"                : flag
  CMD_ECHO ,                  // "echo"                 : flag
  CMD_ERROR ,                 // set when an argument is not recognized
  CMD_COUNT                   // number of slots (array length)
};
|
||||
|
||||
void print_cmdline( std::ostream & s , const int cmd[] )
|
||||
{
|
||||
if ( cmd[ CMD_USE_THREADS ] ) {
|
||||
s << " Threads(" << cmd[ CMD_USE_THREADS ]
|
||||
<< ") NUMA(" << cmd[ CMD_USE_NUMA ]
|
||||
<< ") CORE_PER_NUMA(" << cmd[ CMD_USE_CORE_PER_NUMA ]
|
||||
<< ")" ;
|
||||
}
|
||||
if ( cmd[ CMD_USE_OPENMP ] ) {
|
||||
s << " OpenMP(" << cmd[ CMD_USE_OPENMP ]
|
||||
<< ") NUMA(" << cmd[ CMD_USE_NUMA ]
|
||||
<< ") CORE_PER_NUMA(" << cmd[ CMD_USE_CORE_PER_NUMA ]
|
||||
<< ")" ;
|
||||
}
|
||||
if ( cmd[ CMD_USE_FIXTURE_X ] ) {
|
||||
s << " Fixture(" << cmd[ CMD_USE_FIXTURE_X ]
|
||||
<< "x" << cmd[ CMD_USE_FIXTURE_Y ]
|
||||
<< "x" << cmd[ CMD_USE_FIXTURE_Z ]
|
||||
<< ")" ;
|
||||
}
|
||||
if ( cmd[ CMD_USE_FIXTURE_BEGIN ] ) {
|
||||
s << " Fixture( " << cmd[ CMD_USE_FIXTURE_BEGIN ]
|
||||
<< " .. " << cmd[ CMD_USE_FIXTURE_END ]
|
||||
<< " )" ;
|
||||
}
|
||||
if ( cmd[ CMD_USE_FIXTURE_QUADRATIC ] ) {
|
||||
s << " Quadratic-Element" ;
|
||||
}
|
||||
if ( cmd[ CMD_USE_CUDA ] ) {
|
||||
s << " CUDA(" << cmd[ CMD_USE_CUDA_DEV ] << ")" ;
|
||||
}
|
||||
if ( cmd[ CMD_USE_ATOMIC ] ) {
|
||||
s << " ATOMIC" ;
|
||||
}
|
||||
if ( cmd[ CMD_USE_TRIALS ] ) {
|
||||
s << " TRIALS(" << cmd[ CMD_USE_TRIALS ] << ")" ;
|
||||
}
|
||||
if ( cmd[ CMD_VTUNE ] ) {
|
||||
s << " VTUNE" ;
|
||||
}
|
||||
if ( cmd[ CMD_PRINT ] ) {
|
||||
s << " PRINT" ;
|
||||
}
|
||||
s << std::endl ;
|
||||
}
|
||||
|
||||
/** \brief  Write one comma-separated row of performance results.
 *
 *  \param s       Output stream receiving the row.
 *  \param widths  Field width of each column; sixteen entries (indices 0..15) are read.
 *  \param perf    Performance record to print.
 *
 *  Times are multiplied by 1000 and divided by the global node count;
 *  the matvec and CG times are additionally divided by the CG iteration count.
 *  Columns are written in the same order as the headers built in run().
 */
void print_perf_value( std::ostream & s , const std::vector<size_t> & widths, const Kokkos::Example::FENL::Perf & perf )
{
  // Counts and ratios
  s << std::setw(widths[0]) << perf.global_elem_count << " ,";
  s << std::setw(widths[1]) << perf.global_node_count << " ,";
  s << std::setw(widths[2]) << perf.newton_iter_count << " ,";
  s << std::setw(widths[3]) << perf.cg_iter_count << " ,";
  s << std::setw(widths[4]) << perf.map_ratio << " ,";

  // Graph and matrix construction, per node
  s << std::setw(widths[5])  << ( perf.fill_node_set * 1000.0 ) / perf.global_node_count << " ,";
  s << std::setw(widths[6])  << ( perf.scan_node_count * 1000.0 ) / perf.global_node_count << " ,";
  s << std::setw(widths[7])  << ( perf.fill_graph_entries * 1000.0 ) / perf.global_node_count << " ,";
  s << std::setw(widths[8])  << ( perf.sort_graph_entries * 1000.0 ) / perf.global_node_count << " ,";
  s << std::setw(widths[9])  << ( perf.fill_element_graph * 1000.0 ) / perf.global_node_count << " ,";
  s << std::setw(widths[10]) << ( perf.create_sparse_matrix * 1000.0 ) / perf.global_node_count << " ,";

  // Fill and boundary conditions, per node
  s << std::setw(widths[11]) << ( perf.fill_time * 1000.0 ) / perf.global_node_count << " ,";
  s << std::setw(widths[12]) << ( perf.bc_time * 1000.0 ) / perf.global_node_count << " ,";

  // Linear solve, per CG iteration and per node
  s << std::setw(widths[13]) << ( ( perf.matvec_time * 1000.0 ) / perf.cg_iter_count ) / perf.global_node_count << " ,";
  s << std::setw(widths[14]) << ( ( perf.cg_time * 1000.0 ) / perf.cg_iter_count ) / perf.global_node_count << " ,";

  // Last column has no trailing separator
  s << std::setw(widths[15]) << perf.error_max;

  s << std::endl ;
}
|
||||
|
||||
template< class Device , Kokkos::Example::BoxElemPart::ElemOrder ElemOrder >
|
||||
void run( MPI_Comm comm , const int cmd[] )
|
||||
{
|
||||
int comm_rank = 0 ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
MPI_Comm_rank( comm , & comm_rank );
|
||||
#else
|
||||
comm = 0 ;
|
||||
#endif
|
||||
|
||||
|
||||
if ( 0 == comm_rank ) {
|
||||
if ( cmd[ CMD_USE_THREADS ] ) { std::cout << "THREADS , " << cmd[ CMD_USE_THREADS ] ; }
|
||||
else if ( cmd[ CMD_USE_OPENMP ] ) { std::cout << "OPENMP , " << cmd[ CMD_USE_OPENMP ] ; }
|
||||
else if ( cmd[ CMD_USE_CUDA ] ) { std::cout << "CUDA" ; }
|
||||
|
||||
if ( cmd[ CMD_USE_FIXTURE_QUADRATIC ] ) { std::cout << " , QUADRATIC-ELEMENT" ; }
|
||||
else { std::cout << " , LINEAR-ELEMENT" ; }
|
||||
|
||||
if ( cmd[ CMD_USE_ATOMIC ] ) { std::cout << " , USING ATOMICS" ; }
|
||||
}
|
||||
|
||||
std::vector< std::pair<std::string,std::string> > headers;
|
||||
|
||||
|
||||
headers.push_back(std::make_pair("ELEMS","count"));
|
||||
headers.push_back(std::make_pair("NODES","count"));
|
||||
headers.push_back(std::make_pair("NEWTON","iter"));
|
||||
headers.push_back(std::make_pair("CG","iter"));
|
||||
headers.push_back(std::make_pair("MAP_RATIO","ratio"));
|
||||
headers.push_back(std::make_pair("SET_FILL/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("SCAN/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("GRAPH_FILL/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("SORT/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("ELEM_GRAPH_FILL/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("MATRIX_CREATE/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("MATRIX_FILL/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("BOUNDARY/NODE","millisec"));
|
||||
headers.push_back(std::make_pair("MAT_VEC/ITER/ROW","millisec"));
|
||||
headers.push_back(std::make_pair("CG/ITER/ROW","millisec"));
|
||||
headers.push_back(std::make_pair("ERROR","ratio"));
|
||||
|
||||
// find print widths
|
||||
size_t min_width = 10;
|
||||
std::vector< size_t > widths(headers.size());
|
||||
for (size_t i=0, ie=headers.size(); i<ie; ++i)
|
||||
widths[i] = std::max(min_width, headers[i].first.size()+1);
|
||||
|
||||
// print column headers
|
||||
if ( 0 == comm_rank ) {
|
||||
std::cout << std::endl ;
|
||||
for (size_t i=0; i<headers.size(); ++i)
|
||||
std::cout << std::setw(widths[i]) << headers[i].first << " ,";
|
||||
std::cout << "\b\b " << std::endl;
|
||||
for (size_t i=0; i<headers.size(); ++i)
|
||||
std::cout << std::setw(widths[i]) << headers[i].second << " ,";
|
||||
std::cout << "\b\b " << std::endl;
|
||||
|
||||
std::cout << std::scientific;
|
||||
std::cout.precision(3);
|
||||
}
|
||||
|
||||
if ( cmd[ CMD_USE_FIXTURE_BEGIN ] ) {
|
||||
for ( int i = cmd[CMD_USE_FIXTURE_BEGIN] ; i < cmd[CMD_USE_FIXTURE_END] * 2 ; i *= 2 ) {
|
||||
int nelem[3] ;
|
||||
nelem[0] = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) );
|
||||
nelem[1] = 1 + nelem[0] ;
|
||||
nelem[2] = 2 * nelem[0] ;
|
||||
|
||||
const Kokkos::Example::FENL::Perf perf =
|
||||
cmd[ CMD_USE_FIXTURE_QUADRATIC ]
|
||||
? Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemQuadratic >
|
||||
( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem )
|
||||
: Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemLinear >
|
||||
( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem )
|
||||
;
|
||||
|
||||
if ( 0 == comm_rank ) print_perf_value( std::cout , widths, perf );
|
||||
}
|
||||
}
|
||||
else {
|
||||
int nelem[3] = { cmd[ CMD_USE_FIXTURE_X ] ,
|
||||
cmd[ CMD_USE_FIXTURE_Y ] ,
|
||||
cmd[ CMD_USE_FIXTURE_Z ] };
|
||||
|
||||
const Kokkos::Example::FENL::Perf perf =
|
||||
cmd[ CMD_USE_FIXTURE_QUADRATIC ]
|
||||
? Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemQuadratic >
|
||||
( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem )
|
||||
: Kokkos::Example::FENL::fenl< Device , Kokkos::Example::BoxElemPart::ElemLinear >
|
||||
( comm , cmd[CMD_PRINT], cmd[CMD_USE_TRIALS], cmd[CMD_USE_ATOMIC], nelem )
|
||||
;
|
||||
|
||||
if ( 0 == comm_rank ) print_perf_value( std::cout , widths, perf );
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
int main( int argc , char ** argv )
|
||||
{
|
||||
int comm_rank = 0 ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
MPI_Init( & argc , & argv );
|
||||
MPI_Comm comm = MPI_COMM_WORLD ;
|
||||
MPI_Comm_rank( comm , & comm_rank );
|
||||
#else
|
||||
MPI_Comm comm = 0 ;
|
||||
(void) comm ; // suppress warning
|
||||
#endif
|
||||
|
||||
int cmdline[ CMD_COUNT ] ;
|
||||
|
||||
for ( int i = 0 ; i < CMD_COUNT ; ++i ) cmdline[i] = 0 ;
|
||||
|
||||
if ( 0 == comm_rank ) {
|
||||
for ( int i = 1 ; i < argc ; ++i ) {
|
||||
if ( 0 == strcasecmp( argv[i] , "threads" ) ) {
|
||||
cmdline[ CMD_USE_THREADS ] = atoi( argv[++i] );
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "openmp" ) ) {
|
||||
cmdline[ CMD_USE_OPENMP ] = atoi( argv[++i] );
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "cores" ) ) {
|
||||
sscanf( argv[++i] , "%dx%d" ,
|
||||
cmdline + CMD_USE_NUMA ,
|
||||
cmdline + CMD_USE_CORE_PER_NUMA );
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "cuda" ) ) {
|
||||
cmdline[ CMD_USE_CUDA ] = 1 ;
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "cuda-dev" ) ) {
|
||||
cmdline[ CMD_USE_CUDA ] = 1 ;
|
||||
cmdline[ CMD_USE_CUDA_DEV ] = atoi( argv[++i] ) ;
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "fixture" ) ) {
|
||||
sscanf( argv[++i] , "%dx%dx%d" ,
|
||||
cmdline + CMD_USE_FIXTURE_X ,
|
||||
cmdline + CMD_USE_FIXTURE_Y ,
|
||||
cmdline + CMD_USE_FIXTURE_Z );
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "fixture-range" ) ) {
|
||||
sscanf( argv[++i] , "%d..%d" ,
|
||||
cmdline + CMD_USE_FIXTURE_BEGIN ,
|
||||
cmdline + CMD_USE_FIXTURE_END );
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "fixture-quadratic" ) ) {
|
||||
cmdline[ CMD_USE_FIXTURE_QUADRATIC ] = 1 ;
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "atomic" ) ) {
|
||||
cmdline[ CMD_USE_ATOMIC ] = 1 ;
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "trials" ) ) {
|
||||
cmdline[ CMD_USE_TRIALS ] = atoi( argv[++i] ) ;
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "vtune" ) ) {
|
||||
cmdline[ CMD_VTUNE ] = 1 ;
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "print" ) ) {
|
||||
cmdline[ CMD_PRINT ] = 1 ;
|
||||
}
|
||||
else if ( 0 == strcasecmp( argv[i] , "echo" ) ) {
|
||||
cmdline[ CMD_ECHO ] = 1 ;
|
||||
}
|
||||
else {
|
||||
cmdline[ CMD_ERROR ] = 1 ;
|
||||
|
||||
std::cerr << "Unrecognized command line argument #" << i << ": " << argv[i] << std::endl ;
|
||||
}
|
||||
}
|
||||
|
||||
if ( cmdline[ CMD_ECHO ] && 0 == comm_rank ) { print_cmdline( std::cout , cmdline ); }
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
MPI_Bcast( cmdline , CMD_COUNT , MPI_INT , 0 , comm );
|
||||
#endif
|
||||
|
||||
if ( cmdline[ CMD_VTUNE ] ) {
|
||||
std::stringstream cmd;
|
||||
pid_t my_os_pid=getpid();
|
||||
const std::string vtune_loc =
|
||||
"/usr/local/intel/vtune_amplifier_xe_2013/bin64/amplxe-cl";
|
||||
const std::string output_dir = "./vtune/vtune.";
|
||||
const int p_rank = comm_rank;
|
||||
cmd << vtune_loc
|
||||
<< " -collect hotspots -result-dir " << output_dir << p_rank
|
||||
<< " -target-pid " << my_os_pid << " &";
|
||||
if (p_rank == 0)
|
||||
std::cout << cmd.str() << std::endl;
|
||||
system(cmd.str().c_str());
|
||||
system("sleep 10");
|
||||
}
|
||||
|
||||
if ( ! cmdline[ CMD_ERROR ] && ! cmdline[ CMD_ECHO ] ) {
|
||||
|
||||
if ( ! cmdline[ CMD_USE_TRIALS ] ) { cmdline[ CMD_USE_TRIALS ] = 1 ; }
|
||||
|
||||
if ( ! cmdline[ CMD_USE_FIXTURE_X ] && ! cmdline[ CMD_USE_FIXTURE_BEGIN ] ) {
|
||||
cmdline[ CMD_USE_FIXTURE_X ] = 2 ;
|
||||
cmdline[ CMD_USE_FIXTURE_Y ] = 2 ;
|
||||
cmdline[ CMD_USE_FIXTURE_Z ] = 2 ;
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
|
||||
if ( cmdline[ CMD_USE_THREADS ] ) {
|
||||
|
||||
if ( cmdline[ CMD_USE_NUMA ] && cmdline[ CMD_USE_CORE_PER_NUMA ] ) {
|
||||
Kokkos::Threads::initialize( cmdline[ CMD_USE_THREADS ] ,
|
||||
cmdline[ CMD_USE_NUMA ] ,
|
||||
cmdline[ CMD_USE_CORE_PER_NUMA ] );
|
||||
}
|
||||
else {
|
||||
Kokkos::Threads::initialize( cmdline[ CMD_USE_THREADS ] );
|
||||
}
|
||||
|
||||
run< Kokkos::Threads , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline );
|
||||
|
||||
Kokkos::Threads::finalize();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
|
||||
if ( cmdline[ CMD_USE_OPENMP ] ) {
|
||||
|
||||
if ( cmdline[ CMD_USE_NUMA ] && cmdline[ CMD_USE_CORE_PER_NUMA ] ) {
|
||||
Kokkos::OpenMP::initialize( cmdline[ CMD_USE_OPENMP ] ,
|
||||
cmdline[ CMD_USE_NUMA ] ,
|
||||
cmdline[ CMD_USE_CORE_PER_NUMA ] );
|
||||
}
|
||||
else {
|
||||
Kokkos::OpenMP::initialize( cmdline[ CMD_USE_OPENMP ] );
|
||||
}
|
||||
|
||||
run< Kokkos::OpenMP , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline );
|
||||
|
||||
Kokkos::OpenMP::finalize();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
if ( cmdline[ CMD_USE_CUDA ] ) {
|
||||
// Use the last device:
|
||||
|
||||
Kokkos::HostSpace::execution_space::initialize();
|
||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( cmdline[ CMD_USE_CUDA_DEV ] ) );
|
||||
|
||||
run< Kokkos::Cuda , Kokkos::Example::BoxElemPart::ElemLinear >( comm , cmdline );
|
||||
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
MPI_Finalize();
|
||||
#endif
|
||||
|
||||
return cmdline[ CMD_ERROR ] ? -1 : 0 ;
|
||||
}
|
||||
|
||||
@ -1,355 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXAMPLE_BOXELEMFIXTURE_HPP
|
||||
#define KOKKOS_EXAMPLE_BOXELEMFIXTURE_HPP
|
||||
|
||||
#include <stdio.h>
|
||||
#include <utility>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <HexElement.hpp>
|
||||
#include <BoxElemPart.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
/** \brief Map a grid onto a unit cube with smooth nonlinear grading
|
||||
* of the map.
|
||||
*/
|
||||
struct MapGridUnitCube {
|
||||
|
||||
const float m_a ;
|
||||
const float m_b ;
|
||||
const float m_c ;
|
||||
const size_t m_max_x ;
|
||||
const size_t m_max_y ;
|
||||
const size_t m_max_z ;
|
||||
|
||||
MapGridUnitCube( const size_t grid_max_x ,
|
||||
const size_t grid_max_y ,
|
||||
const size_t grid_max_z ,
|
||||
const float bubble_x ,
|
||||
const float bubble_y ,
|
||||
const float bubble_z )
|
||||
: m_a( bubble_x )
|
||||
, m_b( bubble_y )
|
||||
, m_c( bubble_z )
|
||||
, m_max_x( grid_max_x )
|
||||
, m_max_y( grid_max_y )
|
||||
, m_max_z( grid_max_z )
|
||||
{}
|
||||
|
||||
template< typename Scalar >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int grid_x ,
|
||||
int grid_y ,
|
||||
int grid_z ,
|
||||
Scalar & coord_x ,
|
||||
Scalar & coord_y ,
|
||||
Scalar & coord_z ) const
|
||||
{
|
||||
// Map to a unit cube [0,1]^3
|
||||
|
||||
const double x = double(grid_x) / double(m_max_x);
|
||||
const double y = double(grid_y) / double(m_max_y);
|
||||
const double z = double(grid_z) / double(m_max_z);
|
||||
|
||||
coord_x = x + x * x * ( x - 1 ) * ( x - 1 ) * m_a ;
|
||||
coord_y = y + y * y * ( y - 1 ) * ( y - 1 ) * m_b ;
|
||||
coord_z = z + z * z * ( z - 1 ) * ( z - 1 ) * m_c ;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Example
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
/** \brief Generate a distributed unstructured finite element mesh
|
||||
* from a partitioned NX*NY*NZ box of elements.
|
||||
*
|
||||
* Order owned nodes first followed by off-process nodes
|
||||
* grouped by owning process.
|
||||
*/
|
||||
template< class Device ,
|
||||
BoxElemPart::ElemOrder Order ,
|
||||
class CoordinateMap = MapGridUnitCube >
|
||||
class BoxElemFixture {
|
||||
public:
|
||||
|
||||
typedef Device execution_space ;
|
||||
|
||||
enum { SpaceDim = 3 };
|
||||
enum { ElemNode = Order == BoxElemPart::ElemLinear ? 8 :
|
||||
Order == BoxElemPart::ElemQuadratic ? 27 : 0 };
|
||||
|
||||
private:
|
||||
|
||||
typedef Kokkos::Example::HexElement_TensorData< ElemNode > hex_data ;
|
||||
|
||||
Kokkos::Example::BoxElemPart m_box_part ;
|
||||
CoordinateMap m_coord_map ;
|
||||
|
||||
Kokkos::View< double *[SpaceDim] , Device > m_node_coord ;
|
||||
Kokkos::View< size_t *[SpaceDim] , Device > m_node_grid ;
|
||||
Kokkos::View< size_t *[ElemNode] , Device > m_elem_node ;
|
||||
Kokkos::View< size_t *[2] , Device > m_recv_node ;
|
||||
Kokkos::View< size_t *[2] , Device > m_send_node ;
|
||||
Kokkos::View< size_t * , Device > m_send_node_id ;
|
||||
|
||||
unsigned char m_elem_node_local[ ElemNode ][4] ;
|
||||
|
||||
public:
|
||||
|
||||
typedef Kokkos::View< const size_t * [ElemNode], Device > elem_node_type ;
|
||||
typedef Kokkos::View< const double * [SpaceDim], Device > node_coord_type ;
|
||||
typedef Kokkos::View< const size_t * [SpaceDim], Device > node_grid_type ;
|
||||
typedef Kokkos::View< const size_t * [2] , Device > comm_list_type ;
|
||||
typedef Kokkos::View< const size_t * , Device > send_nodeid_type ;
|
||||
|
||||
inline bool ok() const { return m_box_part.ok(); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t node_count() const { return m_node_grid.dimension_0(); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t node_count_owned() const { return m_box_part.owns_node_count(); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t node_count_global() const { return m_box_part.global_node_count(); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t elem_count() const { return m_elem_node.dimension_0(); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t elem_count_global() const { return m_box_part.global_elem_count(); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t elem_node_local( size_t inode , int k ) const
|
||||
{ return m_elem_node_local[inode][k] ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t node_grid( size_t inode , int iaxis ) const
|
||||
{ return m_node_grid(inode,iaxis); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t node_global_index( size_t local ) const
|
||||
{
|
||||
const size_t tmp_node_grid[SpaceDim] =
|
||||
{ m_node_grid(local,0) , m_node_grid(local,1) , m_node_grid(local,2) };
|
||||
return m_box_part.global_node_id( tmp_node_grid );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double node_coord( size_t inode , int iaxis ) const
|
||||
{ return m_node_coord(inode,iaxis); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t node_grid_max( int iaxis ) const
|
||||
{ return m_box_part.global_coord_max(iaxis); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t elem_node( size_t ielem , size_t inode ) const
|
||||
{ return m_elem_node(ielem,inode); }
|
||||
|
||||
elem_node_type elem_node() const { return m_elem_node ; }
|
||||
node_coord_type node_coord() const { return m_node_coord ; }
|
||||
node_grid_type node_grid() const { return m_node_grid ; }
|
||||
comm_list_type recv_node() const { return m_recv_node ; }
|
||||
comm_list_type send_node() const { return m_send_node ; }
|
||||
send_nodeid_type send_nodeid() const { return m_send_node_id ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
BoxElemFixture( const BoxElemFixture & rhs )
|
||||
: m_box_part( rhs.m_box_part )
|
||||
, m_coord_map( rhs.m_coord_map )
|
||||
, m_node_coord( rhs.m_node_coord )
|
||||
, m_node_grid( rhs.m_node_grid )
|
||||
, m_elem_node( rhs.m_elem_node )
|
||||
, m_recv_node( rhs.m_recv_node )
|
||||
, m_send_node( rhs.m_send_node )
|
||||
, m_send_node_id( rhs.m_send_node_id )
|
||||
{
|
||||
for ( int i = 0 ; i < ElemNode ; ++i ) {
|
||||
m_elem_node_local[i][0] = rhs.m_elem_node_local[i][0] ;
|
||||
m_elem_node_local[i][1] = rhs.m_elem_node_local[i][1] ;
|
||||
m_elem_node_local[i][2] = rhs.m_elem_node_local[i][2] ;
|
||||
m_elem_node_local[i][3] = 0 ;
|
||||
}
|
||||
}
|
||||
|
||||
BoxElemFixture & operator = ( const BoxElemFixture & rhs )
|
||||
{
|
||||
m_box_part = rhs.m_box_part ;
|
||||
m_coord_map = rhs.m_coord_map ;
|
||||
m_node_coord = rhs.m_node_coord ;
|
||||
m_node_grid = rhs.m_node_grid ;
|
||||
m_elem_node = rhs.m_elem_node ;
|
||||
m_recv_node = rhs.m_recv_node ;
|
||||
m_send_node = rhs.m_send_node ;
|
||||
m_send_node_id = rhs.m_send_node_id ;
|
||||
|
||||
for ( int i = 0 ; i < ElemNode ; ++i ) {
|
||||
m_elem_node_local[i][0] = rhs.m_elem_node_local[i][0] ;
|
||||
m_elem_node_local[i][1] = rhs.m_elem_node_local[i][1] ;
|
||||
m_elem_node_local[i][2] = rhs.m_elem_node_local[i][2] ;
|
||||
m_elem_node_local[i][3] = 0 ;
|
||||
}
|
||||
return *this ;
|
||||
}
|
||||
|
||||
// Generating constructor.
//
//   decompose                  partitioning mode forwarded to BoxElemPart
//   global_size, global_rank   number of partitions and the index of this one
//   elem_nx, elem_ny, elem_nz  global element counts along each axis
//   bubble_x/y/z               parameters forwarded to the coordinate map
//
// The member initializers size every array from the partition's counts; the
// body then caches the element-local node offsets and finally runs this
// object as a parallel_for functor (see operator()) to fill the arrays.
BoxElemFixture( const BoxElemPart::Decompose decompose ,
                const size_t global_size ,
                const size_t global_rank ,
                const size_t elem_nx ,
                const size_t elem_ny ,
                const size_t elem_nz ,
                const float bubble_x = 1.1f ,
                const float bubble_y = 1.2f ,
                const float bubble_z = 1.3f )
  : m_box_part( Order , decompose , global_size , global_rank , elem_nx , elem_ny , elem_nz )
  , m_coord_map( m_box_part.global_coord_max(0) ,
                 m_box_part.global_coord_max(1) ,
                 m_box_part.global_coord_max(2) ,
                 bubble_x ,
                 bubble_y ,
                 bubble_z )
  , m_node_coord(   "fixture_node_coord" ,   m_box_part.uses_node_count() )
  , m_node_grid(    "fixture_node_grid" ,    m_box_part.uses_node_count() )
  , m_elem_node(    "fixture_elem_node" ,    m_box_part.uses_elem_count() )
  , m_recv_node(    "fixture_recv_node" ,    m_box_part.recv_node_msg_count() )
  , m_send_node(    "fixture_send_node" ,    m_box_part.send_node_msg_count() )
  , m_send_node_id( "fixture_send_node_id" , m_box_part.send_node_id_count() )
{
  {
    // Cache the per-node grid offsets of one element taken from hex_data.
    const hex_data elem_data ;

    for ( int inode = 0 ; inode < ElemNode ; ++inode ) {
      for ( int axis = 0 ; axis < 3 ; ++axis ) {
        m_elem_node_local[inode][axis] = elem_data.eval_map[inode][axis] ;
      }
      m_elem_node_local[inode][3] = 0 ;
    }
  }

  // One parallel_for fills all five arrays, so the iteration count is the
  // largest of their work sizes; operator() guards each array by its own
  // extent.
  size_t max_extent = m_elem_node.dimension_0() * m_elem_node.dimension_1() ;
  max_extent = std::max<size_t>( max_extent , m_node_grid.dimension_0() );
  max_extent = std::max<size_t>( max_extent , m_send_node_id.dimension_0() );
  max_extent = std::max<size_t>( max_extent , m_send_node.dimension_0() );
  max_extent = std::max<size_t>( max_extent , m_recv_node.dimension_0() );

  const size_t nwork = max_extent ;

  Kokkos::parallel_for( nwork , *this );
}
|
||||
|
||||
|
||||
// Initialization:
|
||||
|
||||
// Initialization functor body, invoked once per work index 'i' by the
// parallel_for launched from the generating constructor. Each section is
// guarded by the extent of the array it fills, so a single index may
// contribute to several arrays.
KOKKOS_INLINE_FUNCTION
void operator()( size_t i ) const
{
  // --- element -> node connectivity: one entry per (element, node) pair ---
  const size_t elem_node_entries = m_elem_node.dimension_0() * m_elem_node.dimension_1() ;

  if ( i < elem_node_entries ) {

    // Grid steps per element: 1 for linear, 2 for quadratic elements.
    enum { elem_node_scale = Order == BoxElemPart::ElemLinear ? 1 :
                             Order == BoxElemPart::ElemQuadratic ? 2 : 0 };

    const size_t ielem = i / ElemNode ;
    const size_t inode = i % ElemNode ;

    size_t elem_grid[SpaceDim] ;
    m_box_part.uses_elem_coord( ielem , elem_grid );

    // Grid position of this node = scaled element position + local offset.
    size_t tmp_node_grid[SpaceDim] ;
    for ( int axis = 0 ; axis < 3 ; ++axis ) {
      tmp_node_grid[axis] = elem_node_scale * elem_grid[axis] + m_elem_node_local[inode][axis] ;
    }

    m_elem_node(ielem,inode) = m_box_part.local_node_id( tmp_node_grid );
  }

  // --- node grid indices and coordinates ---
  if ( i < m_node_grid.dimension_0() ) {

    size_t tmp_node_grid[SpaceDim] ;
    m_box_part.local_node_coord( i , tmp_node_grid );

    for ( int axis = 0 ; axis < 3 ; ++axis ) {
      m_node_grid(i,axis) = tmp_node_grid[axis] ;
    }

    // The coordinate map receives the grid indices followed by the three
    // coordinate entries of node i (presumably filled in by the map --
    // confirm against its call operator).
    m_coord_map( tmp_node_grid[0] ,
                 tmp_node_grid[1] ,
                 tmp_node_grid[2] ,
                 m_node_coord(i,0) ,
                 m_node_coord(i,1) ,
                 m_node_coord(i,2) );
  }

  // --- receive list: (rank, count) per incoming message ---
  if ( i < m_recv_node.dimension_0() ) {
    m_recv_node(i,0) = m_box_part.recv_node_rank(i);
    m_recv_node(i,1) = m_box_part.recv_node_count(i);
  }

  // --- send list: (rank, count) per outgoing message ---
  if ( i < m_send_node.dimension_0() ) {
    m_send_node(i,0) = m_box_part.send_node_rank(i);
    m_send_node(i,1) = m_box_part.send_node_count(i);
  }

  // --- node id of every sent item ---
  if ( i < m_send_node_id.dimension_0() ) {
    m_send_node_id(i) = m_box_part.send_node_id(i);
  }
}
|
||||
};
|
||||
|
||||
} // namespace Example
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_EXAMPLE_BOXELEMFIXTURE_HPP */
|
||||
|
||||
@ -1,413 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <utility>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <limits>
|
||||
#include <BoxElemPart.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
// Compute the sub-box of 'global_box' assigned to 'global_rank' out of
// 'global_size' ranks.
//
// Boxes are three half-open index ranges: box[axis][0] is the lower bound
// and box[axis][1] the upper bound. Starting from the whole global box, the
// current group of ranks is repeatedly divided (into 5 parts if its size is
// divisible by 5, else 3 if divisible by 3, else 2) and the box is cut along
// its currently longest axis in the same proportion, until the group that
// contains 'global_rank' has a single member.
void box_partition( const size_t global_size ,
                    const size_t global_rank ,
                    const size_t global_box[][2] ,
                    size_t box[][2] )
{
  for ( int d = 0 ; d < 3 ; ++d ) {
    box[d][0] = global_box[d][0] ;
    box[d][1] = global_box[d][1] ;
  }

  size_t group_begin = 0 ;           // first rank of the current group
  size_t group_size  = global_size ; // number of ranks in the current group

  while ( 1 < group_size ) {

    // Number of parts this group is divided into.
    size_t split = 2 ;
    if      ( 0 == ( group_size % 5 ) ) { split = 5 ; }
    else if ( 0 == ( group_size % 3 ) ) { split = 3 ; }

    const size_t chunk = group_size / split ;

    // [ sub_begin , sub_end ) is the part, relative to the group, that
    // contains this rank.
    size_t sub_begin , sub_end ;

    if ( 2 < split || global_rank < group_begin + chunk ) {
      sub_begin = chunk * size_t( double( global_rank - group_begin ) / double(chunk) );
      sub_end   = sub_begin + chunk ;
    }
    else {
      // Two-way split, upper part: it also takes the remainder of an odd group.
      sub_begin = chunk ;
      sub_end   = group_size ;
    }

    // Choose the axis with the largest extent; ties go to the lower axis.
    const size_t extent[3] = {
      box[0][1] - box[0][0] ,
      box[1][1] - box[1][0] ,
      box[2][1] - box[2][0] };

    int axis = 0 ;
    if ( extent[2] > extent[1] ) {
      if ( extent[2] > extent[0] ) { axis = 2 ; }
    }
    else if ( extent[1] > extent[0] ) {
      axis = 1 ;
    }

    // Cut that axis proportionally; both bounds are measured from the old
    // lower bound.
    const size_t lower = box[ axis ][0] ;

    box[ axis ][1] = lower + size_t( double(extent[axis]) * ( double(sub_end)   / double(group_size) ));
    box[ axis ][0] = lower + size_t( double(extent[axis]) * ( double(sub_begin) / double(group_size) ));

    group_size  = sub_end - sub_begin ;
    group_begin = group_begin + sub_begin ;
  }
}
|
||||
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
void BoxElemPart::local( const size_t rank ,
|
||||
size_t uses_elem[][2] ,
|
||||
size_t owns_node[][2] ,
|
||||
size_t uses_node[][2] ) const
|
||||
{
|
||||
if ( BoxElemPart::DecomposeElem == m_decompose ) {
|
||||
|
||||
Kokkos::Example::box_partition( m_global_size , rank , m_global_elem_box , uses_elem );
|
||||
|
||||
for ( int i = 0 ; i < 3 ; ++i ) {
|
||||
owns_node[i][0] = uses_elem[i][0] ;
|
||||
owns_node[i][1] = uses_elem[i][1] + ( m_global_elem_box[i][1] == uses_elem[i][1] ? 1 : 0 );
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
||||
const size_t global_vert[3][2] =
|
||||
{ { 0 , m_global_elem_box[0][1] + 1 },
|
||||
{ 0 , m_global_elem_box[1][1] + 1 },
|
||||
{ 0 , m_global_elem_box[2][1] + 1 } };
|
||||
|
||||
Kokkos::Example::box_partition( m_global_size , rank , global_vert , owns_node );
|
||||
|
||||
for ( int i = 0 ; i < 3 ; ++i ) {
|
||||
uses_elem[i][0] = global_vert[i][0] == owns_node[i][0] ? owns_node[i][0] : owns_node[i][0] - 1 ;
|
||||
uses_elem[i][1] = global_vert[i][1] == owns_node[i][1] ? owns_node[i][1] - 1 : owns_node[i][1] ;
|
||||
}
|
||||
}
|
||||
|
||||
for ( int i = 0 ; i < 3 ; ++i ) {
|
||||
uses_node[i][0] = uses_elem[i][0] ;
|
||||
uses_node[i][1] = uses_elem[i][1] + 1 ;
|
||||
}
|
||||
|
||||
if ( BoxElemPart::ElemQuadratic == m_elem_order ) {
|
||||
for ( int i = 0 ; i < 3 ; ++i ) {
|
||||
owns_node[i][0] = 2 * owns_node[i][0] ;
|
||||
uses_node[i][0] = 2 * uses_node[i][0] ;
|
||||
owns_node[i][1] = 2 * owns_node[i][1] - 1 ;
|
||||
uses_node[i][1] = 2 * uses_node[i][1] - 1 ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build the partition data of rank 'global_rank' out of 'global_size' ranks
// for a global box of elem_nx * elem_ny * elem_nz elements.
//
//   elem_order   linear or quadratic elements (sets the node grid size)
//   decompose    whether elements or nodes are partitioned
//
// Steps:
//   1. record the global element / node boxes,
//   2. compute this rank's element, owned-node and used-node boxes,
//   3. visit every other rank to find the node boxes to receive from it
//      (owned by that rank, used here) and to send to it (owned here, used
//      there), while checking that owned boxes never overlap,
//   4. terminate both lists with sentinel entries,
//   5. verify the counts; on any error print a message to std::cout, set
//      m_ok to false and zero the boxes and counts.
BoxElemPart::BoxElemPart(
  const BoxElemPart::ElemOrder elem_order ,
  const BoxElemPart::Decompose decompose ,
  const size_t global_size ,
  const size_t global_rank ,
  const size_t elem_nx ,
  const size_t elem_ny ,
  const size_t elem_nz )
{
  m_global_size = global_size ;
  m_global_rank = global_rank ;
  m_decompose   = decompose ;
  m_elem_order  = elem_order ;

  m_global_elem_box[0][0] = 0 ; m_global_elem_box[0][1] = elem_nx ;
  m_global_elem_box[1][0] = 0 ; m_global_elem_box[1][1] = elem_ny ;
  m_global_elem_box[2][0] = 0 ; m_global_elem_box[2][1] = elem_nz ;

  m_global_node_box[0][0] = 0 ; m_global_node_box[0][1] = 0 ;
  m_global_node_box[1][0] = 0 ; m_global_node_box[1][1] = 0 ;
  m_global_node_box[2][0] = 0 ; m_global_node_box[2][1] = 0 ;

  m_owns_node_count = 0 ;
  m_send_node_count = 0 ;

  m_ok = true ;

  //----------------------------------------
  // Global node grid: n+1 nodes per axis for linear elements, 2n+1 for
  // quadratic elements.

  if ( ElemLinear == elem_order ) {
    m_global_node_box[0][1] = elem_nx + 1 ;
    m_global_node_box[1][1] = elem_ny + 1 ;
    m_global_node_box[2][1] = elem_nz + 1 ;
  }
  else if ( ElemQuadratic == elem_order ) {
    m_global_node_box[0][1] = 2 * elem_nx + 1 ;
    m_global_node_box[1][1] = 2 * elem_ny + 1 ;
    m_global_node_box[2][1] = 2 * elem_nz + 1 ;
  }

  //----------------------------------------
  // Boxes of this rank; entry 0 of the 'owns' list is this rank itself.

  local( m_global_rank , m_uses_elem_box , m_owns_node_box[0] , m_uses_node_box );

  const size_t global_node_count_ = Kokkos::Example::box_count( m_global_node_box );
  const size_t global_elem_count_ = Kokkos::Example::box_count( m_global_elem_box );

  //----------------------------------------

  // Running totals over all ranks, used for the consistency checks below.
  size_t elem_count = Kokkos::Example::box_count( m_uses_elem_box );
  size_t node_count = Kokkos::Example::box_count( m_owns_node_box[0] );

  m_owns_node[0][0] = global_rank ;
  m_owns_node[0][1] = node_count ;
  m_owns_node_count = 1 ;
  m_send_node_count = 0 ;

  // Visit the other ranks in cyclic order starting after this rank.
  for ( size_t rr = 1 ; rr < m_global_size && m_ok ; ++rr ) {

    const size_t rank = ( m_global_rank + rr ) % m_global_size ;

    size_t elem_box[3][2] , o_node_box[3][2] , u_node_box[3][2] ;

    // Boxes for process 'rank'
    local( rank , elem_box , o_node_box , u_node_box );

    // Box that this process uses but is owned by process 'rank'.
    // It is computed directly into the next free list slot and only kept
    // (by advancing the count) when it is non-empty.
    Kokkos::Example::box_intersect( m_owns_node_box[ m_owns_node_count ] , m_uses_node_box , o_node_box );

    m_owns_node[ m_owns_node_count ][1] = Kokkos::Example::box_count( m_owns_node_box[ m_owns_node_count ] );

    if ( m_owns_node[ m_owns_node_count ][1] ) {

      // The last slot is reserved for the sentinel entry.
      if ( ( PROC_NEIGH_MAX - 1 ) <= m_owns_node_count ) {
        std::cout << "BoxElemPart exceeded maximum neighbor count" << std::endl ;
        m_ok = false ;
        break ;
      }

      m_owns_node[ m_owns_node_count ][0] = rank ;

      ++m_owns_node_count ;
    }

    // Box that this process owns and is used by process 'rank'
    Kokkos::Example::box_intersect( m_send_node_box[ m_send_node_count ] , m_owns_node_box[0] , u_node_box );

    m_send_node[ m_send_node_count ][1] = Kokkos::Example::box_count( m_send_node_box[ m_send_node_count ] );

    if ( m_send_node[ m_send_node_count ][1] ) {

      // The last slot is reserved for the sentinel entry.
      if ( ( PROC_NEIGH_MAX - 1 ) <= m_send_node_count ) {
        std::cout << "BoxElemPart exceeded maximum neighbor count" << std::endl ;
        m_ok = false ;
        break ;
      }

      m_send_node[ m_send_node_count ][0] = rank ;
      ++m_send_node_count ;
    }

    // Error checking:

    size_t test_box[3][2] ;

    elem_count += Kokkos::Example::box_count( elem_box );
    node_count += Kokkos::Example::box_count( o_node_box );

    {
      // Owned node boxes of two different ranks must never overlap.
      Kokkos::Example::box_intersect( test_box , m_owns_node_box[0] , o_node_box );

      if ( Kokkos::Example::box_count( test_box ) ) {
        std::cout << "Box partitioning error" << std::endl ;
        std::cout << "owns_node[" << m_global_rank << "]{"
                  << " [" << m_owns_node_box[0][0][0] << "," << m_owns_node_box[0][0][1] << ")"
                  << " [" << m_owns_node_box[0][1][0] << "," << m_owns_node_box[0][1][1] << ")"
                  << " [" << m_owns_node_box[0][2][0] << "," << m_owns_node_box[0][2][1] << ")"
                  << "} intersects"
                  << " owns_node[" << rank << "]{"
                  << " [" << o_node_box[0][0] << "," << o_node_box[0][1] << ")"
                  << " [" << o_node_box[1][0] << "," << o_node_box[1][1] << ")"
                  << " [" << o_node_box[2][0] << "," << o_node_box[2][1] << ")"
                  << "}" << std::endl ;
        m_ok = false ;
        break ;
      }
    }

    if ( DecomposeElem == decompose ) {

      // When elements are partitioned, element boxes must not overlap either.
      Kokkos::Example::box_intersect( test_box , m_uses_elem_box , elem_box );

      if ( Kokkos::Example::box_count( test_box ) ) {
        std::cout << "Box partitioning error" << std::endl ;
        std::cout << "ElemBox[" << m_global_rank << "]{"
                  << " [" << m_uses_elem_box[0][0] << "," << m_uses_elem_box[0][1] << ")"
                  << " [" << m_uses_elem_box[1][0] << "," << m_uses_elem_box[1][1] << ")"
                  << " [" << m_uses_elem_box[2][0] << "," << m_uses_elem_box[2][1] << ")"
                  << "} intersects"
                  << " ElemBox[" << rank << "]{"
                  << " [" << elem_box[0][0] << "," << elem_box[0][1] << ")"
                  << " [" << elem_box[1][0] << "," << elem_box[1][1] << ")"
                  << " [" << elem_box[2][0] << "," << elem_box[2][1] << ")"
                  << "}" << std::endl ;
        m_ok = false ;
        break ;
      }
    }
  }

  // Sentinel values at the end of the owns and send lists.
  //
  // The sentinel count (~0u) stops the "count <= id" searches, and the
  // sentinel box [ 0 , ~0u ) on every axis stops the "coordinate inside box"
  // search. The previous code assigned the lower bound twice and never set
  // the upper bound, so the sentinel box could not match any coordinate.

  m_owns_node[ m_owns_node_count ][0] = ~0u ;
  m_owns_node[ m_owns_node_count ][1] = ~0u ;

  m_send_node[ m_send_node_count ][0] = ~0u ;
  m_send_node[ m_send_node_count ][1] = ~0u ;

  for ( int axis = 0 ; axis < 3 ; ++axis ) {
    m_owns_node_box[ m_owns_node_count ][axis][0] = 0u ;
    m_owns_node_box[ m_owns_node_count ][axis][1] = ~0u ;

    m_send_node_box[ m_send_node_count ][axis][0] = 0u ;
    m_send_node_box[ m_send_node_count ][axis][1] = ~0u ;
  }

  {
    // The owned + received node counts must add up to the used node count.
    size_t count = 0 ;
    for ( size_t i = 0 ; i < m_owns_node_count ; ++i ) {
      count += m_owns_node[i][1] ;
    }
    if ( count != Kokkos::Example::box_count( m_uses_node_box ) ) {
      std::cout << "Node uses count = " << Kokkos::Example::box_count( m_uses_node_box )
                << " error count = " << count << std::endl ;
      m_ok = false ;
    }
  }

  // Every global node must be owned by exactly one rank.
  if ( global_node_count_ != node_count ) {
    std::cout << "Node count = " << global_node_count_ << " overlap error count = " << node_count << std::endl ;
    m_ok = false ;
  }

  // When elements are partitioned, every element belongs to exactly one rank.
  if ( DecomposeElem == decompose && global_elem_count_ != elem_count ) {
    std::cout << "Elem count = " << global_elem_count_ << " overlap error count = " << elem_count << std::endl ;
    m_ok = false ;
  }

  // On failure leave an empty partition behind.
  if ( ! m_ok ) {
    for ( int i = 0 ; i < 3 ; ++i ) { for ( int j = 0 ; j < 2 ; ++j ) {
      m_global_elem_box[i][j] = 0 ;
      m_global_node_box[i][j] = 0 ;
      m_uses_elem_box[i][j] = 0 ;
      m_uses_node_box[i][j] = 0 ;
    }}
    m_owns_node_count = 0 ;
    m_send_node_count = 0 ;
  }
}
|
||||
|
||||
void BoxElemPart::print( std::ostream & s ) const
|
||||
{
|
||||
s << "BoxElemPart P[" << m_global_rank << ":" << m_global_size << "]"
|
||||
<< std::endl
|
||||
<< " elem_box {"
|
||||
<< " [" << m_uses_elem_box[0][0] << "," << m_uses_elem_box[0][1] << ")"
|
||||
<< " [" << m_uses_elem_box[1][0] << "," << m_uses_elem_box[1][1] << ")"
|
||||
<< " [" << m_uses_elem_box[2][0] << "," << m_uses_elem_box[2][1] << ")"
|
||||
<< " } / {"
|
||||
<< " [" << m_global_elem_box[0][0] << "," << m_global_elem_box[0][1] << ")"
|
||||
<< " [" << m_global_elem_box[1][0] << "," << m_global_elem_box[1][1] << ")"
|
||||
<< " [" << m_global_elem_box[2][0] << "," << m_global_elem_box[2][1] << ")"
|
||||
<< " }"
|
||||
<< std::endl
|
||||
<< " node_box {"
|
||||
<< " [" << m_owns_node_box[0][0][0] << "," << m_owns_node_box[0][0][1] << ")"
|
||||
<< " [" << m_owns_node_box[0][1][0] << "," << m_owns_node_box[0][1][1] << ")"
|
||||
<< " [" << m_owns_node_box[0][2][0] << "," << m_owns_node_box[0][2][1] << ")"
|
||||
<< " } / {"
|
||||
<< " [" << m_uses_node_box[0][0] << "," << m_uses_node_box[0][1] << ")"
|
||||
<< " [" << m_uses_node_box[1][0] << "," << m_uses_node_box[1][1] << ")"
|
||||
<< " [" << m_uses_node_box[2][0] << "," << m_uses_node_box[2][1] << ")"
|
||||
<< " } / {"
|
||||
<< " [" << m_global_node_box[0][0] << "," << m_global_node_box[0][1] << ")"
|
||||
<< " [" << m_global_node_box[1][0] << "," << m_global_node_box[1][1] << ")"
|
||||
<< " [" << m_global_node_box[2][0] << "," << m_global_node_box[2][1] << ")"
|
||||
<< " }"
|
||||
<< std::endl ;
|
||||
|
||||
for ( size_t i = 1 ; i < m_owns_node_count ; ++i ) {
|
||||
s << " P[" << m_owns_node[i][0] << "]"
|
||||
<< " recv node_box {"
|
||||
<< " [" << m_owns_node_box[i][0][0] << "," << m_owns_node_box[i][0][1] << ")"
|
||||
<< " [" << m_owns_node_box[i][1][0] << "," << m_owns_node_box[i][1][1] << ")"
|
||||
<< " [" << m_owns_node_box[i][2][0] << "," << m_owns_node_box[i][2][1] << ")"
|
||||
<< " }"
|
||||
<< std::endl ;
|
||||
}
|
||||
|
||||
for ( size_t i = 0 ; i < m_send_node_count ; ++i ) {
|
||||
s << " P[" << m_send_node[i][0] << "]"
|
||||
<< " send node_box {"
|
||||
<< " [" << m_send_node_box[i][0][0] << "," << m_send_node_box[i][0][1] << ")"
|
||||
<< " [" << m_send_node_box[i][1][0] << "," << m_send_node_box[i][1][1] << ")"
|
||||
<< " [" << m_send_node_box[i][2][0] << "," << m_send_node_box[i][2][1] << ")"
|
||||
<< " }"
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -1,320 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_BOXELEMPART_HPP
|
||||
#define KOKKOS_BOXELEMPART_HPP
|
||||
|
||||
#include <utility>
|
||||
#include <ostream>
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
// Store in 'box' the intersection of the half-open boxes 'boxA' and 'boxB'.
// On an axis where the two do not overlap the result is the empty range
// [ lower , lower ), so box_count() of the result is zero.
KOKKOS_INLINE_FUNCTION
void box_intersect( size_t box[][2] ,
                    const size_t boxA[][2] ,
                    const size_t boxB[][2] )
{
  for ( int axis = 0 ; axis < 3 ; ++axis ) {
    // Larger of the two lower bounds.
    size_t lower = boxB[axis][0] ;
    if ( boxA[axis][0] > boxB[axis][0] ) { lower = boxA[axis][0] ; }

    // Smaller of the two upper bounds.
    size_t upper = boxB[axis][1] ;
    if ( boxA[axis][1] < boxB[axis][1] ) { upper = boxA[axis][1] ; }

    // Collapse an inverted range to an empty one.
    if ( lower > upper ) { upper = lower ; }

    box[axis][0] = lower ;
    box[axis][1] = upper ;
  }
}
|
||||
|
||||
// Number of grid points in a half-open box: the product of the three axis
// extents box[axis][1] - box[axis][0].
KOKKOS_INLINE_FUNCTION
size_t box_count( const size_t box[][2] )
{
  const size_t extent_x = box[0][1] - box[0][0] ;
  const size_t extent_y = box[1][1] - box[1][0] ;
  const size_t extent_z = box[2][1] - box[2][0] ;

  return extent_x * extent_y * extent_z ;
}
|
||||
|
||||
// Grow 'local_box' by 'ghost_layer' grid points in both directions on every
// axis, clamped to 'global_box', and store the result in 'ghost_box'.
KOKKOS_INLINE_FUNCTION
void box_ghost_layer( const size_t global_box[][2] ,
                      const size_t local_box[][2] ,
                      const size_t ghost_layer ,
                      size_t ghost_box[][2] )
{
  for ( int axis = 0 ; axis < 3 ; ++axis ) {
    const size_t global_lo = global_box[axis][0] ;
    const size_t global_hi = global_box[axis][1] ;
    const size_t local_lo  = local_box[axis][0] ;
    const size_t local_hi  = local_box[axis][1] ;

    // Compare with an addition first so the unsigned subtraction below is
    // only evaluated when it cannot go below the global lower bound.
    if ( global_lo + ghost_layer > local_lo ) {
      ghost_box[axis][0] = global_lo ;
    }
    else {
      ghost_box[axis][0] = local_lo - ghost_layer ;
    }

    if ( global_hi < local_hi + ghost_layer ) {
      ghost_box[axis][1] = global_hi ;
    }
    else {
      ghost_box[axis][1] = local_hi + ghost_layer ;
    }
  }
}
|
||||
|
||||
// Compute in 'box' the part of 'global_box' assigned to 'global_rank' out of
// 'global_size' ranks. Boxes are three [ lower , upper ) index ranges.
// Declared here without an inline definition.
void box_partition( const size_t global_size , const size_t global_rank ,
                    const size_t global_box[][2] , size_t box[][2] );
|
||||
|
||||
} // namespace Example
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
/** \brief Partition a box of hexahedral elements among subdomains.
|
||||
*
|
||||
* Nodes are ordered locally as follows:
|
||||
* { owned_by[ this_process ] ,
|
||||
* owned_by[ neighbor_process[0] ] ,
|
||||
* owned_by[ neighbor_process[1] ] ,
|
||||
* owned_by[ neighbor_process[2] ] ,
|
||||
* ... };
|
||||
*/
|
||||
class BoxElemPart {
|
||||
public:
|
||||
|
||||
enum Decompose { DecomposeNode , DecomposeElem };
|
||||
enum ElemOrder { ElemLinear , ElemQuadratic };
|
||||
|
||||
bool ok() const { return m_ok ; }
|
||||
|
||||
BoxElemPart( const ElemOrder elem_order ,
|
||||
const Decompose decompose ,
|
||||
const size_t global_size ,
|
||||
const size_t global_rank ,
|
||||
const size_t elem_nx ,
|
||||
const size_t elem_ny ,
|
||||
const size_t elem_nz );
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t global_elem_count() const
|
||||
{ return Kokkos::Example::box_count( m_global_elem_box ); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t global_node_count() const
|
||||
{ return Kokkos::Example::box_count( m_global_node_box ); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t uses_elem_count() const
|
||||
{ return Kokkos::Example::box_count( m_uses_elem_box ); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t owns_node_count() const
|
||||
{ return Kokkos::Example::box_count( m_owns_node_box[0] ); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t uses_node_count() const
|
||||
{ return Kokkos::Example::box_count( m_uses_node_box ); }
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t uses_elem_offset( const size_t ix ,
|
||||
const size_t iy ,
|
||||
const size_t iz ) const
|
||||
{
|
||||
return size_t( ix - m_uses_elem_box[0][0] ) + size_t( m_uses_elem_box[0][1] - m_uses_elem_box[0][0] ) * (
|
||||
size_t( iy - m_uses_elem_box[1][0] ) + size_t( m_uses_elem_box[1][1] - m_uses_elem_box[1][0] ) * (
|
||||
size_t( iz - m_uses_elem_box[2][0] ) ) );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void uses_elem_coord( size_t lid , size_t c[] ) const
|
||||
{
|
||||
const size_t nx = m_uses_elem_box[0][1] - m_uses_elem_box[0][0] ;
|
||||
const size_t ny = m_uses_elem_box[1][1] - m_uses_elem_box[1][0] ;
|
||||
|
||||
c[0] = m_uses_elem_box[0][0] + lid % nx ; lid /= nx ;
|
||||
c[1] = m_uses_elem_box[1][0] + lid % ny ; lid /= ny ;
|
||||
c[2] = m_uses_elem_box[2][0] + lid ;
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t global_coord_max( size_t axis ) const
|
||||
{ return m_global_node_box[axis][1] - 1 ; }
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void local_node_coord( size_t lid , size_t coord[] ) const
|
||||
{
|
||||
// Local id within an 'owns' block (has sentinal)
|
||||
size_t j = 0 ;
|
||||
while ( m_owns_node[j][1] <= lid ) { lid -= m_owns_node[j][1] ; ++j ; }
|
||||
|
||||
// Map to global coordinates:
|
||||
const size_t nx = m_owns_node_box[j][0][1] - m_owns_node_box[j][0][0] ;
|
||||
const size_t ny = m_owns_node_box[j][1][1] - m_owns_node_box[j][1][0] ;
|
||||
|
||||
coord[0] = m_owns_node_box[j][0][0] + lid % nx ; lid /= nx ;
|
||||
coord[1] = m_owns_node_box[j][1][0] + lid % ny ; lid /= ny ;
|
||||
coord[2] = m_owns_node_box[j][2][0] + lid ;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t local_node_id( const size_t c[] ) const
|
||||
{
|
||||
// Find which 'owns' block and accumulate the offset of this block:
|
||||
size_t lid = 0 ;
|
||||
size_t j = 0 ;
|
||||
while ( ! ( m_owns_node_box[j][0][0] <= c[0] && c[0] < m_owns_node_box[j][0][1] &&
|
||||
m_owns_node_box[j][1][0] <= c[1] && c[1] < m_owns_node_box[j][1][1] &&
|
||||
m_owns_node_box[j][2][0] <= c[2] && c[2] < m_owns_node_box[j][2][1] ) ) {
|
||||
|
||||
lid += m_owns_node[j][1] ;
|
||||
++j ;
|
||||
}
|
||||
|
||||
// Map offset to the block plus offset within the block:
|
||||
return lid +
|
||||
size_t( c[0] - m_owns_node_box[j][0][0] ) + size_t( m_owns_node_box[j][0][1] - m_owns_node_box[j][0][0] ) * (
|
||||
size_t( c[1] - m_owns_node_box[j][1][0] ) + size_t( m_owns_node_box[j][1][1] - m_owns_node_box[j][1][0] ) * (
|
||||
size_t( c[2] - m_owns_node_box[j][2][0] ) ) );
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t global_node_id( const size_t c[] ) const
|
||||
{
|
||||
return size_t( c[0] - m_global_node_box[0][0] ) + size_t( m_global_node_box[0][1] - m_global_node_box[0][0] ) * (
|
||||
size_t( c[1] - m_global_node_box[1][0] ) + size_t( m_global_node_box[1][1] - m_global_node_box[1][0] ) * (
|
||||
size_t( c[2] - m_global_node_box[2][0] ) ) );
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t recv_node_msg_count() const { return m_owns_node_count - 1 ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t recv_node_rank( size_t msg ) const { return m_owns_node[msg+1][0] ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t recv_node_count( size_t msg ) const { return m_owns_node[msg+1][1] ; }
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t send_node_msg_count() const { return m_send_node_count ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t send_node_rank( size_t msg ) const { return m_send_node[msg][0] ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t send_node_count( size_t msg ) const { return m_send_node[msg][1] ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t send_node_id_count() const
|
||||
{
|
||||
size_t count = 0 ;
|
||||
for ( size_t i = 0 ; i < m_send_node_count ; ++i ) {
|
||||
count += m_send_node[i][1] ;
|
||||
}
|
||||
return count ;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
size_t send_node_id( size_t item ) const
|
||||
{
|
||||
// Find which send list this send item is in:
|
||||
size_t j = 0 ;
|
||||
while ( m_send_node[j][1] <= item ) { item -= m_send_node[j][1] ; ++j ; }
|
||||
|
||||
// Map to global coordinate:
|
||||
const size_t nx = m_send_node_box[j][0][1] - m_send_node_box[j][0][0] ;
|
||||
const size_t ny = m_send_node_box[j][1][1] - m_send_node_box[j][1][0] ;
|
||||
|
||||
size_t c[3] ;
|
||||
|
||||
c[0] = m_send_node_box[j][0][0] + item % nx ; item /= nx ;
|
||||
c[1] = m_send_node_box[j][1][0] + item % ny ; item /= ny ;
|
||||
c[2] = m_send_node_box[j][2][0] + item ;
|
||||
|
||||
// Map to local id:
|
||||
return size_t( c[0] - m_owns_node_box[0][0][0] ) + size_t( m_owns_node_box[0][0][1] - m_owns_node_box[0][0][0] ) * (
|
||||
size_t( c[1] - m_owns_node_box[0][1][0] ) + size_t( m_owns_node_box[0][1][1] - m_owns_node_box[0][1][0] ) * (
|
||||
size_t( c[2] - m_owns_node_box[0][2][0] ) ) );
|
||||
}
|
||||
|
||||
//----------------------------------------
|
||||
|
||||
void print( std::ostream & s ) const ;
|
||||
|
||||
private:
|
||||
|
||||
// Maximum number of processes in a neighborhood, including this process
|
||||
enum { PROC_NEIGH_MAX = 64 };
|
||||
|
||||
void local( const size_t rank ,
|
||||
size_t uses_elem[][2] ,
|
||||
size_t owns_node[][2] ,
|
||||
size_t uses_node[][2] ) const ;
|
||||
|
||||
size_t m_global_size ;
|
||||
size_t m_global_rank ;
|
||||
|
||||
Decompose m_decompose ;
|
||||
ElemOrder m_elem_order ;
|
||||
|
||||
size_t m_global_elem_box[3][2] ;
|
||||
size_t m_global_node_box[3][2] ;
|
||||
size_t m_uses_elem_box[3][2] ;
|
||||
size_t m_uses_node_box[3][2] ;
|
||||
|
||||
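// m_owns_node[i] and m_send_node[i] hold [ processor rank , count ] ; the matching *_node_box[i] holds the node box as [axis][ begin , end )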
|
||||
size_t m_owns_node_box[ PROC_NEIGH_MAX ][3][2] ;
|
||||
size_t m_owns_node[ PROC_NEIGH_MAX ][2] ;
|
||||
size_t m_owns_node_count ;
|
||||
|
||||
size_t m_send_node_box[ PROC_NEIGH_MAX ][3][2] ;
|
||||
size_t m_send_node[ PROC_NEIGH_MAX ][2] ;
|
||||
size_t m_send_node_count ;
|
||||
|
||||
bool m_ok ;
|
||||
};
|
||||
|
||||
} // namespace Example
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_BOXELEMPART_HPP */
|
||||
|
||||
@ -1,13 +0,0 @@
|
||||
|
||||
# Header search path: build directory, this directory, shared example headers.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/../common
)

# Sources of the fixture test executable.
SET(SOURCES_TEST
  Main.cpp
  TestFixture.cpp
  BoxElemPart.cpp
)

# Automatically picks up 'kokkosexample_fixture'
TRIBITS_ADD_EXECUTABLE_AND_TEST(TestFixture SOURCES ${SOURCES_TEST})
|
||||
|
||||
@ -1,270 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_HEXELEMENT_HPP
|
||||
#define KOKKOS_HEXELEMENT_HPP
|
||||
|
||||
namespace Kokkos {
namespace Example {

template< unsigned NodeCount >
class HexElement_TensorData ;

template< unsigned NodeCount , class Device >
class HexElement_TensorEval ;

//----------------------------------------------------------------------------
/** \brief  One-dimensional tensor-product data for the 8-node hex element
 *          on the interval [-1,1]^3.
 *
 *  Stores the two linear 1D basis functions and their derivatives evaluated
 *  at the two 1D integration points, the 1D integration weights, and a map
 *  from each element node to its 1D index along x, y and z.
 */
template<>
class HexElement_TensorData< 8 > {
public:

  static const unsigned element_node_count   = 8 ;
  static const unsigned spatial_dimension    = 3 ;
  static const unsigned integration_count_1d = 2 ;
  static const unsigned function_count_1d    = 2 ;

  // Indexed [ integration point ][ basis function ]; this is the order in
  // which the constructor fills them and HexElement_Data reads them.
  float values_1d [ integration_count_1d ][ function_count_1d ];
  float derivs_1d [ integration_count_1d ][ function_count_1d ];
  float weights_1d[ integration_count_1d ];

  // eval_map[node][0..2] = 1D index of the node along x, y, z.
  // Column 3 is not used by the code in this header and is zeroed.
  unsigned char eval_map[ element_node_count ][4] ;

  /** \brief  Value of 1D basis function 'jf' at 'x'; zero for jf > 1. */
  static float eval_value_1d( const unsigned jf , const float x )
  {
    return 0 == jf ? 0.5 * ( 1.0 - x ) : (
           1 == jf ? 0.5 * ( 1.0 + x ) : 0 );
  }

  /** \brief  Derivative of 1D basis function 'jf' (constant); zero for jf > 1. */
  static float eval_deriv_1d( const unsigned jf , const float )
  {
    return 0 == jf ? -0.5 : (
           1 == jf ?  0.5 : 0 );
  }

  HexElement_TensorData()
  {
    const unsigned char tmp_map[ element_node_count ][ spatial_dimension ] =
      { { 0 , 0 , 0 },
        { 1 , 0 , 0 },
        { 1 , 1 , 0 },
        { 0 , 1 , 0 },
        { 0 , 0 , 1 },
        { 1 , 0 , 1 },
        { 1 , 1 , 1 },
        { 0 , 1 , 1 } };

    weights_1d[0] = 1 ;
    weights_1d[1] = 1 ;

    const float points_1d[ integration_count_1d ] =
      { -0.577350269 , 0.577350269 };

    for ( unsigned i = 0 ; i < element_node_count ; ++i ) {
      eval_map[i][0] = tmp_map[i][0];
      eval_map[i][1] = tmp_map[i][1];
      eval_map[i][2] = tmp_map[i][2];
      eval_map[i][3] = 0 ; // keep the unused column deterministic
    }

    for ( unsigned xp = 0 ; xp < integration_count_1d ; ++xp ) {
    for ( unsigned xf = 0 ; xf < function_count_1d ; ++xf ) {
      values_1d[xp][xf] = eval_value_1d( xf , points_1d[xp] );
      derivs_1d[xp][xf] = eval_deriv_1d( xf , points_1d[xp] );
    }}
  }
};

//----------------------------------------------------------------------------
/** \brief  One-dimensional tensor-product data for the 27-node hex element
 *          on the interval [-1,1]^3.
 *
 *  Same layout as the 8-node specialization, with three quadratic 1D basis
 *  functions and three 1D integration points.
 */
template<>
class HexElement_TensorData< 27 > {
public:

  static const unsigned element_node_count   = 27 ;
  static const unsigned spatial_dimension    = 3 ;
  static const unsigned integration_count_1d = 3 ;
  static const unsigned function_count_1d    = 3 ;

  // Indexed [ integration point ][ basis function ]; this is the order in
  // which the constructor fills them and HexElement_Data reads them.
  float values_1d [ integration_count_1d ][ function_count_1d ];
  float derivs_1d [ integration_count_1d ][ function_count_1d ];
  float weights_1d[ integration_count_1d ];

  // eval_map[node][0..2] = 1D index of the node along x, y, z.
  // Column 3 is not used by the code in this header and is zeroed.
  unsigned char eval_map[ element_node_count ][4] ;

  // Data members:
  //   sizeof(float) * 9      values_1d
  //   sizeof(float) * 9      derivs_1d
  //   sizeof(float) * 3      weights_1d
  //   sizeof(char)  * 27 * 4 eval_map

  /** \brief  Value of 1D quadratic basis function 'jf' at 'p'; zero for jf > 2. */
  static float eval_value_1d( const unsigned jf , const float p )
  {
    return 0 == jf ? 0.5 * p * ( p - 1 ) : (
           1 == jf ? 1.0 - p * p : (
           2 == jf ? 0.5 * p * ( p + 1 ) : 0 ));
  }

  /** \brief  Derivative of 1D quadratic basis function 'jf' at 'p'; zero for jf > 2. */
  static float eval_deriv_1d( const unsigned jf , const float p )
  {
    return 0 == jf ? p - 0.5 : (
           1 == jf ? -2.0 * p : (
           2 == jf ? p + 0.5 : 0 ));
  }

  HexElement_TensorData()
  {
    const unsigned char tmp_map[ element_node_count ][ spatial_dimension ] =
      { { 0 , 0 , 0 },
        { 2 , 0 , 0 },
        { 2 , 2 , 0 },
        { 0 , 2 , 0 },
        { 0 , 0 , 2 },
        { 2 , 0 , 2 },
        { 2 , 2 , 2 },
        { 0 , 2 , 2 },
        { 1 , 0 , 0 },
        { 2 , 1 , 0 },
        { 1 , 2 , 0 },
        { 0 , 1 , 0 },
        { 0 , 0 , 1 },
        { 2 , 0 , 1 },
        { 2 , 2 , 1 },
        { 0 , 2 , 1 },
        { 1 , 0 , 2 },
        { 2 , 1 , 2 },
        { 1 , 2 , 2 },
        { 0 , 1 , 2 },
        { 1 , 1 , 1 },
        { 1 , 1 , 0 },
        { 1 , 1 , 2 },
        { 0 , 1 , 1 },
        { 2 , 1 , 1 },
        { 1 , 0 , 1 },
        { 1 , 2 , 1 } };

    // Interval [-1,1]

    weights_1d[0] = 0.555555556 ;
    weights_1d[1] = 0.888888889 ;
    weights_1d[2] = 0.555555556 ;

    const float points_1d[3] = { -0.774596669 ,
                                  0.000000000 ,
                                  0.774596669 };

    for ( unsigned i = 0 ; i < element_node_count ; ++i ) {
      eval_map[i][0] = tmp_map[i][0];
      eval_map[i][1] = tmp_map[i][1];
      eval_map[i][2] = tmp_map[i][2];
      eval_map[i][3] = 0 ; // keep the unused column deterministic
    }

    for ( unsigned xp = 0 ; xp < integration_count_1d ; ++xp ) {
    for ( unsigned xf = 0 ; xf < function_count_1d ; ++xf ) {
      values_1d[xp][xf] = eval_value_1d( xf , points_1d[xp] );
      derivs_1d[xp][xf] = eval_deriv_1d( xf , points_1d[xp] );
    }}
  }
};

//----------------------------------------------------------------------------
/** \brief  Full 3D integration data for a hex element with NodeCount nodes.
 *
 *  Built from HexElement_TensorData< NodeCount > by taking products of the
 *  1D data along x, y and z.  Integration points and basis functions are
 *  both enumerated through the tensor data's eval_map, so there are
 *  NodeCount of each.
 */
template< unsigned NodeCount >
class HexElement_Data {
public:
  static const unsigned spatial_dimension  = 3 ;
  static const unsigned element_node_count = NodeCount ;
  static const unsigned integration_count  = NodeCount ;
  static const unsigned function_count     = NodeCount ;

  float weights[ integration_count ] ;
  float values[ integration_count ][ function_count ];
  float gradients[ integration_count ][ spatial_dimension ][ function_count ];

  HexElement_Data()
  {
    HexElement_TensorData< NodeCount > tensor_data ;

    for ( unsigned ip = 0 ; ip < integration_count ; ++ip ) {

      // 1D integration point indices of 3D integration point 'ip'.
      const unsigned ipx = tensor_data.eval_map[ip][0] ;
      const unsigned ipy = tensor_data.eval_map[ip][1] ;
      const unsigned ipz = tensor_data.eval_map[ip][2] ;

      weights[ip] = tensor_data.weights_1d[ ipx ] *
                    tensor_data.weights_1d[ ipy ] *
                    tensor_data.weights_1d[ ipz ] ;

      for ( unsigned jf = 0 ; jf < function_count ; ++jf ) {

        // 1D basis function indices of 3D basis function 'jf'.
        const unsigned jfx = tensor_data.eval_map[jf][0] ;
        const unsigned jfy = tensor_data.eval_map[jf][1] ;
        const unsigned jfz = tensor_data.eval_map[jf][2] ;

        values[ip][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                         tensor_data.values_1d[ ipy ][ jfy ] *
                         tensor_data.values_1d[ ipz ][ jfz ] ;

        // Each gradient component differentiates along exactly one axis.
        gradients[ip][0][jf] = tensor_data.derivs_1d[ ipx ][ jfx ] *
                               tensor_data.values_1d[ ipy ][ jfy ] *
                               tensor_data.values_1d[ ipz ][ jfz ] ;

        gradients[ip][1][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                               tensor_data.derivs_1d[ ipy ][ jfy ] *
                               tensor_data.values_1d[ ipz ][ jfz ] ;

        gradients[ip][2][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                               tensor_data.values_1d[ ipy ][ jfy ] *
                               tensor_data.derivs_1d[ ipz ][ jfz ] ;
      }
    }
  }
};

//----------------------------------------------------------------------------

} /* namespace Example */
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #ifndef KOKKOS_HEXELEMENT_HPP */
|
||||
|
||||
|
||||
@ -1,304 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
#include <utility>
|
||||
#include <iostream>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <BoxElemPart.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
template< class > void test_fixture();
|
||||
}
|
||||
}
|
||||
|
||||
int test_box( const size_t global_size
|
||||
, const size_t global_box[][2]
|
||||
, const bool print_verbose )
|
||||
{
|
||||
size_t global_count = 0 ;
|
||||
size_t global_max = 0 ;
|
||||
size_t global_min = Kokkos::Example::box_count( global_box );
|
||||
size_t global_box_max[3][2] = { { 0 , 0 } , { 0 , 0 } , { 0 , 0 } };
|
||||
size_t global_box_min[3][2] = { { 0 , global_box[0][1] } , { 0 , global_box[1][1] } , { 0 , global_box[2][1] } };
|
||||
size_t intersect_error = 0 ;
|
||||
size_t neighbor_max = 0 ;
|
||||
|
||||
for ( size_t global_rank = 0 ; global_rank < global_size ; ++global_rank ) {
|
||||
size_t box[3][2] = { { 0 , global_box[0][1] } , { 0 , global_box[1][1] } , { 0 , global_box[2][1] } };
|
||||
size_t ghost_box[3][2] ;
|
||||
size_t neighbor_count = 0 ;
|
||||
|
||||
Kokkos::Example::box_partition( global_size , global_rank , global_box , box );
|
||||
|
||||
Kokkos::Example::box_ghost_layer( global_box , box , 1 , ghost_box );
|
||||
|
||||
{
|
||||
const size_t n = Kokkos::Example::box_count( box );
|
||||
|
||||
for ( int i = 0 ; i < 3 ; ++i ) {
|
||||
if ( ( box[i][1] - box[i][0] ) < ( global_box_min[i][1] - global_box_min[i][0] ) ) {
|
||||
global_box_min[i][0] = box[i][0] ;
|
||||
global_box_min[i][1] = box[i][1] ;
|
||||
}
|
||||
if ( ( box[i][1] - box[i][0] ) > ( global_box_max[i][1] - global_box_max[i][0] ) ) {
|
||||
global_box_max[i][0] = box[i][0] ;
|
||||
global_box_max[i][1] = box[i][1] ;
|
||||
}
|
||||
}
|
||||
|
||||
global_max = std::max( global_max , n );
|
||||
global_min = std::min( global_min , n );
|
||||
global_count += n ;
|
||||
}
|
||||
|
||||
for ( size_t other_rank = 0 ; other_rank < global_size ; ++other_rank ) {
|
||||
|
||||
if ( other_rank == global_rank ) continue ;
|
||||
|
||||
size_t other_box[3][2] = { { 0 , global_box[0][1] } , { 0 , global_box[1][1] } , { 0 , global_box[2][1] } };
|
||||
size_t intersect_box[3][2] ;
|
||||
|
||||
Kokkos::Example::box_partition( global_size , other_rank , global_box , other_box );
|
||||
|
||||
Kokkos::Example::box_intersect( intersect_box , box , other_box );
|
||||
|
||||
const size_t n = Kokkos::Example::box_count( intersect_box );
|
||||
|
||||
intersect_error += n ;
|
||||
|
||||
Kokkos::Example::box_intersect( intersect_box , ghost_box , other_box );
|
||||
|
||||
neighbor_count += Kokkos::Example::box_count( intersect_box ) ? 1 : 0 ;
|
||||
|
||||
if ( n ) {
|
||||
std::cout << "box partition intersection error" << std::endl ;
|
||||
std::cout << "box = {"
|
||||
<< " [ " << box[0][0] << " , " << box[0][1] << " )"
|
||||
<< " [ " << box[1][0] << " , " << box[1][1] << " )"
|
||||
<< " [ " << box[2][0] << " , " << box[2][1] << " )"
|
||||
<< " }" << std::endl ;
|
||||
std::cout << "other_box = {"
|
||||
<< " [ " << other_box[0][0] << " , " << other_box[0][1] << " )"
|
||||
<< " [ " << other_box[1][0] << " , " << other_box[1][1] << " )"
|
||||
<< " [ " << other_box[2][0] << " , " << other_box[2][1] << " )"
|
||||
<< " }" << std::endl ;
|
||||
return 0 ;
|
||||
}
|
||||
}
|
||||
|
||||
neighbor_max = std::max( neighbor_max , neighbor_count );
|
||||
}
|
||||
|
||||
if ( print_verbose ) {
|
||||
|
||||
std::cout << "global_part = " << global_size << std::endl ;
|
||||
std::cout << "global_box = { "
|
||||
<< " [ " << global_box[0][0] << " .. " << global_box[0][1] << " ) X"
|
||||
<< " [ " << global_box[1][0] << " .. " << global_box[1][1] << " ) X"
|
||||
<< " [ " << global_box[2][0] << " .. " << global_box[2][1] << " )"
|
||||
<< " }" << std::endl ;
|
||||
std::cout << "count( global_box ) = " << Kokkos::Example::box_count( global_box ) << std::endl ;
|
||||
std::cout << "sum partition( global_box ) = " << global_count << std::endl ;
|
||||
std::cout << "avg partition( global_box ) = " << size_t( double(global_count) / double(global_size)) << std::endl ;
|
||||
std::cout << "min partition( global_box ) = " << global_min << std::endl ;
|
||||
std::cout << "min part X ( global_box ) = [ " << global_box_min[0][0] << " .. " << global_box_min[0][1] << " )" << std::endl ;
|
||||
std::cout << "min part Y ( global_box ) = [ " << global_box_min[1][0] << " .. " << global_box_min[1][1] << " )" << std::endl ;
|
||||
std::cout << "min part Z ( global_box ) = [ " << global_box_min[2][0] << " .. " << global_box_min[2][1] << " )" << std::endl ;
|
||||
std::cout << "max partition( global_box ) = " << global_max << std::endl ;
|
||||
std::cout << "max part X ( global_box ) = [ " << global_box_max[0][0] << " .. " << global_box_max[0][1] << " )" << std::endl ;
|
||||
std::cout << "max part Y ( global_box ) = [ " << global_box_max[1][0] << " .. " << global_box_max[1][1] << " )" << std::endl ;
|
||||
std::cout << "max part Z ( global_box ) = [ " << global_box_max[2][0] << " .. " << global_box_max[2][1] << " )" << std::endl ;
|
||||
std::cout << "sum intersect( global_box ) = " << intersect_error << std::endl ;
|
||||
std::cout << "max neighbor = " << neighbor_max << std::endl ;
|
||||
}
|
||||
|
||||
return neighbor_max ;
|
||||
}
|
||||
|
||||
void test_elem()
|
||||
{
|
||||
const Kokkos::Example::BoxElemPart::Decompose
|
||||
decompose = Kokkos::Example::BoxElemPart:: DecomposeElem ; // DecomposeElem | DecomposeNode ;
|
||||
const size_t global_size = 256 ;
|
||||
const size_t global_nx = 100 ;
|
||||
const size_t global_ny = 120 ;
|
||||
const size_t global_nz = 140 ;
|
||||
|
||||
double node_count_avg = 0 ;
|
||||
size_t node_count_max = 0 ;
|
||||
size_t node_count_min = ( global_nx + 1 ) * ( global_ny + 1 ) * ( global_nz + 1 );
|
||||
double elem_count_avg = 0 ;
|
||||
size_t elem_count_max = 0 ;
|
||||
size_t elem_count_min = global_nx * global_ny * global_nz ;
|
||||
double recv_count_avg = 0 ;
|
||||
size_t recv_count_max = 0 ;
|
||||
size_t recv_count_min = global_size ;
|
||||
double send_count_avg = 0 ;
|
||||
size_t send_count_max = 0 ;
|
||||
size_t send_count_min = global_size ;
|
||||
|
||||
for ( size_t r = 0 ; r < global_size ; ++r ) {
|
||||
const Kokkos::Example::BoxElemPart
|
||||
fixture( Kokkos::Example::BoxElemPart::ElemLinear ,
|
||||
decompose , global_size , r , global_nx , global_ny , global_nz );
|
||||
|
||||
// Print a sample:
|
||||
|
||||
// if ( r == global_size * 2 / 3 ) fixture.print( std::cout );
|
||||
|
||||
// Verify recv/send alignment:
|
||||
|
||||
{
|
||||
size_t recv_lid = fixture.owns_node_count();
|
||||
|
||||
for ( size_t i = 0 ; i < fixture.recv_node_msg_count() ; ++i ) {
|
||||
const size_t recv_rank = fixture.recv_node_rank( i );
|
||||
const size_t recv_count = fixture.recv_node_count( i );
|
||||
|
||||
const Kokkos::Example::BoxElemPart other_fixture(
|
||||
Kokkos::Example::BoxElemPart::ElemLinear ,
|
||||
decompose , global_size , recv_rank , global_nx , global_ny , global_nz );
|
||||
|
||||
size_t send_item = 0 ;
|
||||
|
||||
size_t j = 0 ;
|
||||
while ( j < other_fixture.send_node_msg_count() && other_fixture.send_node_rank(j) != r ) {
|
||||
send_item += other_fixture.send_node_count( j );
|
||||
++j ;
|
||||
}
|
||||
|
||||
if ( recv_count != other_fixture.send_node_count(j) ) {
|
||||
std::cout << "Error P[" << r << "].recv(" << recv_count << ") != "
|
||||
<< "P[" << recv_rank << "].send(" << other_fixture.send_node_count(j) << ")"
|
||||
<< std::endl ;
|
||||
}
|
||||
else {
|
||||
|
||||
for ( size_t k = 0 ; k < recv_count ; ++k , ++send_item , ++recv_lid ) {
|
||||
|
||||
const size_t send_lid = other_fixture.send_node_id( send_item );
|
||||
|
||||
size_t recv_coord[3] , send_coord[3] ;
|
||||
|
||||
fixture.local_node_coord( recv_lid , recv_coord );
|
||||
|
||||
other_fixture.local_node_coord( send_lid , send_coord );
|
||||
|
||||
if ( recv_coord[0] != send_coord[0] ||
|
||||
recv_coord[1] != send_coord[1] ||
|
||||
recv_coord[2] != send_coord[2] ) {
|
||||
std::cout << "Error P[" << r << "].recv[" << recv_lid << "]{ "
|
||||
<< recv_coord[0] << " , "
|
||||
<< recv_coord[1] << " , "
|
||||
<< recv_coord[2] << " } != "
|
||||
<< "P[" << recv_rank << "].send[" << send_lid << "]{ "
|
||||
<< send_coord[0] << " , "
|
||||
<< send_coord[1] << " , "
|
||||
<< send_coord[2] << " }"
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
node_count_avg += fixture.owns_node_count();
|
||||
elem_count_avg += fixture.uses_elem_count();
|
||||
recv_count_avg += fixture.recv_node_msg_count();
|
||||
send_count_avg += fixture.send_node_msg_count();
|
||||
|
||||
elem_count_min = std::min( (size_t) fixture.uses_elem_count() , elem_count_min );
|
||||
elem_count_max = std::max( (size_t) fixture.uses_elem_count() , elem_count_max );
|
||||
node_count_min = std::min( (size_t) fixture.owns_node_count() , node_count_min );
|
||||
node_count_max = std::max( (size_t) fixture.owns_node_count() , node_count_max );
|
||||
|
||||
recv_count_max = std::max( (size_t) fixture.recv_node_msg_count() , recv_count_max );
|
||||
recv_count_min = std::min( (size_t) fixture.recv_node_msg_count() , recv_count_min );
|
||||
send_count_max = std::max( (size_t) fixture.send_node_msg_count() , send_count_max );
|
||||
send_count_min = std::min( (size_t) fixture.send_node_msg_count() , send_count_min );
|
||||
}
|
||||
|
||||
node_count_avg /= double(global_size);
|
||||
elem_count_avg /= double(global_size);
|
||||
recv_count_avg /= double(global_size);
|
||||
send_count_avg /= double(global_size);
|
||||
|
||||
std::cout << "Elem min(" << elem_count_min << ") avg(" << elem_count_avg << ") max(" << elem_count_max << ") " << std::endl
|
||||
<< "Node min(" << node_count_min << ") avg(" << node_count_avg << ") max(" << node_count_max << ") " << std::endl
|
||||
<< "Recv min(" << recv_count_min << ") avg(" << recv_count_avg << ") max(" << recv_count_max << ") " << std::endl
|
||||
<< "Send min(" << send_count_min << ") avg(" << send_count_avg << ") max(" << send_count_max << ") " << std::endl
|
||||
;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
for ( int i = 1 ; i <= 32 ; ++i ) {
|
||||
const size_t global_size = 16 * i ;
|
||||
const size_t global_box[3][2] = { { 0 , 65 } , { 0 , 65 } , { 0 , 65 } };
|
||||
if ( 30 < test_box( global_size , global_box , false ) ) {
|
||||
test_box( global_size , global_box , true );
|
||||
}
|
||||
}
|
||||
|
||||
// test_elem();
|
||||
|
||||
{
|
||||
std::cout << "test_fixture< Host >" << std::endl ;
|
||||
Kokkos::HostSpace::execution_space::initialize( 1 );
|
||||
Kokkos::Example::test_fixture< Kokkos::HostSpace::execution_space >();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
{
|
||||
std::cout << "test_fixture< Cuda >" << std::endl ;
|
||||
Kokkos::HostSpace::execution_space::initialize();
|
||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
|
||||
Kokkos::Example::test_fixture< Kokkos::Cuda >();
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -1,49 +0,0 @@
|
||||
# Builds the KokkosExample_Fixture executable from the sources in
# $(KOKKOS_PATH)/example/fixture.

KOKKOS_PATH = ../..

vpath %.cpp ${KOKKOS_PATH}/example/fixture

EXAMPLE_HEADERS = $(wildcard $(KOKKOS_PATH)/example/common/*.hpp ${KOKKOS_PATH}/example/fixture/*.hpp )

# First rule in the file, hence the default goal even though
# Makefile.kokkos is included below.
default: build_all
	echo "End Build"

# These targets name actions, not files.
.PHONY: default build_all test

include $(KOKKOS_PATH)/Makefile.kokkos

ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = nvcc_wrapper
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif

KOKKOS_CXXFLAGS += \
	-I${KOKKOS_PATH}/example/common \
	-I${KOKKOS_PATH}/example/fixture

OBJ_EXAMPLE_FIXTURE = Main.o TestFixture.o BoxElemPart.o
EXE_EXAMPLE_FIXTURE = KokkosExample_Fixture

TARGETS = $(EXE_EXAMPLE_FIXTURE)

#TEST_TARGETS =

$(EXE_EXAMPLE_FIXTURE) : $(OBJ_EXAMPLE_FIXTURE) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_EXAMPLE_FIXTURE) $(KOKKOS_LIBS) $(LIB) -o $(EXE_EXAMPLE_FIXTURE)

build_all : $(TARGETS)

test : build_all

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(EXAMPLE_HEADERS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
@ -1,58 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <TestFixture.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
template void test_fixture< Kokkos::HostSpace::execution_space >();
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
template void test_fixture<Kokkos::Cuda>();
|
||||
#endif
|
||||
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
@ -1,156 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_EXAMPLE_TESTFIXTURE_HPP
|
||||
#define KOKKOS_EXAMPLE_TESTFIXTURE_HPP
|
||||
|
||||
#include <utility>
|
||||
#include <iostream>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <BoxElemPart.hpp>
|
||||
#include <BoxElemFixture.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Example {
|
||||
|
||||
template< class Device >
|
||||
struct FixtureVerifyElemNodeCoord
|
||||
{
|
||||
typedef Device execution_space ;
|
||||
|
||||
typedef struct { size_t success , error ; } value_type ;
|
||||
|
||||
typedef Kokkos::Example::BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemLinear > FixtureType ;
|
||||
|
||||
FixtureType m_fixture ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void init( value_type & update ) const { update.success = update.error = 0 ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void join( volatile value_type & update ,
|
||||
volatile const value_type & input ) const
|
||||
{
|
||||
update.success += input.success ;
|
||||
update.error += input.error ;
|
||||
}
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( size_t ielem , value_type & update ) const
|
||||
{
|
||||
unsigned node_coord[ FixtureType::ElemNode ][3] ;
|
||||
|
||||
for ( unsigned i = 0 ; i < FixtureType::ElemNode ; ++i ) {
|
||||
const unsigned node_id = m_fixture.elem_node(ielem,i);
|
||||
node_coord[i][0] = m_fixture.node_grid(node_id,0);
|
||||
node_coord[i][1] = m_fixture.node_grid(node_id,1);
|
||||
node_coord[i][2] = m_fixture.node_grid(node_id,2);
|
||||
}
|
||||
|
||||
int error = 0 ;
|
||||
for ( unsigned i = 1 ; i < FixtureType::ElemNode ; ++i ) {
|
||||
if ( node_coord[0][0] + m_fixture.elem_node_local(i,0) != node_coord[i][0] ||
|
||||
node_coord[0][1] + m_fixture.elem_node_local(i,1) != node_coord[i][1] ||
|
||||
node_coord[0][2] + m_fixture.elem_node_local(i,2) != node_coord[i][2] ) {
|
||||
error = 1 ;
|
||||
}
|
||||
}
|
||||
|
||||
if ( error ) {
|
||||
++update.error ;
|
||||
}
|
||||
else {
|
||||
++update.success ;
|
||||
}
|
||||
}
|
||||
|
||||
FixtureVerifyElemNodeCoord( const FixtureType & f ) : m_fixture(f) {}
|
||||
};
|
||||
|
||||
|
||||
template< class Device >
|
||||
void test_fixture()
|
||||
{
|
||||
typedef Kokkos::Example::BoxElemFixture< Device , Kokkos::Example::BoxElemPart::ElemLinear > FixtureType ;
|
||||
|
||||
const Kokkos::Example::BoxElemPart::Decompose
|
||||
decompose = Kokkos::Example::BoxElemPart:: DecomposeElem ; // DecomposeElem | DecomposeNode ;
|
||||
|
||||
const unsigned global_size = 256 ;
|
||||
const unsigned global_nx = 400 ;
|
||||
const unsigned global_ny = 400 ;
|
||||
const unsigned global_nz = 400 ;
|
||||
|
||||
for ( unsigned my_rank = 0 ; my_rank < global_size ; ++my_rank ) {
|
||||
|
||||
const FixtureType fixture( decompose , global_size , my_rank , global_nx , global_ny , global_nz );
|
||||
|
||||
// Verify grid coordinates of element's nodes
|
||||
|
||||
typename FixtureVerifyElemNodeCoord<Device>::value_type result = { 0 , 0 };
|
||||
|
||||
Kokkos::parallel_reduce( fixture.elem_node().dimension_0() , FixtureVerifyElemNodeCoord<Device>( fixture ) , result );
|
||||
|
||||
if ( result.error ) {
|
||||
std::cout << "P[" << my_rank << ":" << global_size
|
||||
<< "] Fixture elem_node_coord"
|
||||
<< " success(" << result.success << ")"
|
||||
<< " error(" << result.error << ")"
|
||||
<< std::endl ;
|
||||
}
|
||||
|
||||
// Check send/recv alignment
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} /* namespace Example */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #ifndef KOKKOS_EXAMPLE_TESTFIXTURE_HPP */
|
||||
|
||||
@ -1,17 +0,0 @@
|
||||
|
||||
# Headers are looked up in the build directory first, then in the source directory.
include_directories(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
  )

# The example consists of a single translation unit.
set(SOURCES G2L_Main.cpp)

TRIBITS_ADD_EXECUTABLE(
  global_2_local_ids
  SOURCES ${SOURCES}
  COMM serial mpi
  )
|
||||
|
||||
|
||||
@ -1,266 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
#ifndef KOKKOS_GLOBAL_TO_LOCAL_IDS_HPP
|
||||
#define KOKKOS_GLOBAL_TO_LOCAL_IDS_HPP
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <Kokkos_UnorderedMap.hpp>
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <iomanip>
|
||||
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
// This test will simulate global ids
|
||||
|
||||
namespace G2L {
|
||||
|
||||
// Size-related constants of the test (not referenced elsewhere in this header).
static const unsigned begin_id_size = 0x100u;      // 256
static const unsigned end_id_size   = 0x2000000u;  // 1 << 25
static const unsigned id_step       = 2u;

// Used to help generate global ids: overlays one 32-bit word with its four bytes.
union helper
{
  uint32_t word;     // first member, so brace-initialisation fills the word
  uint8_t  byte[4];
};
|
||||
|
||||
|
||||
//generate a unique global id from the local id
|
||||
template <typename Device>
|
||||
struct generate_ids
|
||||
{
|
||||
typedef Device execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
typedef Kokkos::View<uint32_t*,execution_space> local_id_view;
|
||||
|
||||
local_id_view local_2_global;
|
||||
|
||||
generate_ids( local_id_view & ids)
|
||||
: local_2_global(ids)
|
||||
{
|
||||
Kokkos::parallel_for(local_2_global.size(), *this);
|
||||
}
|
||||
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(size_type i) const
|
||||
{
|
||||
|
||||
helper x = {static_cast<uint32_t>(i)};
|
||||
|
||||
// shuffle the bytes of i to create a unique, semi-random global_id
|
||||
x.word = ~x.word;
|
||||
|
||||
uint8_t tmp = x.byte[3];
|
||||
x.byte[3] = x.byte[1];
|
||||
x.byte[1] = tmp;
|
||||
|
||||
tmp = x.byte[2];
|
||||
x.byte[2] = x.byte[0];
|
||||
x.byte[0] = tmp;
|
||||
|
||||
local_2_global[i] = x.word;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// fill a map of global_id -> local_id
|
||||
template <typename Device>
|
||||
struct fill_map
|
||||
{
|
||||
typedef Device execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view;
|
||||
typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;
|
||||
|
||||
global_id_view global_2_local;
|
||||
local_id_view local_2_global;
|
||||
|
||||
fill_map( global_id_view gIds, local_id_view lIds)
|
||||
: global_2_local(gIds) , local_2_global(lIds)
|
||||
{
|
||||
Kokkos::parallel_for(local_2_global.size(), *this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(size_type i) const
|
||||
{
|
||||
global_2_local.insert( local_2_global[i], i);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// check that the global id is found and that it maps to the local id
|
||||
template <typename Device>
|
||||
struct find_test
|
||||
{
|
||||
typedef Device execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
typedef Kokkos::View<const uint32_t*,execution_space, Kokkos::MemoryRandomAccess> local_id_view;
|
||||
typedef Kokkos::UnorderedMap<const uint32_t, const size_type,execution_space> global_id_view;
|
||||
|
||||
global_id_view global_2_local;
|
||||
local_id_view local_2_global;
|
||||
|
||||
typedef size_t value_type;
|
||||
|
||||
find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors)
|
||||
: global_2_local(gIds) , local_2_global(lIds)
|
||||
{
|
||||
Kokkos::parallel_reduce(local_2_global.size(), *this, num_errors);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void init(value_type & v) const
|
||||
{ v = 0; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void join(volatile value_type & dst, volatile value_type const & src) const
|
||||
{ dst += src; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(size_type i, value_type & num_errors) const
|
||||
{
|
||||
uint32_t index = global_2_local.find( local_2_global[i] );
|
||||
|
||||
if ( !global_2_local.valid_at(index)
|
||||
|| global_2_local.key_at(index) != local_2_global[i]
|
||||
|| global_2_local.value_at(index) != i)
|
||||
++num_errors;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// run test
|
||||
template <typename Device>
|
||||
size_t test_global_to_local_ids(unsigned num_ids, unsigned capacity, unsigned num_find_iterations)
|
||||
{
|
||||
|
||||
typedef Device execution_space;
|
||||
typedef typename execution_space::size_type size_type;
|
||||
|
||||
typedef Kokkos::View<uint32_t*,execution_space> local_id_view;
|
||||
typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;
|
||||
|
||||
double elasped_time = 0;
|
||||
Kokkos::Impl::Timer timer;
|
||||
|
||||
local_id_view local_2_global("local_ids", num_ids);
|
||||
global_id_view global_2_local(capacity);
|
||||
|
||||
int shiftw = 15;
|
||||
|
||||
//create
|
||||
elasped_time = timer.seconds();
|
||||
std::cout << std::setw(shiftw) << "allocate: " << elasped_time << std::endl;
|
||||
timer.reset();
|
||||
|
||||
// generate unique ids
|
||||
{
|
||||
generate_ids<Device> gen(local_2_global);
|
||||
}
|
||||
|
||||
// generate
|
||||
elasped_time = timer.seconds();
|
||||
std::cout << std::setw(shiftw) << "generate: " << elasped_time << std::endl;
|
||||
timer.reset();
|
||||
|
||||
{
|
||||
fill_map<Device> fill(global_2_local, local_2_global);
|
||||
}
|
||||
|
||||
// fill
|
||||
elasped_time = timer.seconds();
|
||||
std::cout << std::setw(shiftw) << "fill: " << elasped_time << std::endl;
|
||||
timer.reset();
|
||||
|
||||
|
||||
size_t num_errors = global_2_local.failed_insert();
|
||||
|
||||
if (num_errors == 0u) {
|
||||
for (unsigned i=0; i<num_find_iterations; ++i)
|
||||
{
|
||||
find_test<Device> find(global_2_local, local_2_global,num_errors);
|
||||
}
|
||||
|
||||
// find
|
||||
elasped_time = timer.seconds();
|
||||
std::cout << std::setw(shiftw) << "lookup: " << elasped_time << std::endl;
|
||||
}
|
||||
else {
|
||||
std::cout << " !!! Fill Failed !!!" << std::endl;
|
||||
}
|
||||
|
||||
return num_errors;
|
||||
}
|
||||
|
||||
template <typename Device>
|
||||
size_t run_test(unsigned num_ids, unsigned num_find_iterations)
|
||||
{
|
||||
// expect to fail
|
||||
unsigned capacity = (num_ids*2u)/3u;
|
||||
std::cout << " 66% of needed capacity (should fail)" << std::endl;
|
||||
test_global_to_local_ids<Device>(num_ids, capacity, num_find_iterations);
|
||||
|
||||
//should not fail
|
||||
std::cout << " 100% of needed capacity" << std::endl;
|
||||
capacity = num_ids;
|
||||
size_t num_errors = test_global_to_local_ids<Device>(num_ids, capacity, num_find_iterations);
|
||||
|
||||
//should not fail
|
||||
std::cout << " 150% of needed capacity" << std::endl;
|
||||
capacity = (num_ids*3u)/2u;
|
||||
num_errors += test_global_to_local_ids<Device>(num_ids, capacity, num_find_iterations);
|
||||
|
||||
return num_errors;
|
||||
}
|
||||
|
||||
|
||||
} // namespace G2L
|
||||
|
||||
|
||||
#endif //KOKKOS_GLOBAL_TO_LOCAL_IDS_HPP
|
||||
|
||||
@ -1,149 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <G2L.hpp>
|
||||
|
||||
namespace G2L {
|
||||
|
||||
// Run the test on Kokkos::Serial when that backend is compiled in;
// otherwise report zero errors without doing anything.
size_t run_serial(unsigned num_ids, unsigned num_find_iterations)
{
  size_t error_total = 0;
#if defined(KOKKOS_HAVE_SERIAL)
  std::cout << "Serial" << std::endl;
  error_total = run_test<Kokkos::Serial>(num_ids,num_find_iterations);
#endif
  return error_total;
}
|
||||
|
||||
// Run the test on Kokkos::Threads when that backend is compiled in;
// otherwise report zero errors without doing anything.
size_t run_threads(unsigned num_ids, unsigned num_find_iterations)
{
  size_t error_total = 0;
#if defined(KOKKOS_HAVE_PTHREAD)
  std::cout << "Threads" << std::endl;
  error_total = run_test<Kokkos::Threads>(num_ids,num_find_iterations);
#endif
  return error_total;
}
|
||||
|
||||
// Run the test on Kokkos::OpenMP when that backend is compiled in;
// otherwise report zero errors without doing anything.
size_t run_openmp(unsigned num_ids, unsigned num_find_iterations)
{
  size_t error_total = 0;
#if defined(KOKKOS_HAVE_OPENMP)
  std::cout << "OpenMP" << std::endl;
  error_total = run_test<Kokkos::OpenMP>(num_ids,num_find_iterations);
#endif
  return error_total;
}
|
||||
|
||||
// Run the test on Kokkos::Cuda when that backend is compiled in;
// otherwise report zero errors without doing anything.
size_t run_cuda(unsigned num_ids, unsigned num_find_iterations)
{
  size_t error_total = 0;
#if defined(KOKKOS_HAVE_CUDA)
  std::cout << "Cuda" << std::endl;
  error_total = run_test<Kokkos::Cuda>(num_ids,num_find_iterations);
#endif
  return error_total;
}
|
||||
|
||||
} // namespace G2L
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
unsigned num_ids = 100000;
|
||||
unsigned num_find_iterations = 1000;
|
||||
|
||||
if (argc == 3) {
|
||||
num_ids = atoi(argv[1]);
|
||||
num_find_iterations = atoi(argv[2]);
|
||||
}
|
||||
else if (argc != 1) {
|
||||
std::cout << argv[0] << " num_ids num_find_iterations" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
// query the topology of the host
|
||||
unsigned threads_count = 4 ;
|
||||
|
||||
if (Kokkos::hwloc::available()) {
|
||||
threads_count = Kokkos::hwloc::get_available_numa_count() *
|
||||
Kokkos::hwloc::get_available_cores_per_numa() *
|
||||
Kokkos::hwloc::get_available_threads_per_core();
|
||||
|
||||
}
|
||||
|
||||
std::cout << "Threads: " << threads_count << std::endl;
|
||||
std::cout << "Number of ids: " << num_ids << std::endl;
|
||||
std::cout << "Number of find iterations: " << num_find_iterations << std::endl;
|
||||
|
||||
size_t num_errors = 0;
|
||||
|
||||
num_errors += G2L::run_serial(num_ids,num_find_iterations);
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
Kokkos::HostSpace::execution_space::initialize(threads_count);
|
||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
|
||||
num_errors += G2L::run_cuda(num_ids,num_find_iterations);
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_PTHREAD
|
||||
Kokkos::Threads::initialize( threads_count );
|
||||
num_errors += G2L::run_threads(num_ids,num_find_iterations);
|
||||
Kokkos::Threads::finalize();
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_HAVE_OPENMP
|
||||
Kokkos::OpenMP::initialize( threads_count );
|
||||
num_errors += G2L::run_openmp(num_ids,num_find_iterations);
|
||||
Kokkos::OpenMP::finalize();
|
||||
#endif
|
||||
|
||||
|
||||
return num_errors;
|
||||
}
|
||||
|
||||
@ -1,53 +0,0 @@
|
||||
# Location of the Kokkos installation providing Makefile.kokkos (overridable).
KOKKOS_PATH ?= ../..

# Build out of tree: sources are taken from the directory holding this Makefile.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
SRC_DIR := $(dir $(MAKEFILE_PATH))

SRC = $(wildcard $(SRC_DIR)/*.cpp)
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)

#SRC = $(wildcard *.cpp)
#OBJ = $(SRC:%.cpp=%.o)

# First rule in the file, hence the default goal.
default: build
	echo "Start Build"

# None of these names a file to be produced; declaring them phony keeps a
# stray file called 'default', 'build' or 'clean' from masking the rule.
.PHONY: default build clean

# use installed Makefile.kokkos
include $(KOKKOS_PATH)/Makefile.kokkos

# Pick the compiler and executable suffix from the enabled Kokkos devices.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(NVCC_WRAPPER)
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "Cuda,OpenMP"
#KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "OpenMP"
#KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

LIB =


build: $(EXE)

$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

clean:
	rm -f *.a *.o *.cuda *.host

# Compilation rules

%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
@ -1,14 +0,0 @@
|
||||
|
||||
# Headers are looked up in the build directory first, then in the source directory.
include_directories(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
  )

# Every .cpp file of this directory belongs to the example.
file(GLOB SOURCES *.cpp)

TRIBITS_ADD_EXECUTABLE(
  grow_array
  SOURCES ${SOURCES}
  COMM serial mpi
  )
|
||||
|
||||
@ -1,53 +0,0 @@
|
||||
# Location of the Kokkos installation providing Makefile.kokkos (overridable).
KOKKOS_PATH ?= ../..

# Build out of tree: sources are taken from the directory holding this Makefile.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
SRC_DIR := $(dir $(MAKEFILE_PATH))

SRC = $(wildcard $(SRC_DIR)/*.cpp)
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)

#SRC = $(wildcard *.cpp)
#OBJ = $(SRC:%.cpp=%.o)

# First rule in the file, hence the default goal.
default: build
	echo "Start Build"

# None of these names a file to be produced; declaring them phony keeps a
# stray file called 'default', 'build' or 'clean' from masking the rule.
.PHONY: default build clean

# use installed Makefile.kokkos
include $(KOKKOS_PATH)/Makefile.kokkos

# Pick the compiler and executable suffix from the enabled Kokkos devices.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(NVCC_WRAPPER)
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "Cuda,OpenMP"
#KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "OpenMP"
#KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

LIB =


build: $(EXE)

$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

clean:
	rm -f *.a *.o *.cuda *.host

# Compilation rules

%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
@ -1,257 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef EXAMPLE_GROW_ARRAY
|
||||
#define EXAMPLE_GROW_ARRAY
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#if defined(KOKKOS_HAVE_CUDA)
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
#endif
|
||||
|
||||
namespace Example {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class ExecSpace >
|
||||
struct SortView {
|
||||
|
||||
template< typename ValueType >
|
||||
SortView( const Kokkos::View<ValueType*,ExecSpace> v , int begin , int end )
|
||||
{
|
||||
std::sort( v.ptr_on_device() + begin , v.ptr_on_device() + end );
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(KOKKOS_HAVE_CUDA)
|
||||
template<>
|
||||
struct SortView< Kokkos::Cuda > {
|
||||
template< typename ValueType >
|
||||
SortView( const Kokkos::View<ValueType*,Kokkos::Cuda> v , int begin , int end )
|
||||
{
|
||||
thrust::sort( thrust::device_ptr<ValueType>( v.ptr_on_device() + begin )
|
||||
, thrust::device_ptr<ValueType>( v.ptr_on_device() + end ) );
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class ExecSpace >
|
||||
struct GrowArrayFunctor {
|
||||
|
||||
typedef ExecSpace execution_space ;
|
||||
|
||||
enum { SHIFT = sizeof(int) == 8 ? 6 : 5 }; // 8 or 4 byte int
|
||||
enum { MASK = ( 1 << SHIFT ) - 1 };
|
||||
|
||||
const Kokkos::View<int*,ExecSpace> m_search_flags ; // bit flags for values to append
|
||||
const Kokkos::View<int*,ExecSpace> m_search_array ; // array to append values
|
||||
const Kokkos::View<int,ExecSpace> m_search_count ; // offset
|
||||
const int m_search_total ;
|
||||
const int m_search_team_chunk ;
|
||||
|
||||
GrowArrayFunctor( int array_length , int search_length , int print = 1 )
|
||||
: m_search_flags( "flags" , ( search_length + MASK ) >> SHIFT ) // One bit per search entry
|
||||
, m_search_array( "array" , array_length )
|
||||
, m_search_count( "count" )
|
||||
, m_search_total( search_length )
|
||||
, m_search_team_chunk( 2048 )
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
bool flag_is_set( const int index ) const
|
||||
{
|
||||
// 64 or 32 bit integer:
|
||||
|
||||
const int j = index >> SHIFT ; // which integer flag
|
||||
const int k = 1 << ( index & MASK ); // which bit in that integer
|
||||
const int s = ( j < int(m_search_flags.dimension_0()) ) && ( 0 != ( m_search_flags(j) & k ) );
|
||||
|
||||
return s ;
|
||||
}
|
||||
|
||||
typedef typename Kokkos::TeamPolicy<ExecSpace>::member_type team_member ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const team_member & member ) const
|
||||
{
|
||||
enum { LOCAL_BUFFER_LENGTH = 16 };
|
||||
|
||||
int local_buffer[ LOCAL_BUFFER_LENGTH ] ;
|
||||
int local_count = 0 ;
|
||||
|
||||
// Each team searches 'm_search_team_chunk' indices.
|
||||
// The threads of a team must iterate together because all
|
||||
// threads in the team must call 'team_scan' to prevent deadlock in the team.
|
||||
|
||||
int search_team_begin = member.league_rank() * m_search_team_chunk ;
|
||||
const int search_team_end = search_team_begin + m_search_team_chunk ;
|
||||
|
||||
int k = 0 ;
|
||||
|
||||
while ( search_team_begin < search_team_end ) {
|
||||
|
||||
// This iteration searches [ search_team_begin .. search_team_begin + member.team_size() ]
|
||||
const int thread_search_index = search_team_begin + member.team_rank();
|
||||
|
||||
// If this thread's search index is in the range
|
||||
// and the flag is set, push into this thread's local buffer.
|
||||
if ( thread_search_index < m_search_total && flag_is_set(thread_search_index) ) {
|
||||
local_buffer[ local_count ] = thread_search_index ;
|
||||
++local_count ;
|
||||
}
|
||||
|
||||
// Move the team's search range forward
|
||||
search_team_begin += member.team_size(); // Striding team by team size
|
||||
|
||||
// Count number of times a thread's buffer might have grown:
|
||||
++k ;
|
||||
|
||||
// Write buffer if end of search or a thread might have filled its buffer.
|
||||
if ( k == LOCAL_BUFFER_LENGTH /* A thread in my team might have filled its buffer */ ||
|
||||
! ( search_team_begin < search_team_end ) /* Team is at the end of its search */ ) {
|
||||
|
||||
// Team's exclusive scan of threads' contributions, with global offset.
|
||||
// This thread writes its buffer into [ team_offset .. team_offset + local_count )
|
||||
const int team_offset = member.team_scan( local_count , & *m_search_count );
|
||||
|
||||
// Copy locally buffered entries into global array:
|
||||
for ( int i = 0 ; i < local_count ; ++i ) {
|
||||
m_search_array( team_offset + i ) = local_buffer[i] ;
|
||||
}
|
||||
|
||||
k = 0 ;
|
||||
local_count = 0 ;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template< class ExecSpace >
|
||||
void grow_array( int array_length , int search_length , int print = 1 )
|
||||
{
|
||||
typedef GrowArrayFunctor< ExecSpace > FunctorType ;
|
||||
|
||||
FunctorType functor( array_length , search_length , print );
|
||||
|
||||
typename Kokkos::View<int,ExecSpace>::HostMirror count = Kokkos::create_mirror_view( functor.m_search_count );
|
||||
typename Kokkos::View<int*,ExecSpace>::HostMirror flags = Kokkos::create_mirror_view( functor.m_search_flags );
|
||||
|
||||
// Set at most 'array_length' random bits over the search length.
|
||||
for ( int i = 0 ; i < array_length ; ++i ) {
|
||||
// 'lrand48()' generates random number between [0..2^31]
|
||||
// index = ( lrand48() * search_length ) / ( 2^31 )
|
||||
const long int index = ( lrand48() * search_length ) >> 31 ;
|
||||
// set the bit within the flags:
|
||||
flags( index >> FunctorType::SHIFT ) |= ( 1 << ( index & FunctorType::MASK ) );
|
||||
}
|
||||
|
||||
Kokkos::deep_copy( functor.m_search_flags , flags );
|
||||
|
||||
// Each team works on 'functor.m_search_team_chunk' span of the search_length
|
||||
Kokkos::TeamPolicy< ExecSpace >
|
||||
work( /* #teams */ ( search_length + functor.m_search_team_chunk - 1 ) / functor.m_search_team_chunk
|
||||
, /* threads/team */ Kokkos::TeamPolicy< ExecSpace >::team_size_max( functor ) );
|
||||
|
||||
// Fill array:
|
||||
Kokkos::parallel_for( work , functor );
|
||||
|
||||
// How much was filled:
|
||||
Kokkos::deep_copy( count , functor.m_search_count );
|
||||
|
||||
// Sort array:
|
||||
SortView< ExecSpace >( functor.m_search_array , 0 , *count );
|
||||
|
||||
// Mirror the results:
|
||||
typename Kokkos::View<int*,ExecSpace>::HostMirror results = Kokkos::create_mirror_view( functor.m_search_array );
|
||||
Kokkos::deep_copy( results , functor.m_search_array );
|
||||
|
||||
// Verify results:
|
||||
int result_error_count = 0 ;
|
||||
int flags_error_count = 0 ;
|
||||
for ( int i = 0 ; i < *count ; ++i ) {
|
||||
const int index = results(i);
|
||||
const int entry = index >> FunctorType::SHIFT ;
|
||||
const int bit = 1 << ( index & FunctorType::MASK );
|
||||
const bool flag = 0 != ( flags( entry ) & bit );
|
||||
if ( ! flag ) {
|
||||
if ( print ) std::cerr << "result( " << i << " : " << index << " )";
|
||||
++result_error_count ;
|
||||
}
|
||||
flags( entry ) &= ~bit ; // Clear that verified bit
|
||||
}
|
||||
|
||||
for ( int i = 0 ; i < int(flags.dimension_0()) ; ++i ) {
|
||||
// If any uncleared bits then an error
|
||||
if ( flags(i) ) {
|
||||
if ( print ) std::cerr << "flags( " << i << " : " << flags(i) << " )" ;
|
||||
++flags_error_count ;
|
||||
}
|
||||
}
|
||||
|
||||
if ( result_error_count || flags_error_count ) {
|
||||
std::cerr << std::endl << "Example::GrowArrayFunctor( " << array_length
|
||||
<< " , " << search_length
|
||||
<< " ) result_error_count( " << result_error_count << " )"
|
||||
<< " ) flags_error_count( " << flags_error_count << " )"
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace Example
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef EXAMPLE_GROW_ARRAY */
|
||||
|
||||
@ -1,110 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <grow_array.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
int main( int argc , char ** argv )
|
||||
{
|
||||
int num_threads = 4 ;
|
||||
int use_numa = 1 ;
|
||||
int use_core = 1 ;
|
||||
int length_array = 1000000 ;
|
||||
int span_values = 100000000 ;
|
||||
|
||||
|
||||
if ( Kokkos::hwloc::available() ) {
|
||||
use_numa = Kokkos::hwloc::get_available_numa_count();
|
||||
use_core = Kokkos::hwloc::get_available_cores_per_numa() - 1 ;
|
||||
num_threads = use_numa * use_core * Kokkos::hwloc::get_available_threads_per_core();
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_SERIAL )
|
||||
{
|
||||
std::cout << "Kokkos::Serial" << std::endl ;
|
||||
// The Serial device accepts these arguments, though it may ignore them.
|
||||
Kokkos::Serial::initialize( num_threads , use_numa , use_core );
|
||||
Example::grow_array< Kokkos::Serial >( length_array , span_values );
|
||||
Kokkos::Serial::finalize ();
|
||||
}
|
||||
#endif // defined( KOKKOS_HAVE_SERIAL )
|
||||
|
||||
#if defined( KOKKOS_HAVE_PTHREAD )
|
||||
{
|
||||
std::cout << "Kokkos::Threads" << std::endl ;
|
||||
Kokkos::Threads::initialize( num_threads , use_numa , use_core );
|
||||
Example::grow_array< Kokkos::Threads >( length_array , span_values );
|
||||
Kokkos::Threads::finalize();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_OPENMP )
|
||||
{
|
||||
std::cout << "Kokkos::OpenMP" << std::endl ;
|
||||
Kokkos::OpenMP::initialize( num_threads , use_numa , use_core );
|
||||
Example::grow_array< Kokkos::OpenMP >( length_array , span_values );
|
||||
Kokkos::OpenMP::finalize();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
{
|
||||
std::cout << "Kokkos::Cuda" << std::endl ;
|
||||
Kokkos::HostSpace::execution_space::initialize(1);
|
||||
Kokkos::Cuda::initialize();
|
||||
Example::grow_array< Kokkos::Cuda >( length_array , span_values );
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
@ -1,16 +0,0 @@
|
||||
|
||||
# Headers are looked up in the build directory first, then next to the sources.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
  )

# Start from empty lists; SOURCES is then filled with every .cpp file found
# in this directory, LIBRARIES stays empty.
SET(SOURCES "")
SET(LIBRARIES "")

FILE(GLOB SOURCES *.cpp )

# Single executable built from all globbed sources.
TRIBITS_ADD_EXECUTABLE(
  md_skeleton
  SOURCES ${SOURCES}
  COMM serial mpi
  DEPLIBS ${LIBRARIES}
  )
|
||||
|
||||
@ -1,53 +0,0 @@
|
||||
# Location of the Kokkos installation; override on the command line if needed.
KOKKOS_PATH ?= ../..

# Directory containing this Makefile, so the build can be run out of tree.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
SRC_DIR := $(dir $(MAKEFILE_PATH))

# One object file (in the current directory) per source file in SRC_DIR.
SRC = $(wildcard $(SRC_DIR)/*.cpp)
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)

#SRC = $(wildcard *.cpp)
#OBJ = $(SRC:%.cpp=%.o)

# Declared before the include below so that "default" stays the default goal.
default: build
	echo "Start Build"

# These targets do not name files; without this declaration a stray file
# called "build", "clean" or "default" would silently disable the rule.
.PHONY: default build clean

# use installed Makefile.kokkos
include $(KOKKOS_PATH)/Makefile.kokkos

# Pick compiler and executable suffix depending on whether Cuda is enabled.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(NVCC_WRAPPER)
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "Cuda,OpenMP"
#KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "OpenMP"
#KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

LIB =


build: $(EXE)

$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

clean:
	rm -f *.a *.o *.cuda *.host

# Compilation rules

%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
@ -1,3 +0,0 @@
|
||||
To build this example on a 2012-model MacBook Pro with NVIDIA Kepler GPU:
|
||||
|
||||
./build.cuda_std g++_osx cuda_osx 30 opt
|
||||
@ -1,192 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
/* Define values which set the max number of registers used for the Force Kernel
|
||||
* It is 32 * 2048 / (KOKKOS_CUDA_MAX_THREADS * KOKKOS_CUDA_MIN_BLOCKS)
|
||||
* Have to be set before including Kokkos header files.
|
||||
*/
|
||||
|
||||
#define KOKKOS_CUDA_MAX_THREADS 512
|
||||
#define KOKKOS_CUDA_MIN_BLOCKS 3
|
||||
|
||||
#include <system.h>
|
||||
#include <cstdio>
|
||||
|
||||
|
||||
/* Simple Lennard Jones Force Kernel using neighborlists
|
||||
* Calculates for every pair of atoms (i,j) with distance smaller r_cut
|
||||
* f_ij = 4*epsilon * ( (sigma/r_ij)^12 - (sigma/r_ij)^6 )
|
||||
* where r_ij is the distance of atoms (i,j).
|
||||
* The force on atom i is the sum over f_ij:
|
||||
* f_i = sum_j (f_ij)
|
||||
* Neighborlists are used in order to pre calculate which atoms j are
|
||||
* close enough to i to be able to contribute. By choosing a larger neighbor
|
||||
* cutoff than the force cutoff, the neighbor list can be reused several times
|
||||
* (typically 10 - 100).
|
||||
*/
|
||||
|
||||
struct ForceFunctor {
|
||||
|
||||
typedef t_x_array::execution_space execution_space; //Device Type for running the kernel
|
||||
typedef double2 value_type; // When energy calculation is requested return energy, and virial
|
||||
|
||||
t_x_array_randomread x; //atom positions
|
||||
t_f_array f; //atom forces
|
||||
t_int_1d_const numneigh; //number of neighbors per atom
|
||||
t_neighbors_const neighbors; //neighborlist
|
||||
double cutforcesq; //force cutoff
|
||||
double epsilon; //Potential parameter
|
||||
double sigma6; //Potential parameter
|
||||
|
||||
|
||||
ForceFunctor(System s) {
|
||||
x = s.d_x;
|
||||
f = s.f;
|
||||
numneigh = s.numneigh;
|
||||
neighbors = s.neighbors;
|
||||
cutforcesq = s.force_cutsq;
|
||||
epsilon = 1.0;
|
||||
sigma6 = 1.0;
|
||||
}
|
||||
|
||||
/* Operator for not calculating energy and virial */
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &i) const {
|
||||
force<0>(i);
|
||||
}
|
||||
|
||||
/* Operator for calculating energy and virial */
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &i, double2 &energy_virial) const {
|
||||
double2 ev = force<1>(i);
|
||||
energy_virial.x += ev.x;
|
||||
energy_virial.y += ev.y;
|
||||
}
|
||||
|
||||
template<int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double2 force(const int &i) const
|
||||
{
|
||||
const int numneighs = numneigh[i];
|
||||
const double xtmp = x(i, 0);
|
||||
const double ytmp = x(i, 1);
|
||||
const double ztmp = x(i, 2);
|
||||
double fix = 0;
|
||||
double fiy = 0;
|
||||
double fiz = 0;
|
||||
double energy = 0;
|
||||
double virial = 0;
|
||||
|
||||
//pragma simd forces vectorization (ignoring the performance objections of the compiler)
|
||||
//give hint to compiler that fix, fiy and fiz are used for reduction only
|
||||
|
||||
#ifdef USE_SIMD
|
||||
#pragma simd reduction (+: fix,fiy,fiz,energy,virial)
|
||||
#endif
|
||||
for(int k = 0; k < numneighs; k++) {
|
||||
const int j = neighbors(i, k);
|
||||
const double delx = xtmp - x(j, 0);
|
||||
const double dely = ytmp - x(j, 1);
|
||||
const double delz = ztmp - x(j, 2);
|
||||
const double rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
//if(i==0) printf("%i %i %lf %lf\n",i,j,rsq,cutforcesq);
|
||||
if(rsq < cutforcesq) {
|
||||
const double sr2 = 1.0 / rsq;
|
||||
const double sr6 = sr2 * sr2 * sr2 * sigma6;
|
||||
const double force = 48.0 * sr6 * (sr6 - 0.5) * sr2 * epsilon;
|
||||
fix += delx * force;
|
||||
fiy += dely * force;
|
||||
fiz += delz * force;
|
||||
|
||||
if(EVFLAG) {
|
||||
energy += sr6 * (sr6 - 1.0) * epsilon;
|
||||
virial += delx * delx * force + dely * dely * force + delz * delz * force;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f(i, 0) += fix;
|
||||
f(i, 1) += fiy;
|
||||
f(i, 2) += fiz;
|
||||
|
||||
double2 energy_virial ;
|
||||
energy_virial.x = 4.0 * energy ;
|
||||
energy_virial.y = 0.5 * virial ;
|
||||
return energy_virial;
|
||||
}
|
||||
|
||||
/* init and join functions when doing the reduction to obtain energy and virial */
|
||||
|
||||
KOKKOS_FUNCTION
|
||||
static void init(volatile value_type &update) {
|
||||
update.x = update.y = 0;
|
||||
}
|
||||
KOKKOS_FUNCTION
|
||||
static void join(volatile value_type &update ,
|
||||
const volatile value_type &source) {
|
||||
update.x += source.x ;
|
||||
update.y += source.y ;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
/* Calling function */
|
||||
|
||||
/* Run the force kernel over the s.nlocal local atoms.
 * With evflag == 0 only forces are computed and (0, 0) is returned;
 * otherwise the reduced (energy, virial) pair is returned. */
double2 force(System &s,int evflag) {

  ForceFunctor kernel(s);

  double2 totals ;
  totals.x = 0 ;
  totals.y = 0 ;

  if(evflag) {
    Kokkos::parallel_reduce(s.nlocal, kernel, totals);
  } else {
    Kokkos::parallel_for(s.nlocal, kernel);
  }

  // Wait for the kernel to finish before handing the result back.
  execution_space::fence();

  return totals;
}
|
||||
|
||||
@ -1,205 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include "system.h"
|
||||
|
||||
int create_system(System &system, int nx, int ny, int nz, double rho);
|
||||
int neigh_setup(System &system);
|
||||
int neigh_build(System &system);
|
||||
double2 force(System &system,int evflag);
|
||||
|
||||
/* simple MD Skeleton which
|
||||
* - constructs a simple FCC lattice,
|
||||
* - computes a neighborlist
|
||||
* - compute LJ-Force kernel a number of times
|
||||
*/
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
printf("Running MD Skeleton\n");
|
||||
/* Thread numbers for Host */
|
||||
|
||||
int num_threads = 1;
|
||||
int teams = 1;
|
||||
int device = 0; // Default device for GPU runs
|
||||
|
||||
/* avoid unused variable warnings */
|
||||
(void)num_threads;
|
||||
(void)teams;
|
||||
(void)device;
|
||||
|
||||
/* Default value for number of force calculations */
|
||||
|
||||
int iter = 100;
|
||||
|
||||
/* Default value for system size (4*nx*ny*nz atoms)
|
||||
* nx, ny and nz are set to system_size if not specififed on commandline */
|
||||
|
||||
int system_size = 20;
|
||||
int nx = -1;
|
||||
int ny = -1;
|
||||
int nz = -1;
|
||||
|
||||
int neighbor_size = 1; // Default bin size for neighbor list construction
|
||||
|
||||
double rho = 0.8442; // Number density of the system
|
||||
double delta = 0; // Scaling factor for random offsets of atom positions
|
||||
|
||||
|
||||
/* read in command-line arguments */
|
||||
|
||||
for(int i = 0; i < argc; i++) {
|
||||
if((strcmp(argv[i], "-t") == 0) || (strcmp(argv[i], "--num_threads") == 0)) {
|
||||
num_threads = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if((strcmp(argv[i], "--teams") == 0)) {
|
||||
teams = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if((strcmp(argv[i], "-d") == 0) || (strcmp(argv[i], "--device") == 0)) {
|
||||
device = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if((strcmp(argv[i], "--delta") == 0)) {
|
||||
delta = atof(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if((strcmp(argv[i], "-i") == 0) || (strcmp(argv[i], "--iter") == 0)) {
|
||||
iter = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if((strcmp(argv[i], "-rho") == 0)) {
|
||||
rho = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if((strcmp(argv[i], "-s") == 0) || (strcmp(argv[i], "--size") == 0)) {
|
||||
system_size = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if((strcmp(argv[i], "-nx") == 0)) {
|
||||
nx = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if((strcmp(argv[i], "-ny") == 0)) {
|
||||
ny = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if((strcmp(argv[i], "-nz") == 0)) {
|
||||
nz = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if((strcmp(argv[i], "-b") == 0) || (strcmp(argv[i], "--neigh_bins") == 0)) {
|
||||
neighbor_size = atoi(argv[++i]);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if( nx < 0 ) nx = system_size;
|
||||
if( ny < 0 ) ny = system_size;
|
||||
if( nz < 0 ) nz = system_size;
|
||||
|
||||
printf("-> Init Device\n");
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
Kokkos::HostSpace::execution_space::initialize(teams*num_threads);
|
||||
Kokkos::Cuda::SelectDevice select_device(device);
|
||||
Kokkos::Cuda::initialize(select_device);
|
||||
#elif defined( KOKKOS_HAVE_OPENMP )
|
||||
Kokkos::OpenMP::initialize(teams*num_threads);
|
||||
#elif defined( KOKKOS_HAVE_PTHREAD )
|
||||
Kokkos::Threads::initialize(teams*num_threads);
|
||||
#endif
|
||||
|
||||
System system;
|
||||
system.neigh_cut = 2.8;
|
||||
system.force_cut = 2.5;
|
||||
system.force_cutsq = system.force_cut*system.force_cut;
|
||||
system.delta = delta;
|
||||
|
||||
printf("-> Build system\n");
|
||||
create_system(system,nx,ny,nz,rho);
|
||||
|
||||
printf("-> Created %i atoms and %i ghost atoms\n",system.nlocal,system.nghost);
|
||||
|
||||
system.nbinx = system.box.xprd/neighbor_size+1;
|
||||
system.nbiny = system.box.yprd/neighbor_size+1;
|
||||
system.nbinz = system.box.zprd/neighbor_size+1;
|
||||
|
||||
|
||||
printf("-> Building Neighborlist\n");
|
||||
|
||||
neigh_setup(system);
|
||||
neigh_build(system);
|
||||
|
||||
double2 ev = force(system,1);
|
||||
|
||||
printf("-> Calculate Energy: %f Virial: %f\n",ev.x,ev.y);
|
||||
|
||||
printf("-> Running %i force calculations\n",iter);
|
||||
|
||||
Kokkos::Impl::Timer timer;
|
||||
|
||||
for(int i=0;i<iter;i++) {
|
||||
force(system,0);
|
||||
}
|
||||
|
||||
|
||||
double time = timer.seconds();
|
||||
printf("Time: %e s for %i iterations with %i atoms\n",time,iter,system.nlocal);
|
||||
|
||||
execution_space::finalize();
|
||||
}
|
||||
@ -1,430 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <system.h>
|
||||
#include <cstdio>
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#define SMALL 1.0e-6
|
||||
#define FACTOR 0.999
|
||||
|
||||
/* BinningFunctor puts atoms into bins of the simulation box
|
||||
* Neighborlists are then created by checking only distances of atoms
|
||||
* in adjacent bins. That makes neighborlist construction an O(N) operation.
|
||||
*/
|
||||
|
||||
struct BinningFunctor {
|
||||
typedef t_int_2d::execution_space execution_space;
|
||||
|
||||
System s;
|
||||
|
||||
int atoms_per_bin;
|
||||
|
||||
BinningFunctor(System _s): s(_s) {
|
||||
atoms_per_bin = s.bins.dimension_1();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &i) const
|
||||
{
|
||||
const int ibin = coord2bin(s.d_x(i, 0), s.d_x(i, 1), s.d_x(i, 2));
|
||||
|
||||
const int ac = Kokkos::atomic_fetch_add(&s.bincount[ibin], 1);
|
||||
|
||||
if(ac < atoms_per_bin) {
|
||||
s.bins(ibin, ac) = i;
|
||||
} else if(s.d_resize(0) < ac) {
|
||||
s.d_resize(0) = ac;
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(double x, double y, double z) const
|
||||
{
|
||||
int ix, iy, iz;
|
||||
|
||||
if(x >= s.box.xprd)
|
||||
ix = (int)((x - s.box.xprd) * s.bininvx) + s.nbinx - s.mbinxlo;
|
||||
else if(x >= 0.0)
|
||||
ix = (int)(x * s.bininvx) - s.mbinxlo;
|
||||
else
|
||||
ix = (int)(x * s.bininvx) - s.mbinxlo - 1;
|
||||
|
||||
if(y >= s.box.yprd)
|
||||
iy = (int)((y - s.box.yprd) * s.bininvy) + s.nbiny - s.mbinylo;
|
||||
else if(y >= 0.0)
|
||||
iy = (int)(y * s.bininvy) - s.mbinylo;
|
||||
else
|
||||
iy = (int)(y * s.bininvy) - s.mbinylo - 1;
|
||||
|
||||
if(z >= s.box.zprd)
|
||||
iz = (int)((z - s.box.zprd) * s.bininvz) + s.nbinz - s.mbinzlo;
|
||||
else if(z >= 0.0)
|
||||
iz = (int)(z * s.bininvz) - s.mbinzlo;
|
||||
else
|
||||
iz = (int)(z * s.bininvz) - s.mbinzlo - 1;
|
||||
|
||||
return (iz * s.mbiny * s.mbinx + iy * s.mbinx + ix + 1);
|
||||
}
|
||||
};
|
||||
|
||||
/* Build the actual neighborlist*/
|
||||
|
||||
struct BuildFunctor {
|
||||
|
||||
typedef t_int_2d::execution_space execution_space;
|
||||
|
||||
System s;
|
||||
|
||||
int maxneighs;
|
||||
BuildFunctor(System _s): s(_s) {
|
||||
maxneighs = s.neighbors.dimension_1();
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &i) const
|
||||
{
|
||||
int n = 0;
|
||||
|
||||
const t_int_1d_const_um bincount_c = s.bincount;
|
||||
|
||||
const double xtmp = s.d_x(i, 0);
|
||||
const double ytmp = s.d_x(i, 1);
|
||||
const double ztmp = s.d_x(i, 2);
|
||||
|
||||
const int ibin = coord2bin(xtmp, ytmp, ztmp);
|
||||
|
||||
// loop over all bins in neighborhood (includes ibin)
|
||||
for(int k = 0; k < s.nstencil; k++) {
|
||||
const int jbin = ibin + s.d_stencil[k];
|
||||
|
||||
// get subview of jbin
|
||||
const t_int_1d_const_um loc_bin =
|
||||
Kokkos::subview(s.bins,jbin,Kokkos::ALL());
|
||||
|
||||
if(ibin == jbin)
|
||||
for(int m = 0; m < bincount_c[jbin]; m++) {
|
||||
const int j = loc_bin[m];
|
||||
|
||||
//for same bin as atom i skip j if i==j
|
||||
if (j == i) continue;
|
||||
|
||||
const double delx = xtmp - s.d_x(j, 0);
|
||||
const double dely = ytmp - s.d_x(j, 1);
|
||||
const double delz = ztmp - s.d_x(j, 2);
|
||||
const double rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if(rsq <= s.neigh_cutsq && n<maxneighs) s.neighbors(i,n++) = j;
|
||||
}
|
||||
else {
|
||||
for(int m = 0; m < bincount_c[jbin]; m++) {
|
||||
const int j = loc_bin[m];
|
||||
|
||||
const double delx = xtmp - s.d_x(j, 0);
|
||||
const double dely = ytmp - s.d_x(j, 1);
|
||||
const double delz = ztmp - s.d_x(j, 2);
|
||||
const double rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if(rsq <= s.neigh_cutsq && n<maxneighs) s.neighbors(i,n++) = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.numneigh[i] = n;
|
||||
|
||||
if(n >= maxneighs) {
|
||||
if(n >= s.d_resize(0)) s.d_resize(0) = n;
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(double x, double y, double z) const
|
||||
{
|
||||
int ix, iy, iz;
|
||||
|
||||
if(x >= s.box.xprd)
|
||||
ix = (int)((x - s.box.xprd) * s.bininvx) + s.nbinx - s.mbinxlo;
|
||||
else if(x >= 0.0)
|
||||
ix = (int)(x * s.bininvx) - s.mbinxlo;
|
||||
else
|
||||
ix = (int)(x * s.bininvx) - s.mbinxlo - 1;
|
||||
|
||||
if(y >= s.box.yprd)
|
||||
iy = (int)((y - s.box.yprd) * s.bininvy) + s.nbiny - s.mbinylo;
|
||||
else if(y >= 0.0)
|
||||
iy = (int)(y * s.bininvy) - s.mbinylo;
|
||||
else
|
||||
iy = (int)(y * s.bininvy) - s.mbinylo - 1;
|
||||
|
||||
if(z >= s.box.zprd)
|
||||
iz = (int)((z - s.box.zprd) * s.bininvz) + s.nbinz - s.mbinzlo;
|
||||
else if(z >= 0.0)
|
||||
iz = (int)(z * s.bininvz) - s.mbinzlo;
|
||||
else
|
||||
iz = (int)(z * s.bininvz) - s.mbinzlo - 1;
|
||||
|
||||
return (iz * s.mbiny * s.mbinx + iy * s.mbinx + ix + 1);
|
||||
}
|
||||
};
|
||||
|
||||
/* Reset an array to zero */
|
||||
|
||||
struct MemsetZeroFunctor {
|
||||
typedef t_x_array::execution_space execution_space ;
|
||||
void* ptr;
|
||||
KOKKOS_INLINE_FUNCTION void operator()(const int i) const {
|
||||
((int*)ptr)[i] = 0;
|
||||
}
|
||||
};
|
||||
|
||||
/* Calculate distance of two bins */
|
||||
|
||||
/* Squared distance between a bin and the bin at integer offset (i, j, k).
 * Along each axis the distance is |offset| - 1 bin widths (zero for offset 0),
 * i.e. the gap between the closest faces of the two bins. */
double bindist(System &s, int i, int j, int k)
{
  const double delx = (i > 0)  ? (i - 1) * s.binsizex
                    : (i == 0) ? 0.0
                               : (i + 1) * s.binsizex;

  const double dely = (j > 0)  ? (j - 1) * s.binsizey
                    : (j == 0) ? 0.0
                               : (j + 1) * s.binsizey;

  const double delz = (k > 0)  ? (k - 1) * s.binsizez
                    : (k == 0) ? 0.0
                               : (k + 1) * s.binsizez;

  return (delx * delx + dely * dely + delz * delz);
}
|
||||
|
||||
/* Setup the neighborlist construction
|
||||
* Determine binsizes, a stencil for defining adjacency, etc.
|
||||
*/
|
||||
|
||||
/* Prepare everything the neighbor list build needs: bin sizes, the extent of
 * the bin grid (including the region one neighbor cutoff outside the box),
 * the stencil of bin offsets to search, and the arrays themselves.
 * s.nbinx/nbiny/nbinz, s.neigh_cut, s.box and s.natoms are read here and must
 * have been set by the caller. */
void neigh_setup(System &s) {

  s.neigh_cutsq = s.neigh_cut * s.neigh_cut;

  /* Bin widths follow from the bin counts chosen by the caller, so the bins
   * divide the box evenly. */
  s.binsizex = s.box.xprd / s.nbinx;
  s.binsizey = s.box.yprd / s.nbiny;
  s.binsizez = s.box.zprd / s.nbinz;

  s.bininvx = 1.0 / s.binsizex;
  s.bininvy = 1.0 / s.binsizey;
  s.bininvz = 1.0 / s.binsizez;

  /* Per axis: lowest and highest bin index touched by the box enlarged by the
   * neighbor cutoff (plus a small safety margin), then one extra bin on each
   * side to ensure stencil coverage. */

  /* x axis */
  const double x_lo = s.box.xlo - s.neigh_cut - SMALL * s.box.xprd;
  s.mbinxlo = static_cast<int>(x_lo * s.bininvx);
  if(x_lo < 0.0) s.mbinxlo -= 1;

  const double x_hi = s.box.xhi + s.neigh_cut + SMALL * s.box.xprd;
  int mbinxhi = static_cast<int>(x_hi * s.bininvx);

  s.mbinxlo -= 1;
  mbinxhi += 1;
  s.mbinx = mbinxhi - s.mbinxlo + 1;

  /* y axis */
  const double y_lo = s.box.ylo - s.neigh_cut - SMALL * s.box.yprd;
  s.mbinylo = static_cast<int>(y_lo * s.bininvy);
  if(y_lo < 0.0) s.mbinylo -= 1;

  const double y_hi = s.box.yhi + s.neigh_cut + SMALL * s.box.yprd;
  int mbinyhi = static_cast<int>(y_hi * s.bininvy);

  s.mbinylo -= 1;
  mbinyhi += 1;
  s.mbiny = mbinyhi - s.mbinylo + 1;

  /* z axis */
  const double z_lo = s.box.zlo - s.neigh_cut - SMALL * s.box.zprd;
  s.mbinzlo = static_cast<int>(z_lo * s.bininvz);
  if(z_lo < 0.0) s.mbinzlo -= 1;

  const double z_hi = s.box.zhi + s.neigh_cut + SMALL * s.box.zprd;
  int mbinzhi = static_cast<int>(z_hi * s.bininvz);

  s.mbinzlo -= 1;
  mbinzhi += 1;
  s.mbinz = mbinzhi - s.mbinzlo + 1;

  /* Stencil: all bins whose closest corner to the central bin is within the
   * neighbor cutoff, including the central bin itself.
   * next(xyz) = how far the stencil could possibly extend along each axis.
   * FACTOR < 1.0 covers the special case of the LJ benchmark, so that a
   * correct-size stencil is created when there are 3 bins for every 5
   * lattice spacings. */
  int nextx = static_cast<int>(s.neigh_cut * s.bininvx);
  if(nextx * s.binsizex < FACTOR * s.neigh_cut) nextx++;

  int nexty = static_cast<int>(s.neigh_cut * s.bininvy);
  if(nexty * s.binsizey < FACTOR * s.neigh_cut) nexty++;

  int nextz = static_cast<int>(s.neigh_cut * s.bininvz);
  if(nextz * s.binsizez < FACTOR * s.neigh_cut) nextz++;

  /* Upper bound on the number of stencil entries. */
  const int nmax = (2 * nextz + 1) * (2 * nexty + 1) * (2 * nextx + 1);
  s.d_stencil = t_int_1d("stencil", nmax);
  s.h_stencil = Kokkos::create_mirror_view(s.d_stencil);
  s.nstencil = 0;

  /* Fill the host copy with the linear offset of every bin in range. */
  for(int k = -nextz; k <= nextz; k++) {
    for(int j = -nexty; j <= nexty; j++) {
      for(int i = -nextx; i <= nextx; i++) {
        if(bindist(s,i, j, k) < s.neigh_cutsq) {
          s.h_stencil(s.nstencil++) = k * s.mbiny * s.mbinx + j * s.mbinx + i;
        }
      }
    }
  }

  Kokkos::deep_copy(s.d_stencil, s.h_stencil);

  /* Allocate neighbor arrays. The initial capacities (8 atoms per bin,
   * 80 neighbors per atom) are starting sizes; neigh_build() reallocates
   * when they turn out to be too small. */
  s.mbins = s.mbinx * s.mbiny * s.mbinz;
  s.bincount = t_int_1d("bincount", s.mbins);
  s.bins = t_int_2d("bins", s.mbins, 8);

  s.neighbors = t_neighbors("neighbors",s.natoms,80);
  s.numneigh = t_int_1d("numneigh",s.natoms);

  s.d_resize = t_int_scalar("resize");
  s.h_resize = Kokkos::create_mirror_view(s.d_resize);
}
|
||||
|
||||
|
||||
/* Build the neighborlist
|
||||
* This is a try and rerun algorithm for handling the case where the bins array
|
||||
* and the neighbors array are not big enough. So if one is too small, it will
|
||||
* reallocate and rerun the binning algorithm or the neighborlist construction.
|
||||
*/
|
||||
|
||||
/* Rebuild the neighbor list of `s` in two try-and-rerun stages.
 *
 * Stage 1 bins all s.natoms atoms; stage 2 builds neighbor lists for the
 * s.nlocal local atoms. Each stage clears the resize flag, runs its functor,
 * and copies the flag back to the host. A non-zero flag triggers a
 * reallocation of the affected array before the stage is repeated.
 */
void neigh_build(System &s) {

  /* Stage 1: binning of atoms */
  do {
    /* Clear the resize request on host and device */
    s.h_resize(0) = 0;
    Kokkos::deep_copy(s.d_resize, s.h_resize);

    /* Zero the per-bin atom counters */
    MemsetZeroFunctor zero_counts;
    zero_counts.ptr = static_cast<void*>(s.bincount.ptr_on_device());
    Kokkos::parallel_for(s.mbins, zero_counts);
    execution_space::fence();

    /* Sort every atom (local and ghost) into its bin */
    BinningFunctor bin_atoms(s);
    Kokkos::parallel_for(s.natoms, bin_atoms);
    execution_space::fence();

    /* If bins was not large enough, reallocate it; the loop then reruns */
    Kokkos::deep_copy(s.h_resize, s.d_resize);

    const int bin_request = s.h_resize(0);
    if(bin_request != 0) {
      const int atoms_per_bin = bin_request + 2;
      s.bins = t_int_2d("bins", s.mbins, atoms_per_bin);
    }
  } while(s.h_resize(0) > 0);

  /* Stage 2: neighborlist construction */
  do {
    /* Clear the resize request on host and device */
    s.h_resize(0) = 0;
    Kokkos::deep_copy(s.d_resize, s.h_resize);

    BuildFunctor build_lists(s);
    Kokkos::parallel_for(s.nlocal, build_lists);
    execution_space::fence();

    /* If neighbors was not large enough, reallocate it with 20% headroom;
       the loop then reruns */
    Kokkos::deep_copy(s.h_resize, s.d_resize);

    const int neigh_request = s.h_resize(0);
    if(neigh_request != 0) {
      const int maxneighs = static_cast<int>(neigh_request * 1.2);
      s.neighbors = t_neighbors("neighbors", s.natoms, maxneighs);
    }
  } while(s.h_resize(0));
}
|
||||
@ -1,271 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <system.h>
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
/* initialize atoms on fcc lattice in parallel fashion */
|
||||
|
||||
/* Two-argument maximum / minimum.
   Every parameter use is parenthesized so that arguments containing
   lower-precedence operators (|, &, ?:, ...) expand correctly.
   Note: each argument is still evaluated twice, so avoid side effects. */
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
|
||||
|
||||
/* Index of the sub-box (edge length `subboxdim`) that contains lattice index
   `index`, i.e. floor(index / subboxdim), valid for negative indices too. */
static int subbox_of(int index, int subboxdim)
{
  return (index >= 0) ? index / subboxdim
                      : -((-index + subboxdim - 1) / subboxdim);
}

/* Create local and ghost atoms on an fcc lattice.
 *
 * system : receives the box, the atom counts (nlocal, nghost, natoms) and
 *          newly allocated d_x / h_x / f views; system.neigh_cut and
 *          system.delta are read and must be set by the caller.
 * nx,ny,nz : number of fcc unit cells per dimension.
 * rho    : number density; the cell edge is (4/rho)^(1/3).
 *
 * Local atoms occupy rows [0, nlocal) of the position array and ghost atoms
 * rows [nlocal, natoms). Every position is displaced by a pseudo-random
 * offset scaled by system.delta (fixed seed, so runs are reproducible).
 * Positions are finally copied from h_x to d_x.
 *
 * Returns 0 on success, 1 if the number of generated local or ghost atoms
 * does not match the expected count.
 */
int create_system(System &system, int nx, int ny, int nz, double rho)
{
  /* Box setup */

  const double alat = pow((4.0 / rho), (1.0 / 3.0));

  system.box.xprd = nx * alat;
  system.box.yprd = ny * alat;
  system.box.zprd = nz * alat;
  system.box.xlo = 0;
  system.box.ylo = 0;
  system.box.zlo = 0;
  system.box.xhi = system.box.xprd;
  system.box.yhi = system.box.yprd;
  system.box.zhi = system.box.zprd;

  /* thickness of the ghost layer, in unit cells */
  const int ghost_dist = int(system.neigh_cut / alat) + 1;

  /* total # of atoms */

  system.nlocal = 4 * nx * ny * nz;
  system.nghost = 4 * (nx + 2 * ghost_dist) *
                      (ny + 2 * ghost_dist) *
                      (nz + 2 * ghost_dist) -
                  system.nlocal;
  system.natoms = system.nlocal + system.nghost;

  system.d_x = t_x_array("X", system.natoms);
  system.h_x = Kokkos::create_mirror_view(system.d_x);
  system.f = t_f_array("F", system.natoms);

  /* determine loop bounds of lattice subsection that overlaps my sub-box
     ensure loop bounds do not exceed nx,ny,nz */

  int ilo = static_cast<int>(system.box.xlo / (0.5 * alat) - 1);
  int ihi = static_cast<int>(system.box.xhi / (0.5 * alat) + 1);
  int jlo = static_cast<int>(system.box.ylo / (0.5 * alat) - 1);
  int jhi = static_cast<int>(system.box.yhi / (0.5 * alat) + 1);
  int klo = static_cast<int>(system.box.zlo / (0.5 * alat) - 1);
  int khi = static_cast<int>(system.box.zhi / (0.5 * alat) + 1);

  ilo = MAX(ilo, 0);
  ihi = MIN(ihi, 2 * nx - 1);
  jlo = MAX(jlo, 0);
  jhi = MIN(jhi, 2 * ny - 1);
  klo = MAX(klo, 0);
  khi = MIN(khi, 2 * nz - 1);

  /* generates positions of atoms on fcc sublattice */

  const int subboxdim = 8;

  srand(3718273);

  /* create non-ghost atoms */
  {
    /* (sx,sy,sz) walks the sites inside one sub-box,
       (ox,oy,oz) walks the sub-boxes */
    int sx = 0, sy = 0, sz = 0;
    int ox = 0, oy = 0, oz = 0;

    int n = 0;

    while(oz * subboxdim <= khi) {
      const int k = oz * subboxdim + sz;
      const int j = oy * subboxdim + sy;
      const int i = ox * subboxdim + sx;

      /* fcc sites are the half-lattice points with even index sum */
      if(((i + j + k) % 2 == 0) &&
          (i >= ilo) && (i <= ihi) &&
          (j >= jlo) && (j <= jhi) &&
          (k >= klo) && (k <= khi)) {

        /* redraw the random displacement until the atom lies inside the box */
        bool placed = false;

        while(!placed) {
          const double xtmp = 0.5 * alat * i + system.delta/1000*(rand()%1000-500);
          const double ytmp = 0.5 * alat * j + system.delta/1000*(rand()%1000-500);
          const double ztmp = 0.5 * alat * k + system.delta/1000*(rand()%1000-500);

          if(xtmp >= system.box.xlo && xtmp < system.box.xhi &&
              ytmp >= system.box.ylo && ytmp < system.box.yhi &&
              ztmp >= system.box.zlo && ztmp < system.box.zhi) {
            system.h_x(n,0) = xtmp;
            system.h_x(n,1) = ytmp;
            system.h_x(n,2) = ztmp;
            n++;
            placed = true;
          }
        }
      }

      /* advance to the next site / sub-box */
      sx++;

      if(sx == subboxdim) {
        sx = 0;
        sy++;
      }

      if(sy == subboxdim) {
        sy = 0;
        sz++;
      }

      if(sz == subboxdim) {
        sz = 0;
        ox++;
      }

      if(ox * subboxdim > ihi) {
        ox = 0;
        oy++;
      }

      if(oy * subboxdim > jhi) {
        oy = 0;
        oz++;
      }
    }

    /* check that correct # of atoms were created */

    if(system.nlocal != n) {
      printf("Created incorrect # of atoms\n");

      return 1;
    }
  }

  /* create ghost atoms */
  {
    const int ilo_g = ilo - 2 * ghost_dist;
    const int jlo_g = jlo - 2 * ghost_dist;
    const int klo_g = klo - 2 * ghost_dist;
    const int ihi_g = ihi + 2 * ghost_dist;
    const int jhi_g = jhi + 2 * ghost_dist;
    const int khi_g = khi + 2 * ghost_dist;

    /* first sub-box in each dimension that can contain a ghost site;
       sub-boxes below it hold no index >= the ghost lower bound, so
       starting here visits the same sites in the same order */
    const int ox_lo = subbox_of(ilo_g, subboxdim);
    const int oy_lo = subbox_of(jlo_g, subboxdim);
    const int oz_lo = subbox_of(klo_g, subboxdim);

    int sx = 0, sy = 0, sz = 0;
    int ox = ox_lo, oy = oy_lo, oz = oz_lo;

    int n = system.nlocal;

    while(oz * subboxdim <= khi_g) {
      const int k = oz * subboxdim + sz;
      const int j = oy * subboxdim + sy;
      const int i = ox * subboxdim + sx;

      /* fcc site inside the ghost-extended range but outside the local range */
      if(((i + j + k) % 2 == 0) &&
          (i >= ilo_g) && (i <= ihi_g) &&
          (j >= jlo_g) && (j <= jhi_g) &&
          (k >= klo_g) && (k <= khi_g) &&
          ((i < ilo) || (i > ihi) ||
           (j < jlo) || (j > jhi) ||
           (k < klo) || (k > khi))
        ) {

        /* never write past the end of the position array */
        if(n >= system.natoms) {
          printf("Created incorrect # of ghost atoms\n");

          return 1;
        }

        const double xtmp = 0.5 * alat * i;
        const double ytmp = 0.5 * alat * j;
        const double ztmp = 0.5 * alat * k;

        system.h_x(n,0) = xtmp + system.delta/1000*(rand()%1000-500);
        system.h_x(n,1) = ytmp + system.delta/1000*(rand()%1000-500);
        system.h_x(n,2) = ztmp + system.delta/1000*(rand()%1000-500);
        n++;
      }

      /* advance to the next site / sub-box */
      sx++;

      if(sx == subboxdim) {
        sx = 0;
        sy++;
      }

      if(sy == subboxdim) {
        sy = 0;
        sz++;
      }

      if(sz == subboxdim) {
        sz = 0;
        ox++;
      }

      if(ox * subboxdim > ihi_g) {
        ox = ox_lo;
        oy++;
      }

      if(oy * subboxdim > jhi_g) {
        oy = oy_lo;
        oz++;
      }
    }

    /* check that correct # of ghost atoms were created */

    if(system.natoms != n) {
      printf("Created incorrect # of ghost atoms\n");

      return 1;
    }
  }

  Kokkos::deep_copy(system.d_x, system.h_x);
  return 0;
}
|
||||
|
||||
@ -1,92 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef SYSTEM_H_
|
||||
#define SYSTEM_H_
|
||||
|
||||
#include <types.h>
|
||||
|
||||
/* Axis-aligned simulation box: edge lengths plus lower/upper bounds
   per dimension. Plain aggregate; member order is part of the layout. */
struct Box {
  /* edge lengths */
  double xprd;
  double yprd;
  double zprd;

  /* x bounds */
  double xlo;
  double xhi;

  /* y bounds */
  double ylo;
  double yhi;

  /* z bounds */
  double zlo;
  double zhi;
};
|
||||
|
||||
struct System {
|
||||
Box box;
|
||||
|
||||
int natoms;
|
||||
int nlocal;
|
||||
int nghost;
|
||||
|
||||
t_x_array d_x;
|
||||
t_x_array_host h_x;
|
||||
|
||||
t_f_array f;
|
||||
|
||||
t_neighbors neighbors;
|
||||
t_int_1d numneigh;
|
||||
|
||||
double delta;
|
||||
|
||||
double neigh_cut,neigh_cutsq;
|
||||
|
||||
int mbins;
|
||||
int nbinx,nbiny,nbinz;
|
||||
int mbinx,mbiny,mbinz;
|
||||
int mbinxlo,mbinylo,mbinzlo;
|
||||
double binsizex,binsizey,binsizez;
|
||||
double bininvx,bininvy,bininvz;
|
||||
|
||||
t_int_1d bincount;
|
||||
t_int_2d bins;
|
||||
t_int_scalar d_resize;
|
||||
t_int_scalar_host h_resize;
|
||||
t_int_1d d_stencil;
|
||||
t_int_1d_host h_stencil;
|
||||
int nstencil;
|
||||
|
||||
double force_cut,force_cutsq;
|
||||
};
|
||||
#endif
|
||||
@ -1,118 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef TYPES_H_
|
||||
#define TYPES_H_
|
||||
|
||||
/* Determine default device type and necessary includes */
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
/* All views and kernels in this example use Kokkos' default execution space */
typedef Kokkos::DefaultExecutionSpace execution_space;
|
||||
|
||||
#if ! defined( KOKKOS_HAVE_CUDA )
|
||||
struct double2 {
|
||||
double x, y;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double2(double xinit, double yinit) {
|
||||
x = xinit;
|
||||
y = yinit;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double2() {
|
||||
x = 0.0;
|
||||
y = 0.0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double2& operator += (const double2& src) {
|
||||
x+=src.x;
|
||||
y+=src.y;
|
||||
return *this;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
volatile double2& operator += (const volatile double2& src) volatile {
|
||||
x+=src.x;
|
||||
y+=src.y;
|
||||
return *this;
|
||||
}
|
||||
|
||||
};
|
||||
#endif
|
||||
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
/* Define types used throughout the code */
|
||||
|
||||
//Position arrays
|
||||
typedef Kokkos::View<double*[3], Kokkos::LayoutRight, execution_space> t_x_array ;
|
||||
typedef t_x_array::HostMirror t_x_array_host ;
|
||||
typedef Kokkos::View<const double*[3], Kokkos::LayoutRight, execution_space> t_x_array_const ;
|
||||
typedef Kokkos::View<const double*[3], Kokkos::LayoutRight, execution_space, Kokkos::MemoryRandomAccess > t_x_array_randomread ;
|
||||
|
||||
//Force array
|
||||
typedef Kokkos::View<double*[3], execution_space> t_f_array ;
|
||||
|
||||
|
||||
//Neighborlist
|
||||
typedef Kokkos::View<int**, execution_space > t_neighbors ;
|
||||
typedef Kokkos::View<const int**, execution_space > t_neighbors_const ;
|
||||
typedef Kokkos::View<int*, execution_space, Kokkos::MemoryUnmanaged > t_neighbors_sub ;
|
||||
typedef Kokkos::View<const int*, execution_space, Kokkos::MemoryUnmanaged > t_neighbors_const_sub ;
|
||||
|
||||
//1d int array
|
||||
typedef Kokkos::View<int*, execution_space > t_int_1d ;
|
||||
typedef t_int_1d::HostMirror t_int_1d_host ;
|
||||
typedef Kokkos::View<const int*, execution_space > t_int_1d_const ;
|
||||
typedef Kokkos::View<int*, execution_space , Kokkos::MemoryUnmanaged> t_int_1d_um ;
|
||||
typedef Kokkos::View<const int* , execution_space , Kokkos::MemoryUnmanaged> t_int_1d_const_um ;
|
||||
|
||||
//2d int array
|
||||
typedef Kokkos::View<int**, Kokkos::LayoutRight, execution_space > t_int_2d ;
|
||||
typedef t_int_2d::HostMirror t_int_2d_host ;
|
||||
|
||||
//Scalar ints
|
||||
typedef Kokkos::View<int[1], Kokkos::LayoutLeft, execution_space> t_int_scalar ;
|
||||
typedef t_int_scalar::HostMirror t_int_scalar_host ;
|
||||
|
||||
#endif /* TYPES_H_ */
|
||||
@ -1,610 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_BOXMESHFIXTURE_HPP
|
||||
#define KOKKOS_BOXMESHFIXTURE_HPP
|
||||
|
||||
#include <cmath>
|
||||
#include <stdexcept>
|
||||
#include <sstream>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <BoxMeshPartition.hpp>
|
||||
#include <FEMesh.hpp>
|
||||
#include <HexElement.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
struct FixtureElementHex8 {
|
||||
|
||||
static const unsigned element_node_count = 8 ;
|
||||
|
||||
HybridFEM::HexElement_TensorData< element_node_count > elem_data ;
|
||||
BoxBoundsLinear box_bounds ;
|
||||
|
||||
FixtureElementHex8() : elem_data(), box_bounds() {}
|
||||
|
||||
static void create_node_boxes_from_vertex_boxes(
|
||||
const BoxType & vertex_box_global ,
|
||||
const std::vector< BoxType > & vertex_box_parts ,
|
||||
BoxType & node_box_global ,
|
||||
std::vector< BoxType > & node_box_parts )
|
||||
{
|
||||
node_box_global = vertex_box_global ;
|
||||
node_box_parts = vertex_box_parts ;
|
||||
}
|
||||
|
||||
void elem_to_node( const unsigned node_local , unsigned coord[] ) const
|
||||
{
|
||||
coord[0] += elem_data.eval_map[ node_local ][0] ;
|
||||
coord[1] += elem_data.eval_map[ node_local ][1] ;
|
||||
coord[2] += elem_data.eval_map[ node_local ][2] ;
|
||||
}
|
||||
};
|
||||
|
||||
struct FixtureElementHex27 {
|
||||
static const unsigned element_node_count = 27 ;
|
||||
|
||||
HybridFEM::HexElement_TensorData< element_node_count > elem_data ;
|
||||
BoxBoundsQuadratic box_bounds ;
|
||||
|
||||
FixtureElementHex27() : elem_data(), box_bounds() {}
|
||||
|
||||
static void create_node_boxes_from_vertex_boxes(
|
||||
const BoxType & vertex_box_global ,
|
||||
const std::vector< BoxType > & vertex_box_parts ,
|
||||
BoxType & node_box_global ,
|
||||
std::vector< BoxType > & node_box_parts )
|
||||
{
|
||||
node_box_global = vertex_box_global ;
|
||||
node_box_parts = vertex_box_parts ;
|
||||
|
||||
node_box_global[0][1] = 2 * node_box_global[0][1] - 1 ;
|
||||
node_box_global[1][1] = 2 * node_box_global[1][1] - 1 ;
|
||||
node_box_global[2][1] = 2 * node_box_global[2][1] - 1 ;
|
||||
|
||||
for ( unsigned i = 0 ; i < vertex_box_parts.size() ; ++i ) {
|
||||
node_box_parts[i][0][0] = 2 * node_box_parts[i][0][0] ;
|
||||
node_box_parts[i][1][0] = 2 * node_box_parts[i][1][0] ;
|
||||
node_box_parts[i][2][0] = 2 * node_box_parts[i][2][0] ;
|
||||
|
||||
node_box_parts[i][0][1] =
|
||||
std::min( node_box_global[0][1] , 2 * node_box_parts[i][0][1] );
|
||||
node_box_parts[i][1][1] =
|
||||
std::min( node_box_global[1][1] , 2 * node_box_parts[i][1][1] );
|
||||
node_box_parts[i][2][1] =
|
||||
std::min( node_box_global[2][1] , 2 * node_box_parts[i][2][1] );
|
||||
}
|
||||
}
|
||||
|
||||
void elem_to_node( const unsigned node_local , unsigned coord[] ) const
|
||||
{
|
||||
coord[0] = 2 * coord[0] + elem_data.eval_map[ node_local ][0] ;
|
||||
coord[1] = 2 * coord[1] + elem_data.eval_map[ node_local ][1] ;
|
||||
coord[2] = 2 * coord[2] + elem_data.eval_map[ node_local ][2] ;
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar , class Device , class ElementSpec >
|
||||
struct BoxMeshFixture {
|
||||
|
||||
typedef Scalar coordinate_scalar_type ;
|
||||
typedef Device execution_space ;
|
||||
|
||||
static const unsigned element_node_count = ElementSpec::element_node_count ;
|
||||
|
||||
typedef HybridFEM::FEMesh< coordinate_scalar_type ,
|
||||
element_node_count ,
|
||||
execution_space > FEMeshType ;
|
||||
|
||||
typedef typename FEMeshType::node_coords_type node_coords_type ;
|
||||
typedef typename FEMeshType::elem_node_ids_type elem_node_ids_type ;
|
||||
typedef typename FEMeshType::node_elem_ids_type node_elem_ids_type ;
|
||||
|
||||
|
||||
// Consistency check of a generated mesh; throws std::runtime_error on the
// first violation found.
//   1. Every (element, local node) pair recorded for a node in node_elem_ids
//      must map back to that node through elem_node_ids.
//   2. Within each element, the coordinates of local node nn must equal the
//      coordinates of local node 0 offset by the element's eval_map[nn].
static void verify(
  const typename FEMeshType::node_coords_type::HostMirror   & node_coords ,
  const typename FEMeshType::elem_node_ids_type::HostMirror & elem_node_ids ,
  const typename FEMeshType::node_elem_ids_type::HostMirror & node_elem_ids )
{
  typedef typename FEMeshType::size_type size_type ;

  const size_type node_count_total = node_coords.dimension_0();
  const size_type elem_count_total = elem_node_ids.dimension_0();

  const ElementSpec element ;

  // Check 1: node -> element map agrees with element -> node map

  for ( size_type node_index = 0 ;
        node_index < node_count_total ; ++node_index ) {

    for ( size_type entry = node_elem_ids.row_map[ node_index ] ;
          entry < node_elem_ids.row_map[ node_index + 1 ] ; ++entry ) {

      const size_type elem_index = node_elem_ids.entries(entry,0);
      const size_type node_local = node_elem_ids.entries(entry,1);
      const size_type en_id      = elem_node_ids(elem_index,node_local);

      if ( node_index == en_id ) { continue ; }

      std::ostringstream err ;
      err << "BoxMeshFixture node_elem_ids error"
          << " : node_index(" << node_index
          << ") entry(" << entry
          << ") elem_index(" << elem_index
          << ") node_local(" << node_local
          << ") elem_node_id(" << en_id
          << ")" ;
      throw std::runtime_error( err.str() );
    }
  }

  // Check 2: node coordinates within each element follow eval_map

  for ( size_type elem_index = 0 ;
        elem_index < elem_count_total; ++elem_index ) {

    // Gather the coordinates of all nodes of this element
    coordinate_scalar_type elem_node_coord[ element_node_count ][3] ;

    for ( size_type nn = 0 ; nn < element_node_count ; ++nn ) {
      const size_type node_index = elem_node_ids( elem_index , nn );

      for ( size_type nc = 0 ; nc < 3 ; ++nc ) {
        elem_node_coord[nn][nc] = node_coords( node_index , nc );
      }
    }

    for ( size_type nn = 0 ; nn < element_node_count ; ++nn ) {

      const unsigned ix = element.elem_data.eval_map[nn][0] ;
      const unsigned iy = element.elem_data.eval_map[nn][1] ;
      const unsigned iz = element.elem_data.eval_map[nn][2] ;

      const bool matches =
        elem_node_coord[nn][0] == elem_node_coord[0][0] + ix &&
        elem_node_coord[nn][1] == elem_node_coord[0][1] + iy &&
        elem_node_coord[nn][2] == elem_node_coord[0][2] + iz ;

      if ( matches ) { continue ; }

      std::ostringstream err ;
      err << "BoxMeshFixture elem_node_coord mapping failure { "
          << elem_node_coord[nn][0] << " "
          << elem_node_coord[nn][1] << " "
          << elem_node_coord[nn][2] << " } != { "
          << elem_node_coord[ 0][0] + ix << " "
          << elem_node_coord[ 0][1] + iy << " "
          << elem_node_coord[ 0][2] + iz
          << " }" ;
      throw std::runtime_error( err.str() );
    }
  }
}
|
||||
|
||||
//------------------------------------
|
||||
// Initialize element-node connectivity:
|
||||
// Order elements that only depend on owned nodes first.
|
||||
// These elements could be computed while waiting for
|
||||
// received node data.
|
||||
|
||||
static void layout_elements_interior_exterior(
|
||||
const BoxType vertex_box_local_used ,
|
||||
const BoxType vertex_box_local_owned ,
|
||||
const BoxType node_box_local_used ,
|
||||
const std::vector<size_t> & node_used_id_map ,
|
||||
const ElementSpec element_fixture ,
|
||||
const size_t elem_count_interior ,
|
||||
const typename elem_node_ids_type::HostMirror elem_node_ids )
|
||||
{
|
||||
size_t elem_index_interior = 0 ;
|
||||
size_t elem_index_boundary = elem_count_interior ;
|
||||
|
||||
for ( size_t iz = vertex_box_local_used[2][0] ;
|
||||
iz < vertex_box_local_used[2][1] - 1 ; ++iz ) {
|
||||
for ( size_t iy = vertex_box_local_used[1][0] ;
|
||||
iy < vertex_box_local_used[1][1] - 1 ; ++iy ) {
|
||||
for ( size_t ix = vertex_box_local_used[0][0] ;
|
||||
ix < vertex_box_local_used[0][1] - 1 ; ++ix ) {
|
||||
|
||||
size_t elem_index ;
|
||||
|
||||
// If lower and upper vertices are owned then element is interior
|
||||
if ( contain( vertex_box_local_owned, ix, iy, iz ) &&
|
||||
contain( vertex_box_local_owned, ix+1, iy+1, iz+1 ) ) {
|
||||
elem_index = elem_index_interior++ ;
|
||||
}
|
||||
else {
|
||||
elem_index = elem_index_boundary++ ;
|
||||
}
|
||||
|
||||
for ( size_t nn = 0 ; nn < element_node_count ; ++nn ) {
|
||||
unsigned coord[3] = { static_cast<unsigned>(ix) , static_cast<unsigned>(iy) , static_cast<unsigned>(iz) };
|
||||
|
||||
element_fixture.elem_to_node( nn , coord );
|
||||
|
||||
const size_t node_local_id =
|
||||
box_map_id( node_box_local_used ,
|
||||
node_used_id_map ,
|
||||
coord[0] , coord[1] , coord[2] );
|
||||
|
||||
elem_node_ids( elem_index , nn ) = node_local_id ;
|
||||
}
|
||||
}}}
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
// Nested partitioning of elements by number of thread 'gangs'
|
||||
|
||||
static void layout_elements_partitioned(
|
||||
const BoxType vertex_box_local_used ,
|
||||
const BoxType /*vertex_box_local_owned*/ ,
|
||||
const BoxType node_box_local_used ,
|
||||
const std::vector<size_t> & node_used_id_map ,
|
||||
const ElementSpec element_fixture ,
|
||||
const size_t thread_gang_count ,
|
||||
const typename elem_node_ids_type::HostMirror elem_node_ids )
|
||||
{
|
||||
std::vector< BoxType > element_box_gangs( thread_gang_count );
|
||||
|
||||
BoxType element_box_local_used = vertex_box_local_used ;
|
||||
|
||||
element_box_local_used[0][1] -= 1 ;
|
||||
element_box_local_used[1][1] -= 1 ;
|
||||
element_box_local_used[2][1] -= 1 ;
|
||||
|
||||
box_partition_rcb( element_box_local_used , element_box_gangs );
|
||||
|
||||
size_t elem_index = 0 ;
|
||||
|
||||
for ( size_t ig = 0 ; ig < thread_gang_count ; ++ig ) {
|
||||
|
||||
const BoxType box = element_box_gangs[ig] ;
|
||||
|
||||
for ( size_t iz = box[2][0] ; iz < box[2][1] ; ++iz ) {
|
||||
for ( size_t iy = box[1][0] ; iy < box[1][1] ; ++iy ) {
|
||||
for ( size_t ix = box[0][0] ; ix < box[0][1] ; ++ix , ++elem_index ) {
|
||||
|
||||
for ( size_t nn = 0 ; nn < element_node_count ; ++nn ) {
|
||||
unsigned coord[3] = { static_cast<unsigned>(ix) , static_cast<unsigned>(iy) , static_cast<unsigned>(iz) };
|
||||
|
||||
element_fixture.elem_to_node( nn , coord );
|
||||
|
||||
const size_t node_local_id =
|
||||
box_map_id( node_box_local_used ,
|
||||
node_used_id_map ,
|
||||
coord[0] , coord[1] , coord[2] );
|
||||
|
||||
elem_node_ids( elem_index , nn ) = node_local_id ;
|
||||
}
|
||||
}}}
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
/** \brief  Build the portion of a box-shaped mesh used by one process.
 *
 *  A global box of ( elems_x * elems_y * elems_z ) elements is partitioned
 *  by vertices among 'proc_count' processes with box_partition_rcb and the
 *  part belonging to 'proc_local' is generated on the host, verified, and
 *  deep-copied into the returned mesh.
 *
 *  \param proc_count     Number of parts the global box is split into.
 *  \param proc_local     Index of the part to generate; it is used to index
 *                        the partition directly, so it must be < proc_count.
 *  \param gang_count     When greater than one the elements are laid out by
 *                        layout_elements_partitioned, otherwise by
 *                        layout_elements_interior_exterior.
 *  \param elems_x        Global element count along x.
 *  \param elems_y        Global element count along y.
 *  \param elems_z        Global element count along z.
 *  \param x_coord_curve  Exponent applied to the normalized x coordinate;
 *                        the default of 1 leaves the mapping linear.
 *  \param y_coord_curve  Same as x_coord_curve, for y.
 *  \param z_coord_curve  Same as x_coord_curve, for z.
 *
 *  \return  Mesh with node coordinates, element->node ids, node->element
 *           ids and the send/receive lists of its parallel_data_map filled.
 */
static FEMeshType create( const size_t proc_count ,
                          const size_t proc_local ,
                          const size_t gang_count ,
                          const size_t elems_x ,
                          const size_t elems_y ,
                          const size_t elems_z ,
                          const double x_coord_curve = 1 ,
                          const double y_coord_curve = 1 ,
                          const double z_coord_curve = 1 )
{
  // One more vertex than elements along each axis.
  const size_t vertices_x = elems_x + 1 ;
  const size_t vertices_y = elems_y + 1 ;
  const size_t vertices_z = elems_z + 1 ;

  const BoxBoundsLinear vertex_box_bounds ;
  const ElementSpec     element ;

  //------------------------------------
  // Partition based upon vertices:

  BoxType                vertex_box_global ;
  std::vector< BoxType > vertex_box_parts( proc_count );

  vertex_box_global[0][0] = 0 ; vertex_box_global[0][1] = vertices_x ;
  vertex_box_global[1][0] = 0 ; vertex_box_global[1][1] = vertices_y ;
  vertex_box_global[2][0] = 0 ; vertex_box_global[2][1] = vertices_z ;

  box_partition_rcb( vertex_box_global , vertex_box_parts );

  const BoxType vertex_box_local_owned = vertex_box_parts[ proc_local ];

  //------------------------------------
  // Determine interior and used vertices:

  BoxType vertex_box_local_interior ;
  BoxType vertex_box_local_used ;

  vertex_box_bounds.apply( vertex_box_global ,
                           vertex_box_local_owned ,
                           vertex_box_local_interior ,
                           vertex_box_local_used );

  //------------------------------------
  // Element counts.  The differences are formed in size_t and stored in a
  // signed long so that an empty extent yields -1, which std::max clamps
  // to zero.

  const long local_elems_x =
    ( vertex_box_local_used[0][1] - vertex_box_local_used[0][0] ) - 1 ;
  const long local_elems_y =
    ( vertex_box_local_used[1][1] - vertex_box_local_used[1][0] ) - 1 ;
  const long local_elems_z =
    ( vertex_box_local_used[2][1] - vertex_box_local_used[2][0] ) - 1 ;

  const size_t elem_count_total = std::max( long(0) , local_elems_x ) *
                                  std::max( long(0) , local_elems_y ) *
                                  std::max( long(0) , local_elems_z );

  const long interior_elems_x =
    ( vertex_box_local_owned[0][1] - vertex_box_local_owned[0][0] ) - 1 ;
  const long interior_elems_y =
    ( vertex_box_local_owned[1][1] - vertex_box_local_owned[1][0] ) - 1 ;
  const long interior_elems_z =
    ( vertex_box_local_owned[2][1] - vertex_box_local_owned[2][0] ) - 1 ;

  const size_t elem_count_interior = std::max( long(0) , interior_elems_x ) *
                                     std::max( long(0) , interior_elems_y ) *
                                     std::max( long(0) , interior_elems_z );

  //------------------------------------
  // Expand vertex boxes to node boxes:

  BoxType                node_box_global ;
  BoxType                node_box_local_used ;
  std::vector< BoxType > node_box_parts ;

  element.create_node_boxes_from_vertex_boxes(
    vertex_box_global , vertex_box_parts ,
    node_box_global , node_box_parts );

  //------------------------------------
  // Node communication maps:

  size_t node_count_interior = 0 ;
  size_t node_count_owned    = 0 ;
  size_t node_count_total    = 0 ;
  std::vector<size_t>                node_used_id_map ;
  std::vector<size_t>                node_part_counts ;
  std::vector< std::vector<size_t> > node_send_map ;

  box_partition_maps( node_box_global ,
                      node_box_parts ,
                      element.box_bounds ,
                      proc_local ,
                      node_box_local_used ,
                      node_used_id_map ,
                      node_count_interior ,
                      node_count_owned ,
                      node_count_total ,
                      node_part_counts ,
                      node_send_map );

  // Count the messages and the total number of sent items so that the
  // parallel data map can be sized before it is filled in below.
  // Entry 0 of both maps describes this process itself and is skipped.

  size_t recv_msg_count = 0 ;
  size_t send_msg_count = 0 ;
  size_t send_count     = 0 ;

  for ( size_t i = 1 ; i < proc_count ; ++i ) {
    if ( node_part_counts[i] ) {
      ++recv_msg_count ;
    }
    if ( node_send_map[i].size() ) {
      ++send_msg_count ;
      send_count += node_send_map[i].size();
    }
  }

  //------------------------------------
  // Finite element mesh:

  FEMeshType mesh ;

  if ( node_count_total ) {
    mesh.node_coords = node_coords_type( "node_coords", node_count_total );
  }

  if ( elem_count_total ) {
    mesh.elem_node_ids =
      elem_node_ids_type( "elem_node_ids", elem_count_total );
  }

  mesh.parallel_data_map.assign( node_count_interior ,
                                 node_count_owned ,
                                 node_count_total ,
                                 recv_msg_count ,
                                 send_msg_count ,
                                 send_count );

  // Mirrors of the mesh arrays; they are filled here and copied into the
  // mesh with deep_copy once complete.

  typename node_coords_type::HostMirror node_coords =
    Kokkos::create_mirror( mesh.node_coords );

  typename elem_node_ids_type::HostMirror elem_node_ids =
    Kokkos::create_mirror( mesh.elem_node_ids );

  //------------------------------------
  // Set node coordinates to grid location for subsequent verification.

  for ( size_t iz = node_box_local_used[2][0] ;
               iz < node_box_local_used[2][1] ; ++iz ) {
    for ( size_t iy = node_box_local_used[1][0] ;
                 iy < node_box_local_used[1][1] ; ++iy ) {
      for ( size_t ix = node_box_local_used[0][0] ;
                   ix < node_box_local_used[0][1] ; ++ix ) {

        const size_t node_local_id =
          box_map_id( node_box_local_used , node_used_id_map , ix , iy , iz );

        node_coords( node_local_id , 0 ) = ix ;
        node_coords( node_local_id , 1 ) = iy ;
        node_coords( node_local_id , 2 ) = iz ;
      }
    }
  }

  //------------------------------------
  // Initialize element-node connectivity:

  if ( 1 < gang_count ) {
    layout_elements_partitioned( vertex_box_local_used ,
                                 vertex_box_local_owned ,
                                 node_box_local_used ,
                                 node_used_id_map ,
                                 element ,
                                 gang_count ,
                                 elem_node_ids );
  }
  else {
    layout_elements_interior_exterior( vertex_box_local_used ,
                                       vertex_box_local_owned ,
                                       node_box_local_used ,
                                       node_used_id_map ,
                                       element ,
                                       elem_count_interior ,
                                       elem_node_ids );
  }

  //------------------------------------
  // Populate node->element connectivity.
  // First pass: count how many elements reference each node.

  std::vector<size_t> node_elem_work( node_count_total , (size_t) 0 );

  for ( size_t i = 0 ; i < elem_count_total ; ++i ) {
    for ( size_t n = 0 ; n < element_node_count ; ++n ) {
      ++node_elem_work[ elem_node_ids(i,n) ];
    }
  }

  mesh.node_elem_ids =
    Kokkos::create_staticcrsgraph< node_elem_ids_type >( "node_elem_ids" , node_elem_work );

  typename node_elem_ids_type::HostMirror
    node_elem_ids = Kokkos::create_mirror( mesh.node_elem_ids );

  // Reuse the work array as a per-node insertion cursor, starting at the
  // beginning of each node's row.

  for ( size_t i = 0 ; i < node_count_total ; ++i ) {
    node_elem_work[i] = node_elem_ids.row_map[i];
  }

  // Looping in element order ensures the list of elements
  // is sorted by element index.

  for ( size_t i = 0 ; i < elem_count_total ; ++i ) {
    for ( size_t n = 0 ; n < element_node_count ; ++n ) {
      // Kept as size_t: these index size_t-sized containers, so narrowing
      // them to 'unsigned' could truncate on very large meshes.
      const size_t nid = elem_node_ids(i, n);
      const size_t j   = node_elem_work[nid] ;
      ++node_elem_work[nid] ;

      node_elem_ids.entries( j , 0 ) = i ;
      node_elem_ids.entries( j , 1 ) = n ;
    }
  }

  //------------------------------------
  // Verify setup with node coordinates matching grid indices.

  verify( node_coords , elem_node_ids , node_elem_ids );

  //------------------------------------
  // Scale node coordinates to problem extent with
  // nonlinear mapping.
  {
    const double problem_extent[3] =
      { static_cast<double>( vertex_box_global[0][1] - 1 ) ,
        static_cast<double>( vertex_box_global[1][1] - 1 ) ,
        static_cast<double>( vertex_box_global[2][1] - 1 ) };

    const double grid_extent[3] =
      { static_cast<double>( node_box_global[0][1] - 1 ) ,
        static_cast<double>( node_box_global[1][1] - 1 ) ,
        static_cast<double>( node_box_global[2][1] - 1 ) };

    for ( size_t i = 0 ; i < node_count_total ; ++i ) {
      // Normalize by the grid extent, apply the curve exponent, then scale
      // to the problem extent.
      const double x_unit = node_coords(i,0) / grid_extent[0] ;
      const double y_unit = node_coords(i,1) / grid_extent[1] ;
      const double z_unit = node_coords(i,2) / grid_extent[2] ;

      node_coords(i,0) = coordinate_scalar_type( problem_extent[0] * std::pow( x_unit , x_coord_curve ) );
      node_coords(i,1) = coordinate_scalar_type( problem_extent[1] * std::pow( y_unit , y_coord_curve ) );
      node_coords(i,2) = coordinate_scalar_type( problem_extent[2] * std::pow( z_unit , z_coord_curve ) );
    }
  }

  Kokkos::deep_copy( mesh.node_coords ,           node_coords );
  Kokkos::deep_copy( mesh.elem_node_ids ,         elem_node_ids );
  Kokkos::deep_copy( mesh.node_elem_ids.entries , node_elem_ids.entries );

  //------------------------------------
  // Communication lists:
  {
    recv_msg_count = 0 ;
    send_msg_count = 0 ;
    send_count     = 0 ;

    for ( size_t i = 1 ; i < proc_count ; ++i ) {

      // Order sending starting with the local processor rank
      // to try to smooth out the number of messages simultaneously
      // sent to a particular processor.

      const int proc = ( proc_local + i ) % proc_count ;

      if ( node_part_counts[i] ) {
        mesh.parallel_data_map.host_recv(recv_msg_count,0) = proc ;
        mesh.parallel_data_map.host_recv(recv_msg_count,1) = node_part_counts[i] ;
        ++recv_msg_count ;
      }

      if ( node_send_map[i].size() ) {
        mesh.parallel_data_map.host_send(send_msg_count,0) = proc ;
        mesh.parallel_data_map.host_send(send_msg_count,1) = node_send_map[i].size() ;

        // Sent ids are stored relative to the first non-interior node id.
        for ( size_t j = 0 ; j < node_send_map[i].size() ; ++j , ++send_count ) {
          mesh.parallel_data_map.host_send_item(send_count) = node_send_map[i][j] - node_count_interior ;
        }
        ++send_msg_count ;
      }
    }
  }

  return mesh ;
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_BOXMESHFIXTURE_HPP */
|
||||
|
||||
|
||||
@ -1,381 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <limits>
|
||||
#include <BoxMeshPartition.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
|
||||
void box_partition( size_t ip , size_t up ,
|
||||
const BoxType & box ,
|
||||
BoxType * const p_box )
|
||||
{
|
||||
const size_t np = up - ip ;
|
||||
|
||||
if ( 1 == np ) {
|
||||
p_box[ip] = box ;
|
||||
}
|
||||
else {
|
||||
// Choose axis with largest count:
|
||||
|
||||
const size_t n0 = box[0][1] - box[0][0] ;
|
||||
const size_t n1 = box[1][1] - box[1][0] ;
|
||||
const size_t n2 = box[2][1] - box[2][0] ;
|
||||
|
||||
const size_t axis = n2 > n1 ? ( n2 > n0 ? 2 : ( n1 > n0 ? 1 : 0 ) ) :
|
||||
( n1 > n0 ? 1 : 0 );
|
||||
|
||||
const size_t n = box[ axis ][1] - box[ axis ][0] ;
|
||||
|
||||
if ( 0 == np % 3 ) {
|
||||
const size_t np_part = np / 3 ; // exact
|
||||
|
||||
const size_t nbox_low = (size_t)(( (double) n ) * ( 1.0 / 3.0 ));
|
||||
const size_t nbox_mid = (size_t)(( (double) n ) * ( 2.0 / 3.0 ));
|
||||
|
||||
BoxType dbox_low = box ; // P = [ip,ip+np/3)
|
||||
BoxType dbox_mid = box ; // P = [ip+np/3,ip+2*np/3)
|
||||
BoxType dbox_upp = box ; // P = [ip+2*np/3,ip+np)
|
||||
|
||||
dbox_low[ axis ][1] = box[ axis ][0] + nbox_low ;
|
||||
dbox_mid[ axis ][1] = box[ axis ][0] + nbox_mid ;
|
||||
|
||||
dbox_mid[ axis ][0] = dbox_low[ axis ][1];
|
||||
dbox_upp[ axis ][0] = dbox_mid[ axis ][1];
|
||||
|
||||
box_partition( ip, ip + np_part, dbox_low , p_box );
|
||||
box_partition( ip+ np_part, ip + 2*np_part, dbox_mid , p_box );
|
||||
box_partition( ip+2*np_part, up, dbox_upp , p_box );
|
||||
}
|
||||
else {
|
||||
const size_t np_low = np / 2 ; /* Rounded down */
|
||||
const size_t nbox_low = (size_t)
|
||||
(((double)n) * ( ((double) np_low ) / ((double) np ) ));
|
||||
|
||||
BoxType dbox_low = box ;
|
||||
BoxType dbox_upp = box ;
|
||||
|
||||
dbox_low[ axis ][1] = dbox_low[ axis ][0] + nbox_low ;
|
||||
dbox_upp[ axis ][0] = dbox_low[ axis ][1];
|
||||
|
||||
box_partition( ip, ip + np_low, dbox_low , p_box );
|
||||
box_partition( ip + np_low, up, dbox_upp , p_box );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t box_map_offset( const BoxType & local_use ,
|
||||
const size_t global_i ,
|
||||
const size_t global_j ,
|
||||
const size_t global_k )
|
||||
|
||||
{
|
||||
const size_t max = std::numeric_limits<size_t>::max();
|
||||
|
||||
const size_t n[3] =
|
||||
{ local_use[0][1] - local_use[0][0] ,
|
||||
local_use[1][1] - local_use[1][0] ,
|
||||
local_use[2][1] - local_use[2][0] };
|
||||
|
||||
const size_t use[3] = {
|
||||
( global_i >= local_use[0][0] ? global_i - local_use[0][0] : max ) ,
|
||||
( global_j >= local_use[1][0] ? global_j - local_use[1][0] : max ) ,
|
||||
( global_k >= local_use[2][0] ? global_k - local_use[2][0] : max ) };
|
||||
|
||||
const size_t offset =
|
||||
( use[0] < n[0] && use[1] < n[1] && use[2] < n[2] ) ?
|
||||
( use[0] + n[0] * ( use[1] + n[1] * use[2] ) ) : max ;
|
||||
|
||||
if ( offset == max ) {
|
||||
std::ostringstream msg ;
|
||||
msg << "box_map_offset ERROR: "
|
||||
<< " use " << local_use
|
||||
<< " ( " << global_i
|
||||
<< " , " << global_j
|
||||
<< " , " << global_k
|
||||
<< " )" ;
|
||||
throw std::runtime_error( msg.str() );
|
||||
}
|
||||
|
||||
return offset ;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void BoxBoundsLinear::apply( const BoxType & box_global ,
|
||||
const BoxType & box_part ,
|
||||
BoxType & box_interior ,
|
||||
BoxType & box_use ) const
|
||||
{
|
||||
const unsigned ghost = 1 ;
|
||||
|
||||
if ( 0 == count( box_part ) ) {
|
||||
box_interior = box_part ;
|
||||
box_use = box_part ;
|
||||
}
|
||||
else {
|
||||
for ( size_t i = 0 ; i < 3 ; ++i ) {
|
||||
|
||||
box_interior[i][0] =
|
||||
( box_part[i][0] == box_global[i][0] ) ? box_part[i][0] : (
|
||||
( box_part[i][0] + ghost < box_part[i][1] ) ? box_part[i][0] + ghost :
|
||||
box_part[i][1] );
|
||||
|
||||
box_interior[i][1] =
|
||||
( box_part[i][1] == box_global[i][1] ) ? box_part[i][1] : (
|
||||
( box_part[i][0] + ghost < box_part[i][1] ) ? box_part[i][1] - ghost :
|
||||
box_part[i][0] );
|
||||
|
||||
box_use[i][0] =
|
||||
( box_part[i][0] > ghost + box_global[i][0] ) ? box_part[i][0] - ghost :
|
||||
box_global[i][0] ;
|
||||
box_use[i][1] =
|
||||
( box_part[i][1] + ghost < box_global[i][1] ) ? box_part[i][1] + ghost :
|
||||
box_global[i][1] ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BoxBoundsQuadratic::apply( const BoxType & box_global ,
|
||||
const BoxType & box_part ,
|
||||
BoxType & box_interior ,
|
||||
BoxType & box_use ) const
|
||||
{
|
||||
if ( 0 == count( box_part ) ) {
|
||||
box_interior = box_part ;
|
||||
box_use = box_part ;
|
||||
}
|
||||
else {
|
||||
for ( size_t i = 0 ; i < 3 ; ++i ) {
|
||||
const bool odd = ( box_part[i][0] - box_global[i][0] ) & 01 ;
|
||||
|
||||
const unsigned ghost = odd ? 1 : 2 ;
|
||||
|
||||
box_interior[i][0] =
|
||||
( box_part[i][0] == box_global[i][0] ) ? box_part[i][0] : (
|
||||
( box_part[i][0] + ghost < box_part[i][1] ) ? box_part[i][0] + ghost :
|
||||
box_part[i][1] );
|
||||
|
||||
box_interior[i][1] =
|
||||
( box_part[i][1] == box_global[i][1] ) ? box_part[i][1] : (
|
||||
( box_part[i][0] + ghost < box_part[i][1] ) ? box_part[i][1] - ghost :
|
||||
box_part[i][0] );
|
||||
|
||||
box_use[i][0] =
|
||||
( box_part[i][0] > ghost + box_global[i][0] ) ? box_part[i][0] - ghost :
|
||||
box_global[i][0] ;
|
||||
box_use[i][1] =
|
||||
( box_part[i][1] + ghost < box_global[i][1] ) ? box_part[i][1] + ghost :
|
||||
box_global[i][1] ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void box_partition_rcb( const BoxType & root_box ,
|
||||
std::vector<BoxType> & part_boxes )
|
||||
{
|
||||
const BoxBoundsLinear use_boxes ;
|
||||
|
||||
const size_t part_count = part_boxes.size();
|
||||
|
||||
box_partition( 0 , part_count , root_box , & part_boxes[0] );
|
||||
|
||||
// Verify partitioning
|
||||
|
||||
size_t total_cell = 0 ;
|
||||
|
||||
for ( size_t i = 0 ; i < part_count ; ++i ) {
|
||||
|
||||
total_cell += count( part_boxes[i] );
|
||||
|
||||
BoxType box_interior , box_use ;
|
||||
|
||||
use_boxes.apply( root_box , part_boxes[i] , box_interior , box_use );
|
||||
|
||||
if ( count( box_use ) < count( part_boxes[i] ) ||
|
||||
count( part_boxes[i] ) < count( box_interior ) ||
|
||||
part_boxes[i] != intersect( part_boxes[i] , box_use ) ||
|
||||
box_interior != intersect( part_boxes[i] , box_interior )) {
|
||||
|
||||
std::ostringstream msg ;
|
||||
|
||||
msg << "box_partition_rcb ERROR : "
|
||||
<< "part_boxes[" << i << "] = "
|
||||
<< part_boxes[i]
|
||||
<< " use " << box_use
|
||||
<< " interior " << box_interior
|
||||
<< std::endl
|
||||
<< " part ^ use " << intersect( part_boxes[i] , box_use )
|
||||
<< " part ^ interior " << intersect( part_boxes[i] , box_interior );
|
||||
|
||||
throw std::runtime_error( msg.str() );
|
||||
}
|
||||
|
||||
for ( size_t j = i + 1 ; j < part_count ; ++j ) {
|
||||
const BoxType tmp = intersect( part_boxes[i] , part_boxes[j] );
|
||||
|
||||
if ( count( tmp ) ) {
|
||||
throw std::runtime_error( std::string("box partition intersection") );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( total_cell != count( root_box ) ) {
|
||||
throw std::runtime_error( std::string("box partition count") );
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
size_t box_map_id( const BoxType & local_use ,
|
||||
const std::vector<size_t> & local_use_id_map ,
|
||||
const size_t global_i ,
|
||||
const size_t global_j ,
|
||||
const size_t global_k )
|
||||
|
||||
{
|
||||
const size_t offset =
|
||||
box_map_offset( local_use , global_i , global_j , global_k );
|
||||
return local_use_id_map[ offset ];
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void box_partition_maps( const BoxType & root_box ,
|
||||
const std::vector<BoxType> & part_boxes ,
|
||||
const BoxBounds & use_boxes ,
|
||||
const size_t my_part ,
|
||||
BoxType & my_use_box ,
|
||||
std::vector<size_t> & my_use_id_map ,
|
||||
size_t & my_count_interior ,
|
||||
size_t & my_count_owned ,
|
||||
size_t & my_count_uses ,
|
||||
std::vector<size_t> & my_part_counts ,
|
||||
std::vector<std::vector<size_t> > & my_send_map )
|
||||
{
|
||||
const size_t np = part_boxes.size();
|
||||
|
||||
if ( np <= my_part ) {
|
||||
std::ostringstream msg ;
|
||||
msg << "box_partition_maps ERROR : "
|
||||
<< " np(" << np << ") <= my_part(" << my_part << ")" ;
|
||||
throw std::runtime_error( msg.str() );
|
||||
}
|
||||
|
||||
const BoxType my_owned_box = part_boxes[my_part];
|
||||
BoxType my_interior_box ;
|
||||
|
||||
|
||||
use_boxes.apply( root_box, my_owned_box, my_interior_box, my_use_box );
|
||||
|
||||
my_count_interior = count( my_interior_box );
|
||||
my_count_owned = count( my_owned_box );
|
||||
my_count_uses = count( my_use_box );
|
||||
|
||||
my_use_id_map.assign( my_count_uses , std::numeric_limits<size_t>::max() );
|
||||
|
||||
// Order ids as { owned-interior , owned-parallel , received_{(p+i)%np} }
|
||||
|
||||
size_t offset_interior = 0 ;
|
||||
size_t offset_parallel = my_count_interior ;
|
||||
|
||||
for ( size_t iz = my_owned_box[2][0] ; iz < my_owned_box[2][1] ; ++iz ) {
|
||||
for ( size_t iy = my_owned_box[1][0] ; iy < my_owned_box[1][1] ; ++iy ) {
|
||||
for ( size_t ix = my_owned_box[0][0] ; ix < my_owned_box[0][1] ; ++ix ) {
|
||||
const size_t offset = box_map_offset( my_use_box , ix , iy , iz );
|
||||
if ( contain( my_interior_box , ix , iy , iz ) ) {
|
||||
my_use_id_map[ offset ] = offset_interior++ ;
|
||||
}
|
||||
else {
|
||||
my_use_id_map[ offset ] = offset_parallel++ ;
|
||||
}
|
||||
}}}
|
||||
|
||||
|
||||
my_part_counts.assign( np , (size_t) 0 );
|
||||
my_send_map.assign( np , std::vector<size_t>() );
|
||||
|
||||
my_part_counts[0] = my_count_owned ;
|
||||
|
||||
for ( size_t i = 1 ; i < np ; ++i ) {
|
||||
|
||||
const size_t ip = ( my_part + i ) % np ;
|
||||
|
||||
const BoxType p_owned_box = part_boxes[ip];
|
||||
BoxType p_use_box , p_interior_box ;
|
||||
use_boxes.apply( root_box, p_owned_box, p_interior_box, p_use_box );
|
||||
|
||||
const BoxType recv_box = intersect( my_use_box , p_owned_box );
|
||||
const BoxType send_box = intersect( my_owned_box , p_use_box );
|
||||
|
||||
if ( 0 != ( my_part_counts[i] = count( recv_box ) ) ) {
|
||||
for ( size_t iz = recv_box[2][0] ; iz < recv_box[2][1] ; ++iz ) {
|
||||
for ( size_t iy = recv_box[1][0] ; iy < recv_box[1][1] ; ++iy ) {
|
||||
for ( size_t ix = recv_box[0][0] ; ix < recv_box[0][1] ; ++ix ) {
|
||||
const size_t offset = box_map_offset( my_use_box , ix , iy , iz );
|
||||
my_use_id_map[ offset ] = offset_parallel++ ;
|
||||
}}}
|
||||
}
|
||||
|
||||
if ( 0 != count( send_box ) ) {
|
||||
for ( size_t iz = send_box[2][0] ; iz < send_box[2][1] ; ++iz ) {
|
||||
for ( size_t iy = send_box[1][0] ; iy < send_box[1][1] ; ++iy ) {
|
||||
for ( size_t ix = send_box[0][0] ; ix < send_box[0][1] ; ++ix ) {
|
||||
const size_t offset = box_map_offset( my_use_box , ix , iy , iz );
|
||||
|
||||
my_send_map[ i ].push_back( my_use_id_map[ offset ] );
|
||||
}}}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,210 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef BOXMESHPARTITION_HPP
|
||||
#define BOXMESHPARTITION_HPP
|
||||
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** \brief  Axis-aligned box of grid indices in three dimensions.
 *
 *  data[axis][0] is the lower bound and data[axis][1] the upper bound of
 *  the box along 'axis'.  The type has no constructors, so a
 *  default-constructed box holds indeterminate values until assigned.
 */
struct BoxType {
  size_t data[3][2] ;

  /** \brief  The { lower , upper } pair of one axis. */
  typedef size_t range_type[2] ;

  /** \brief  Read-only access to the bounds of axis 'i'. */
  const range_type & operator[]( size_t i ) const { return data[i]; }

  /** \brief  Writable access to the bounds of axis 'i'. */
  range_type & operator[]( size_t i ) { return data[i]; }

  /** \brief  True when all six bounds are equal. */
  bool operator == ( const BoxType & rhs ) const
  {
    // Each bound is compared with the same bound of 'rhs'; the upper bound
    // of axis 1 was previously compared against the upper bound of axis 2.
    for ( size_t axis = 0 ; axis < 3 ; ++axis ) {
      if ( data[axis][0] != rhs.data[axis][0] ||
           data[axis][1] != rhs.data[axis][1] ) {
        return false ;
      }
    }
    return true ;
  }

  /** \brief  True when any bound differs; the exact negation of ==. */
  bool operator != ( const BoxType & rhs ) const
  {
    return ! ( *this == rhs );
  }
};
|
||||
|
||||
inline
|
||||
size_t count( const BoxType & b )
|
||||
{
|
||||
size_t n = 1 ;
|
||||
for ( size_t i = 0 ; i < 3 ; ++i ) {
|
||||
n *= b[i][1] > b[i][0] ? b[i][1] - b[i][0] : 0 ;
|
||||
}
|
||||
return n ;
|
||||
}
|
||||
|
||||
inline
|
||||
bool contain( const BoxType & b , size_t i , size_t j , size_t k )
|
||||
{
|
||||
return b[0][0] <= i && i < b[0][1] &&
|
||||
b[1][0] <= j && j < b[1][1] &&
|
||||
b[2][0] <= k && k < b[2][1] ;
|
||||
}
|
||||
|
||||
inline
|
||||
BoxType intersect( const BoxType & x , const BoxType & y )
|
||||
{
|
||||
BoxType z ;
|
||||
for ( size_t i = 0 ; i < 3 ; ++i ) {
|
||||
z[i][0] = std::max( x[i][0] , y[i][0] );
|
||||
z[i][1] = std::min( x[i][1] , y[i][1] );
|
||||
}
|
||||
|
||||
return z ;
|
||||
}
|
||||
|
||||
inline
|
||||
std::ostream & operator << ( std::ostream & s , const BoxType & box )
|
||||
{
|
||||
s << "{ "
|
||||
<< box[0][0] << " " << box[0][1] << " , "
|
||||
<< box[1][0] << " " << box[1][1] << " , "
|
||||
<< box[2][0] << " " << box[2][1] << " }" ;
|
||||
return s ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
class BoxBounds {
|
||||
public:
|
||||
/** \brief Default bounds to one layer of ghosting */
|
||||
virtual
|
||||
void apply( const BoxType & box_global ,
|
||||
const BoxType & box_part ,
|
||||
BoxType & box_interior ,
|
||||
BoxType & box_use ) const = 0 ;
|
||||
|
||||
virtual ~BoxBounds() {}
|
||||
BoxBounds() {}
|
||||
};
|
||||
|
||||
class BoxBoundsLinear : public BoxBounds
|
||||
{
|
||||
public:
|
||||
/** \brief Default bounds to one layer of ghosting */
|
||||
virtual
|
||||
void apply( const BoxType & box_global ,
|
||||
const BoxType & box_part ,
|
||||
BoxType & box_interior ,
|
||||
BoxType & box_use ) const ;
|
||||
|
||||
virtual ~BoxBoundsLinear() {}
|
||||
BoxBoundsLinear() {}
|
||||
};
|
||||
|
||||
class BoxBoundsQuadratic : public BoxBounds {
|
||||
public:
|
||||
/** \brief Quadratic mesh: even ordinates have two layers,
|
||||
* odd ordinates have one layer.
|
||||
*/
|
||||
virtual
|
||||
void apply( const BoxType & box_global ,
|
||||
const BoxType & box_part ,
|
||||
BoxType & box_interior ,
|
||||
BoxType & box_use ) const ;
|
||||
|
||||
virtual ~BoxBoundsQuadratic() {}
|
||||
BoxBoundsQuadratic() {}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/* Partition box into part_boxes.size() sub-boxes */
|
||||
|
||||
void box_partition_rcb( const BoxType & root_box ,
|
||||
std::vector<BoxType> & part_boxes );
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/* Determine local id layout and communication maps for partitioned boxes.
|
||||
*
|
||||
* Local ids are laid out as follows:
|
||||
* { [ owned-interior ids not sent ] ,
|
||||
* [ owned-boundary ids to be sent to other processes ] ,
|
||||
* [ received ids from processor ( my_part + 1 ) % part_count ]
|
||||
* [ received ids from processor ( my_part + 2 ) % part_count ]
|
||||
* [ received ids from processor ( my_part + 3 ) % part_count ]
|
||||
* ... };
|
||||
*
|
||||
* This layout allows
|
||||
* (1) received data to be copied into a contiguous block of memory
|
||||
* (2) send data to be extracted from a contiguous block of memory.
|
||||
*/
|
||||
void box_partition_maps(
|
||||
const BoxType & root_box , // [in] Global box
|
||||
const std::vector<BoxType> & part_boxes , // [in] Partitioned boxes
|
||||
const BoxBounds & use_boxes , // [in] Ghost boundaries
|
||||
const size_t my_part , // [in] My local part
|
||||
BoxType & my_use_box , // [out] My used box with ghost
|
||||
std::vector<size_t> & my_use_id_map , // [out] Local ordering map
|
||||
size_t & my_count_interior , // [out] How many interior
|
||||
size_t & my_count_owned , // [out] How many owned
|
||||
size_t & my_count_uses , // [out] How many used
|
||||
std::vector<size_t> & my_part_counts , // [out] Partitioning of my_use_id_map
|
||||
std::vector<std::vector<size_t> > & my_send_map ); // [out] Send id map
|
||||
|
||||
/* Mapping of cartesian coordinate to local id */
|
||||
size_t box_map_id( const BoxType & my_use_box ,
|
||||
const std::vector<size_t> & my_use_id_map ,
|
||||
const size_t global_i ,
|
||||
const size_t global_j ,
|
||||
const size_t global_k );
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef BOXMESHPARTITION_HPP */
|
||||
|
||||
@ -1,16 +0,0 @@
|
||||
|
||||
# Search the build directory first, then this source directory, for headers.
include_directories(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
)

# Start from an empty list, then collect every .cpp file in this directory.
set(SOURCES "")
file(GLOB SOURCES *.cpp)

# Not referenced by the commands below.
set(LIBRARIES kokkoscore)

# Build the multi_fem example executable for serial and MPI configurations.
TRIBITS_ADD_EXECUTABLE(
  multi_fem
  SOURCES ${SOURCES}
  COMM serial mpi
  )
|
||||
|
||||
@ -1,452 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef EXPLICIT_DRIVER_HPP
|
||||
#define EXPLICIT_DRIVER_HPP
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
#include <ExplicitFunctors.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Explicit {
|
||||
|
||||
/** \brief  Timings and step count collected for one benchmark run.
 *
 *  The time members hold seconds; all members start at zero.
 */
struct PerformanceData {
  double mesh_time ;
  double init_time ;
  double internal_force_time ;
  double central_diff ;
  double comm_time ;
  size_t number_of_steps ;

  PerformanceData()
    : mesh_time( 0 )
    , init_time( 0 )
    , internal_force_time( 0 )
    , central_diff( 0 )
    , comm_time( 0 )
    , number_of_steps( 0 )
    {}

  /** \brief  Keep, for every time member, the smaller of this value and rhs.
   *
   *  number_of_steps is intentionally left untouched.
   */
  void best( const PerformanceData & rhs )
  {
    mesh_time           = ( rhs.mesh_time           < mesh_time           ) ? rhs.mesh_time           : mesh_time ;
    init_time           = ( rhs.init_time           < init_time           ) ? rhs.init_time           : init_time ;
    internal_force_time = ( rhs.internal_force_time < internal_force_time ) ? rhs.internal_force_time : internal_force_time ;
    central_diff        = ( rhs.central_diff        < central_diff        ) ? rhs.central_diff        : central_diff ;
    comm_time           = ( rhs.comm_time           < comm_time           ) ? rhs.comm_time           : comm_time ;
  }
};
|
||||
|
||||
template< typename Scalar , class FixtureType >
|
||||
PerformanceData run( const typename FixtureType::FEMeshType & mesh ,
|
||||
const int global_max_x ,
|
||||
const int global_max_y ,
|
||||
const int global_max_z ,
|
||||
const int steps ,
|
||||
const int print_sample )
|
||||
{
|
||||
typedef Scalar scalar_type ;
|
||||
typedef FixtureType fixture_type ;
|
||||
typedef typename fixture_type::execution_space execution_space ;
|
||||
//typedef typename fixture_type::FEMeshType mesh_type ; // unused
|
||||
|
||||
enum { ElementNodeCount = fixture_type::element_node_count };
|
||||
|
||||
const int NumStates = 2;
|
||||
|
||||
const int total_num_steps = steps ;
|
||||
|
||||
const Scalar user_dt = 5.0e-6;
|
||||
//const Scalar end_time = 0.0050;
|
||||
|
||||
// element block parameters
|
||||
const Scalar lin_bulk_visc = 0.0;
|
||||
const Scalar quad_bulk_visc = 0.0;
|
||||
|
||||
// const Scalar lin_bulk_visc = 0.06;
|
||||
// const Scalar quad_bulk_visc = 1.2;
|
||||
// const Scalar hg_stiffness = 0.0;
|
||||
// const Scalar hg_viscosity = 0.0;
|
||||
// const Scalar hg_stiffness = 0.03;
|
||||
// const Scalar hg_viscosity = 0.001;
|
||||
|
||||
// material properties
|
||||
const Scalar youngs_modulus=1.0e6;
|
||||
const Scalar poissons_ratio=0.0;
|
||||
const Scalar density = 8.0e-4;
|
||||
|
||||
const comm::Machine machine = mesh.parallel_data_map.machine ;
|
||||
|
||||
PerformanceData perf_data ;
|
||||
|
||||
Kokkos::Impl::Timer wall_clock ;
|
||||
|
||||
//------------------------------------
|
||||
// Generate fields
|
||||
|
||||
typedef Fields< scalar_type , execution_space > fields_type ;
|
||||
|
||||
fields_type mesh_fields( mesh ,
|
||||
lin_bulk_visc ,
|
||||
quad_bulk_visc ,
|
||||
youngs_modulus ,
|
||||
poissons_ratio ,
|
||||
density );
|
||||
|
||||
typename fields_type::node_coords_type::HostMirror
|
||||
model_coords_h = Kokkos::create_mirror( mesh_fields.model_coords );
|
||||
|
||||
typename fields_type::geom_state_array_type::HostMirror
|
||||
displacement_h = Kokkos::create_mirror( mesh_fields.displacement );
|
||||
|
||||
typename fields_type::geom_state_array_type::HostMirror
|
||||
velocity_h = Kokkos::create_mirror( mesh_fields.velocity );
|
||||
|
||||
Kokkos::deep_copy( model_coords_h , mesh_fields.model_coords );
|
||||
|
||||
//------------------------------------
|
||||
// Initialization
|
||||
|
||||
initialize_element<Scalar,execution_space>::apply( mesh_fields );
|
||||
initialize_node< Scalar,execution_space>::apply( mesh_fields );
|
||||
|
||||
const Scalar x_bc = global_max_x ;
|
||||
|
||||
// Initial condition on velocity to initiate a pulse along the X axis
|
||||
{
|
||||
const unsigned X = 0;
|
||||
for (int inode = 0; inode< mesh_fields.num_nodes; ++inode) {
|
||||
if ( model_coords_h(inode,X) == 0) {
|
||||
velocity_h(inode,X,0) = 1.0e3;
|
||||
velocity_h(inode,X,1) = 1.0e3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Kokkos::deep_copy( mesh_fields.velocity , velocity_h );
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// We will call a sequence of functions. These functions have been
|
||||
// grouped into several functors to balance the number of global memory
|
||||
// accesses versus requiring too many registers or too much L1 cache.
|
||||
// Global memory accees have read/write cost and memory subsystem contention cost.
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
perf_data.init_time = comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
// Parameters required for the internal force computations.
|
||||
|
||||
int current_state = 0;
|
||||
int previous_state = 0;
|
||||
int next_state = 0;
|
||||
|
||||
perf_data.number_of_steps = total_num_steps ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
typedef typename
|
||||
fields_type::geom_state_array_type::value_type comm_value_type ;
|
||||
|
||||
const unsigned comm_value_count = 6 ;
|
||||
|
||||
Kokkos::AsyncExchange< comm_value_type , execution_space ,
|
||||
Kokkos::ParallelDataMap >
|
||||
comm_exchange( mesh.parallel_data_map , comm_value_count );
|
||||
|
||||
#endif
|
||||
|
||||
for (int step = 0; step < total_num_steps; ++step) {
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
{
|
||||
// Communicate "send" nodes' displacement and velocity next_state
|
||||
// to the ghosted nodes.
|
||||
// buffer packages: { { dx , dy , dz , vx , vy , vz }_node }
|
||||
|
||||
pack_state< Scalar , execution_space >
|
||||
::apply( comm_exchange.buffer() ,
|
||||
mesh.parallel_data_map.count_interior ,
|
||||
mesh.parallel_data_map.count_send ,
|
||||
mesh_fields , next_state );
|
||||
|
||||
comm_exchange.setup();
|
||||
|
||||
comm_exchange.send_receive();
|
||||
|
||||
unpack_state< Scalar , execution_space >
|
||||
::apply( mesh_fields , next_state ,
|
||||
comm_exchange.buffer() ,
|
||||
mesh.parallel_data_map.count_owned ,
|
||||
mesh.parallel_data_map.count_receive );
|
||||
|
||||
execution_space::fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
perf_data.comm_time += comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// rotate the states
|
||||
|
||||
previous_state = current_state;
|
||||
current_state = next_state;
|
||||
++next_state;
|
||||
next_state %= NumStates;
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
// First kernel 'grad_hgop' combines two functions:
|
||||
// gradient, velocity gradient
|
||||
grad< Scalar , execution_space >::apply( mesh_fields ,
|
||||
current_state ,
|
||||
previous_state );
|
||||
|
||||
// Combine tensor decomposition and rotation functions.
|
||||
decomp_rotate< Scalar , execution_space >::apply( mesh_fields ,
|
||||
current_state ,
|
||||
previous_state );
|
||||
|
||||
internal_force< Scalar , execution_space >::apply( mesh_fields ,
|
||||
user_dt ,
|
||||
current_state );
|
||||
|
||||
execution_space::fence();
|
||||
|
||||
perf_data.internal_force_time +=
|
||||
comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
// Assembly of elements' contributions to nodal force into
|
||||
// a nodal force vector. Update the accelerations, velocities,
|
||||
// displacements.
|
||||
// The same pattern can be used for matrix-free residual computations.
|
||||
nodal_step< Scalar , execution_space >::apply( mesh_fields ,
|
||||
x_bc ,
|
||||
current_state,
|
||||
next_state );
|
||||
execution_space::fence();
|
||||
|
||||
perf_data.central_diff +=
|
||||
comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
if ( print_sample && 0 == step % 100 ) {
|
||||
Kokkos::deep_copy( displacement_h , mesh_fields.displacement );
|
||||
Kokkos::deep_copy( velocity_h , mesh_fields.velocity );
|
||||
|
||||
if ( 1 == print_sample ) {
|
||||
|
||||
std::cout << "step " << step
|
||||
<< " : displacement(*,0,0) =" ;
|
||||
for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
|
||||
if ( model_coords_h(i,1) == 0 && model_coords_h(i,2) == 0 ) {
|
||||
std::cout << " " << displacement_h(i,0,next_state);
|
||||
}
|
||||
}
|
||||
std::cout << std::endl ;
|
||||
|
||||
const float tol = 1.0e-6 ;
|
||||
const int yb = global_max_y ;
|
||||
const int zb = global_max_z ;
|
||||
std::cout << "step " << step
|
||||
<< " : displacement(*," << yb << "," << zb << ") =" ;
|
||||
for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
|
||||
if ( fabs( model_coords_h(i,1) - yb ) < tol &&
|
||||
fabs( model_coords_h(i,2) - zb ) < tol ) {
|
||||
std::cout << " " << displacement_h(i,0,next_state);
|
||||
}
|
||||
}
|
||||
std::cout << std::endl ;
|
||||
}
|
||||
else if ( 2 == print_sample ) {
|
||||
|
||||
const float tol = 1.0e-6 ;
|
||||
const int xb = global_max_x / 2 ;
|
||||
const int yb = global_max_y / 2 ;
|
||||
const int zb = global_max_z / 2 ;
|
||||
|
||||
for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
|
||||
if ( fabs( model_coords_h(i,0) - xb ) < tol &&
|
||||
fabs( model_coords_h(i,1) - yb ) < tol &&
|
||||
fabs( model_coords_h(i,2) - zb ) < tol ) {
|
||||
std::cout << "step " << step
|
||||
<< " : displacement("
|
||||
<< xb << "," << yb << "," << zb << ") = {"
|
||||
<< std::setprecision(6)
|
||||
<< " " << displacement_h(i,0,next_state)
|
||||
<< std::setprecision(2)
|
||||
<< " " << displacement_h(i,1,next_state)
|
||||
<< std::setprecision(2)
|
||||
<< " " << displacement_h(i,2,next_state)
|
||||
<< " }" << std::endl ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return perf_data ;
|
||||
}
|
||||
|
||||
|
||||
template <typename Scalar, typename Device>
|
||||
static void driver( const char * const label ,
|
||||
comm::Machine machine ,
|
||||
const int gang_count ,
|
||||
const int elem_count_beg ,
|
||||
const int elem_count_end ,
|
||||
const int runs )
|
||||
{
|
||||
typedef Scalar scalar_type ;
|
||||
typedef Device execution_space ;
|
||||
typedef double coordinate_scalar_type ;
|
||||
typedef FixtureElementHex8 fixture_element_type ;
|
||||
|
||||
typedef BoxMeshFixture< coordinate_scalar_type ,
|
||||
execution_space ,
|
||||
fixture_element_type > fixture_type ;
|
||||
|
||||
typedef typename fixture_type::FEMeshType mesh_type ;
|
||||
|
||||
const size_t proc_count = comm::size( machine );
|
||||
const size_t proc_rank = comm::rank( machine );
|
||||
|
||||
const int space = 15 ;
|
||||
const int steps = 1000 ;
|
||||
const int print_sample = 0 ;
|
||||
|
||||
if ( comm::rank( machine ) == 0 ) {
|
||||
|
||||
std::cout << std::endl ;
|
||||
std::cout << "\"MiniExplicitDynamics with Kokkos " << label
|
||||
<< " time_steps(" << steps << ")"
|
||||
<< "\"" << std::endl;
|
||||
std::cout << std::left << std::setw(space) << "\"Element\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"Node\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"Initialize\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"ElemForce\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"NodeUpdate\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"NodeComm\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"Time/Elem\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"Time/Node\"";
|
||||
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << std::left << std::setw(space) << "\"count\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"count\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"microsec\"";
|
||||
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
for(int i = elem_count_beg ; i < elem_count_end ; i *= 2 )
|
||||
{
|
||||
const int iz = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) );
|
||||
const int iy = iz + 1 ;
|
||||
const int ix = 2 * iy ;
|
||||
const int nelem = ix * iy * iz ;
|
||||
const int nnode = ( ix + 1 ) * ( iy + 1 ) * ( iz + 1 );
|
||||
|
||||
mesh_type mesh =
|
||||
fixture_type::create( proc_count , proc_rank , gang_count ,
|
||||
ix , iy , iz );
|
||||
|
||||
mesh.parallel_data_map.machine = machine ;
|
||||
|
||||
PerformanceData perf , best ;
|
||||
|
||||
for(int j = 0; j < runs; j++){
|
||||
|
||||
perf = run<scalar_type,fixture_type>(mesh,ix,iy,iz,steps,print_sample);
|
||||
|
||||
if( j == 0 ) {
|
||||
best = perf ;
|
||||
}
|
||||
else {
|
||||
best.best( perf );
|
||||
}
|
||||
}
|
||||
|
||||
if ( comm::rank( machine ) == 0 ) {
|
||||
double time_per_element =
|
||||
( best.internal_force_time ) / ( nelem * perf.number_of_steps );
|
||||
double time_per_node =
|
||||
( best.comm_time + best.central_diff ) / ( nnode * perf.number_of_steps );
|
||||
|
||||
std::cout << std::setw(space-3) << nelem << " , "
|
||||
<< std::setw(space-3) << nnode << " , "
|
||||
<< std::setw(space-3) << best.number_of_steps << " , "
|
||||
<< std::setw(space-3) << best.init_time * 1000000 << " , "
|
||||
<< std::setw(space-3)
|
||||
<< ( best.internal_force_time * 1000000 ) / best.number_of_steps << " , "
|
||||
<< std::setw(space-3)
|
||||
<< ( best.central_diff * 1000000 ) / best.number_of_steps << " , "
|
||||
<< std::setw(space-3)
|
||||
<< ( best.comm_time * 1000000 ) / best.number_of_steps << " , "
|
||||
<< std::setw(space-3) << time_per_element * 1000000 << " , "
|
||||
<< std::setw(space-3) << time_per_node * 1000000
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace Explicit
|
||||
|
||||
#endif /* #ifndef EXPLICIT_DRIVER_HPP */
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,86 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_FEMESH_HPP
|
||||
#define KOKKOS_FEMESH_HPP
|
||||
|
||||
#include <utility>
|
||||
#include <limits>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_StaticCrsGraph.hpp>
|
||||
|
||||
#include <ParallelComm.hpp>
|
||||
#include <ParallelDataMap.hpp>
|
||||
|
||||
namespace HybridFEM {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief Finite element mesh fixture for hybrid parallel performance tests.
|
||||
*/
|
||||
template< typename CoordScalarType , unsigned ElemNodeCount , class Device >
|
||||
struct FEMesh {
|
||||
|
||||
typedef typename Device::size_type size_type ;
|
||||
|
||||
static const size_type element_node_count = ElemNodeCount ;
|
||||
|
||||
typedef Kokkos::View< CoordScalarType*[3] , Device > node_coords_type ;
|
||||
typedef Kokkos::View< size_type*[ElemNodeCount], Device > elem_node_ids_type ;
|
||||
typedef Kokkos::StaticCrsGraph< size_type[2] , Device > node_elem_ids_type ;
|
||||
|
||||
node_coords_type node_coords ;
|
||||
elem_node_ids_type elem_node_ids ;
|
||||
node_elem_ids_type node_elem_ids ;
|
||||
Kokkos::ParallelDataMap parallel_data_map ;
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
#endif /* #ifndef KOKKOS_FEMESH_HPP */
|
||||
|
||||
@ -1,268 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef ELEMENTHEX_HPP
|
||||
#define ELEMENTHEX_HPP
|
||||
|
||||
namespace HybridFEM {

/** \brief  1D tensor-product data (basis values, derivatives, quadrature
 *          weights) for a hexahedral element with NodeCount nodes.
 *          Only the 8 and 27 node specializations are defined.
 */
template< unsigned NodeCount >
class HexElement_TensorData ;

template< unsigned NodeCount , class Device >
class HexElement_TensorEval ;

//----------------------------------------------------------------------------
/** \brief  Evaluate Hex element on interval [-1,1]^3
 *
 *  Linear (2 function) basis with a 2 point quadrature rule per direction.
 */
template<>
class HexElement_TensorData< 8 > {
public:

  static const unsigned element_node_count    = 8 ;
  static const unsigned spatial_dimension     = 3 ;
  static const unsigned integration_count_1d  = 2 ;
  static const unsigned function_count_1d     = 2 ;

  // Both tables are indexed [ integration point ][ basis function ],
  // matching how the constructor fills them and how HexElement_Data reads them.
  float values_1d [ integration_count_1d ][ function_count_1d ];
  float derivs_1d [ integration_count_1d ][ function_count_1d ];
  float weights_1d[ integration_count_1d ];

  // Per node: 1D index in x , y , z ; the 4th byte is padding and kept at zero.
  unsigned char eval_map[ element_node_count ][4] ;

  /** \brief  Value of 1D basis function jf at x ; zero for an unknown jf. */
  static float eval_value_1d( const unsigned jf , const float x )
  {
    return 0 == jf ? 0.5 * ( 1.0 - x ) : (
           1 == jf ? 0.5 * ( 1.0 + x ) : 0 );
  }

  /** \brief  Derivative of 1D basis function jf (constant for the linear
   *          basis, so the evaluation point is unused); zero for an unknown jf.
   */
  static float eval_deriv_1d( const unsigned jf , const float )
  {
    return 0 == jf ? -0.5 : (
           1 == jf ?  0.5 : 0 );
  }

  HexElement_TensorData()
  {
    const unsigned char tmp_map[ element_node_count ][ spatial_dimension ] =
      { { 0 , 0 , 0 },
        { 1 , 0 , 0 },
        { 1 , 1 , 0 },
        { 0 , 1 , 0 },
        { 0 , 0 , 1 },
        { 1 , 0 , 1 },
        { 1 , 1 , 1 },
        { 0 , 1 , 1 } };

    weights_1d[0] = 1 ;
    weights_1d[1] = 1 ;

    const float points_1d[ integration_count_1d ] =
      { -0.577350269 , 0.577350269 };

    for ( unsigned i = 0 ; i < element_node_count ; ++i ) {
      eval_map[i][0] = tmp_map[i][0];
      eval_map[i][1] = tmp_map[i][1];
      eval_map[i][2] = tmp_map[i][2];
      eval_map[i][3] = 0 ; // padding: keep the object fully initialized
    }

    for ( unsigned xp = 0 ; xp < integration_count_1d ; ++xp ) {
    for ( unsigned xf = 0 ; xf < function_count_1d ; ++xf ) {
      values_1d[xp][xf] = eval_value_1d( xf , points_1d[xp] );
      derivs_1d[xp][xf] = eval_deriv_1d( xf , points_1d[xp] );
    }}
  }
};

//----------------------------------------------------------------------------
/** \brief  Quadratic (3 function) basis with a 3 point quadrature rule per
 *          direction on the interval [-1,1].
 */
template<>
class HexElement_TensorData< 27 > {
public:

  static const unsigned element_node_count    = 27 ;
  static const unsigned spatial_dimension     = 3 ;
  static const unsigned integration_count_1d  = 3 ;
  static const unsigned function_count_1d     = 3 ;

  // Both tables are indexed [ integration point ][ basis function ],
  // matching how the constructor fills them and how HexElement_Data reads them.
  float values_1d [ integration_count_1d ][ function_count_1d ];
  float derivs_1d [ integration_count_1d ][ function_count_1d ];
  float weights_1d[ integration_count_1d ];

  // Per node: 1D index in x , y , z ; the 4th byte is padding and kept at zero.
  unsigned char eval_map[ element_node_count ][4] ;

  // Member data:
  //   sizeof(float) * 9 +   values_1d
  //   sizeof(float) * 9 +   derivs_1d
  //   sizeof(float) * 3 +   weights_1d
  //   sizeof(char)  * 27 * 4   eval_map including the padding byte

  /** \brief  Value of 1D basis function jf at p ; zero for an unknown jf. */
  static float eval_value_1d( const unsigned jf , const float p )
  {
    return 0 == jf ? 0.5 * p * ( p - 1 ) : (
           1 == jf ? 1.0 - p * p : (
           2 == jf ? 0.5 * p * ( p + 1 ) : 0 ));
  }

  /** \brief  Derivative of 1D basis function jf at p ; zero for an unknown jf. */
  static float eval_deriv_1d( const unsigned jf , const float p )
  {
    return 0 == jf ? p - 0.5 : (
           1 == jf ? -2.0 * p : (
           2 == jf ? p + 0.5 : 0 ));
  }

  HexElement_TensorData()
  {
    const unsigned char tmp_map[ element_node_count ][ spatial_dimension ] =
      { { 0 , 0 , 0 },
        { 2 , 0 , 0 },
        { 2 , 2 , 0 },
        { 0 , 2 , 0 },
        { 0 , 0 , 2 },
        { 2 , 0 , 2 },
        { 2 , 2 , 2 },
        { 0 , 2 , 2 },
        { 1 , 0 , 0 },
        { 2 , 1 , 0 },
        { 1 , 2 , 0 },
        { 0 , 1 , 0 },
        { 0 , 0 , 1 },
        { 2 , 0 , 1 },
        { 2 , 2 , 1 },
        { 0 , 2 , 1 },
        { 1 , 0 , 2 },
        { 2 , 1 , 2 },
        { 1 , 2 , 2 },
        { 0 , 1 , 2 },
        { 1 , 1 , 1 },
        { 1 , 1 , 0 },
        { 1 , 1 , 2 },
        { 0 , 1 , 1 },
        { 2 , 1 , 1 },
        { 1 , 0 , 1 },
        { 1 , 2 , 1 } };

    // Interval [-1,1]

    weights_1d[0] = 0.555555556 ;
    weights_1d[1] = 0.888888889 ;
    weights_1d[2] = 0.555555556 ;

    const float points_1d[3] = { -0.774596669 ,
                                  0.000000000 ,
                                  0.774596669 };

    for ( unsigned i = 0 ; i < element_node_count ; ++i ) {
      eval_map[i][0] = tmp_map[i][0];
      eval_map[i][1] = tmp_map[i][1];
      eval_map[i][2] = tmp_map[i][2];
      eval_map[i][3] = 0 ; // padding: keep the object fully initialized
    }

    for ( unsigned xp = 0 ; xp < integration_count_1d ; ++xp ) {
    for ( unsigned xf = 0 ; xf < function_count_1d ; ++xf ) {
      values_1d[xp][xf] = eval_value_1d( xf , points_1d[xp] );
      derivs_1d[xp][xf] = eval_deriv_1d( xf , points_1d[xp] );
    }}
  }
};

//----------------------------------------------------------------------------
/** \brief  Full 3D element tables built as tensor products of the 1D data.
 *
 *  weights[ip]          quadrature weight of integration point ip
 *  values[ip][jf]       value of basis function jf at integration point ip
 *  gradients[ip][d][jf] derivative of basis function jf in direction d at ip
 *
 *  The integration points use the same ( x , y , z ) index map as the nodes.
 */
template< unsigned NodeCount >
class HexElement_Data {
public:
  static const unsigned spatial_dimension   = 3 ;
  static const unsigned element_node_count  = NodeCount ;
  static const unsigned integration_count   = NodeCount ;
  static const unsigned function_count      = NodeCount ;

  float weights[   integration_count ] ;
  float values[    integration_count ][ function_count ];
  float gradients[ integration_count ][ spatial_dimension ][ function_count ];

  HexElement_Data()
  {
    HexElement_TensorData< NodeCount > tensor_data ;

    for ( unsigned ip = 0 ; ip < integration_count ; ++ip ) {

      // 1D integration point indices of this 3D integration point
      const unsigned ipx = tensor_data.eval_map[ip][0] ;
      const unsigned ipy = tensor_data.eval_map[ip][1] ;
      const unsigned ipz = tensor_data.eval_map[ip][2] ;

      weights[ip] = tensor_data.weights_1d[ ipx ] *
                    tensor_data.weights_1d[ ipy ] *
                    tensor_data.weights_1d[ ipz ] ;

      for ( unsigned jf = 0 ; jf < function_count ; ++jf ) {

        // 1D basis function indices of this 3D basis function
        const unsigned jfx = tensor_data.eval_map[jf][0] ;
        const unsigned jfy = tensor_data.eval_map[jf][1] ;
        const unsigned jfz = tensor_data.eval_map[jf][2] ;

        values[ip][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                         tensor_data.values_1d[ ipy ][ jfy ] *
                         tensor_data.values_1d[ ipz ][ jfz ] ;

        gradients[ip][0][jf] = tensor_data.derivs_1d[ ipx ][ jfx ] *
                               tensor_data.values_1d[ ipy ][ jfy ] *
                               tensor_data.values_1d[ ipz ][ jfz ] ;

        gradients[ip][1][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                               tensor_data.derivs_1d[ ipy ][ jfy ] *
                               tensor_data.values_1d[ ipz ][ jfz ] ;

        gradients[ip][2][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
                               tensor_data.values_1d[ ipy ][ jfy ] *
                               tensor_data.derivs_1d[ ipz ][ jfz ] ;
      }
    }
  }
};

//----------------------------------------------------------------------------

} /* namespace HybridFEM */
|
||||
|
||||
#endif /* #ifndef ELEMENTHEX_HPP */
|
||||
|
||||
|
||||
@ -1,443 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_HEXEXPLICITFUNCTIONS_HPP
|
||||
#define KOKKOS_HEXEXPLICITFUNCTIONS_HPP
|
||||
|
||||
#include <math.h>
|
||||
|
||||
namespace Explicit {
|
||||
|
||||
struct Hex8Functions
|
||||
{
|
||||
// Element topology: spatial dimension and number of nodes per element.
static const unsigned SpatialDim    = 3 ,
                      ElemNodeCount = 8 ;

// Offsets of the nine components of a full 3x3 tensor stored as a
// vector of length K_F_SIZE: the diagonal first, then XY, YZ, ZX,
// followed by their transposed counterparts YX, ZY, XZ.
static const unsigned K_F_XX = 0 , K_F_YY = 1 , K_F_ZZ = 2 ,
                      K_F_XY = 3 , K_F_YZ = 4 , K_F_ZX = 5 ,
                      K_F_YX = 6 , K_F_ZY = 7 , K_F_XZ = 8 ,
                      K_F_SIZE = 9 ;

// Offsets into a symmetric 3x3 tensor stored as a vector of length
// K_S_SIZE.  Each transposed name (YX, ZY, XZ) shares the offset of
// its mirror component (XY, YZ, ZX).
static const unsigned K_S_XX = 0 , K_S_YY = 1 , K_S_ZZ = 2 ,
                      K_S_XY = 3 , K_S_YZ = 4 , K_S_ZX = 5 ,
                      K_S_YX = 3 , K_S_ZY = 4 , K_S_XZ = 5 ,
                      K_S_SIZE = 6 ;

// Offsets into a skew-symmetric 3x3 tensor stored as a vector of
// length K_V_SIZE (only the three independent components are kept).
static const unsigned K_V_XY = 0 , K_V_YZ = 1 , K_V_ZX = 2 ,
                      K_V_SIZE = 3 ;
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarA , typename ScalarB >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
double dot8( const ScalarA * const a , const ScalarB * const b )
|
||||
{ return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3] +
|
||||
a[4] * b[4] + a[5] * b[5] + a[6] * b[6] + a[7] * b[7] ; }
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
template< class ScalarPrecise ,
|
||||
class ScalarCompact >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void grad( const ScalarPrecise x[] ,
|
||||
const ScalarPrecise z[] ,
|
||||
ScalarCompact grad_y[] )
|
||||
{
|
||||
const ScalarCompact R42=(x[3] - x[1]);
|
||||
const ScalarCompact R52=(x[4] - x[1]);
|
||||
const ScalarCompact R54=(x[4] - x[3]);
|
||||
|
||||
const ScalarCompact R63=(x[5] - x[2]);
|
||||
const ScalarCompact R83=(x[7] - x[2]);
|
||||
const ScalarCompact R86=(x[7] - x[5]);
|
||||
|
||||
const ScalarCompact R31=(x[2] - x[0]);
|
||||
const ScalarCompact R61=(x[5] - x[0]);
|
||||
const ScalarCompact R74=(x[6] - x[3]);
|
||||
|
||||
const ScalarCompact R72=(x[6] - x[1]);
|
||||
const ScalarCompact R75=(x[6] - x[4]);
|
||||
const ScalarCompact R81=(x[7] - x[0]);
|
||||
|
||||
const ScalarCompact t1=(R63 + R54);
|
||||
const ScalarCompact t2=(R61 + R74);
|
||||
const ScalarCompact t3=(R72 + R81);
|
||||
|
||||
const ScalarCompact t4 =(R86 + R42);
|
||||
const ScalarCompact t5 =(R83 + R52);
|
||||
const ScalarCompact t6 =(R75 + R31);
|
||||
|
||||
// Calculate Y gradient from X and Z data
|
||||
|
||||
grad_y[0] = (z[1] * t1) - (z[2] * R42) - (z[3] * t5) + (z[4] * t4) + (z[5] * R52) - (z[7] * R54);
|
||||
grad_y[1] = (z[2] * t2) + (z[3] * R31) - (z[0] * t1) - (z[5] * t6) + (z[6] * R63) - (z[4] * R61);
|
||||
grad_y[2] = (z[3] * t3) + (z[0] * R42) - (z[1] * t2) - (z[6] * t4) + (z[7] * R74) - (z[5] * R72);
|
||||
grad_y[3] = (z[0] * t5) - (z[1] * R31) - (z[2] * t3) + (z[7] * t6) + (z[4] * R81) - (z[6] * R83);
|
||||
grad_y[4] = (z[5] * t3) + (z[6] * R86) - (z[7] * t2) - (z[0] * t4) - (z[3] * R81) + (z[1] * R61);
|
||||
grad_y[5] = (z[6] * t5) - (z[4] * t3) - (z[7] * R75) + (z[1] * t6) - (z[0] * R52) + (z[2] * R72);
|
||||
grad_y[6] = (z[7] * t1) - (z[5] * t5) - (z[4] * R86) + (z[2] * t4) - (z[1] * R63) + (z[3] * R83);
|
||||
grad_y[7] = (z[4] * t2) - (z[6] * t1) + (z[5] * R75) - (z[3] * t6) - (z[2] * R74) + (z[0] * R54);
|
||||
}
|
||||
|
||||
template< class ScalarPrecise ,
|
||||
class ScalarCompact >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
void grad( const ScalarPrecise x[] ,
|
||||
const ScalarPrecise y[] ,
|
||||
const ScalarPrecise z[] ,
|
||||
ScalarCompact grad_x[] ,
|
||||
ScalarCompact grad_y[] ,
|
||||
ScalarCompact grad_z[] )
|
||||
{
|
||||
grad( x , z , grad_y );
|
||||
grad( z , y , grad_x );
|
||||
grad( y , x , grad_z );
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
template< class ScalarPrecise ,
|
||||
class ScalarCompact >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void polar_decomp( const float dt ,
|
||||
const ScalarCompact v_gr[] ,
|
||||
ScalarPrecise stretch[] /* INOUT */ ,
|
||||
ScalarCompact str_ten[] /* OUT */ ,
|
||||
ScalarCompact rot[] /* OUT */ )
|
||||
{
|
||||
const float dt_half = 0.5 * dt;
|
||||
|
||||
ScalarCompact vort[ K_V_SIZE ]; // Vorticity
|
||||
|
||||
// Symmetric part
|
||||
str_ten[K_S_XX] = v_gr[K_F_XX];
|
||||
str_ten[K_S_YY] = v_gr[K_F_YY];
|
||||
str_ten[K_S_ZZ] = v_gr[K_F_ZZ];
|
||||
str_ten[K_S_XY] = 0.5 * ( v_gr[K_F_XY] + v_gr[K_F_YX] );
|
||||
str_ten[K_S_YZ] = 0.5 * ( v_gr[K_F_YZ] + v_gr[K_F_ZY] );
|
||||
str_ten[K_S_ZX] = 0.5 * ( v_gr[K_F_ZX] + v_gr[K_F_XZ] );
|
||||
|
||||
// Skew Symmetric part
|
||||
vort[K_V_XY] = 0.5 * ( v_gr[K_F_XY] - v_gr[K_F_YX] );
|
||||
vort[K_V_YZ] = 0.5 * ( v_gr[K_F_YZ] - v_gr[K_F_ZY] );
|
||||
vort[K_V_ZX] = 0.5 * ( v_gr[K_F_ZX] - v_gr[K_F_XZ] );
|
||||
|
||||
// calculate the rates of rotation via gauss elimination.
|
||||
|
||||
ScalarCompact z1 = str_ten[K_S_XY] * stretch[K_S_ZX] -
|
||||
str_ten[K_S_ZX] * stretch[K_S_XY] +
|
||||
str_ten[K_S_YY] * stretch[K_S_YZ] -
|
||||
str_ten[K_S_YZ] * stretch[K_S_YY] +
|
||||
str_ten[K_S_YZ] * stretch[K_S_ZZ] -
|
||||
str_ten[K_S_ZZ] * stretch[K_S_YZ];
|
||||
|
||||
ScalarCompact z2 = str_ten[K_S_ZX] * stretch[K_S_XX] -
|
||||
str_ten[K_S_XX] * stretch[K_S_ZX] +
|
||||
str_ten[K_S_YZ] * stretch[K_S_XY] -
|
||||
str_ten[K_S_XY] * stretch[K_S_YZ] +
|
||||
str_ten[K_S_ZZ] * stretch[K_S_ZX] -
|
||||
str_ten[K_S_ZX] * stretch[K_S_ZZ];
|
||||
|
||||
ScalarCompact z3 = str_ten[K_S_XX] * stretch[K_S_XY] -
|
||||
str_ten[K_S_XY] * stretch[K_S_XX] +
|
||||
str_ten[K_S_XY] * stretch[K_S_YY] -
|
||||
str_ten[K_S_YY] * stretch[K_S_XY] +
|
||||
str_ten[K_S_ZX] * stretch[K_S_YZ] -
|
||||
str_ten[K_S_YZ] * stretch[K_S_ZX];
|
||||
|
||||
{
|
||||
// forward elimination
|
||||
|
||||
const ScalarCompact a1inv = 1.0 / (stretch[K_S_YY] + stretch[K_S_ZZ]);
|
||||
const ScalarCompact a4BYa1 = -1 * stretch[K_S_XY] * a1inv;
|
||||
const ScalarCompact a2inv = 1.0 / (stretch[K_S_ZZ] + stretch[K_S_XX] + stretch[K_S_XY] * a4BYa1);
|
||||
|
||||
const ScalarCompact a5 = -stretch[K_S_YZ] + stretch[K_S_ZX] * a4BYa1;
|
||||
|
||||
z2 -= z1 * a4BYa1;
|
||||
const ScalarCompact a6BYa1 = -1 * stretch[K_S_ZX] * a1inv;
|
||||
const ScalarCompact a5BYa2 = a5 * a2inv;
|
||||
z3 -= z1 * a6BYa1 - z2 * a5BYa2;
|
||||
|
||||
// backward substitution -
|
||||
|
||||
z3 /= (stretch[K_S_XX] + stretch[K_S_YY] + stretch[K_S_ZX] * a6BYa1 + a5 * a5BYa2);
|
||||
z2 = (z2 - a5 * z3) * a2inv;
|
||||
z1 = (z1*a1inv - a6BYa1 * z3 -a4BYa1 * z2);
|
||||
}
|
||||
|
||||
// calculate rotation rates - recall that spin_rate is an asymmetric tensor,
|
||||
// so compute spin rate vector as dual of spin rate tensor,
|
||||
// i.e w_i = e_ijk * spin_rate_jk
|
||||
|
||||
z1 += vort[K_V_YZ];
|
||||
z2 += vort[K_V_ZX];
|
||||
z3 += vort[K_V_XY];
|
||||
|
||||
{
|
||||
// update rotation tensor:
|
||||
// 1) premultiply old rotation tensor to get right-hand side.
|
||||
|
||||
ScalarCompact r_XX = rot[K_F_XX] + dt_half*( z3 * rot[K_F_YX] - z2 * rot[K_F_ZX] );
|
||||
ScalarCompact r_YX = rot[K_F_YX] + dt_half*( z1 * rot[K_F_ZX] - z3 * rot[K_F_XX] );
|
||||
ScalarCompact r_ZX = rot[K_F_ZX] + dt_half*( z2 * rot[K_F_XX] - z1 * rot[K_F_YX] );
|
||||
ScalarCompact r_XY = rot[K_F_XY] + dt_half*( z3 * rot[K_F_YY] - z2 * rot[K_F_ZY] );
|
||||
ScalarCompact r_YY = rot[K_F_YY] + dt_half*( z1 * rot[K_F_ZY] - z3 * rot[K_F_XY] );
|
||||
ScalarCompact r_ZY = rot[K_F_ZY] + dt_half*( z2 * rot[K_F_XY] - z1 * rot[K_F_YY] );
|
||||
ScalarCompact r_XZ = rot[K_F_XZ] + dt_half*( z3 * rot[K_F_YZ] - z2 * rot[K_F_ZZ] );
|
||||
ScalarCompact r_YZ = rot[K_F_YZ] + dt_half*( z1 * rot[K_F_ZZ] - z3 * rot[K_F_XZ] );
|
||||
ScalarCompact r_ZZ = rot[K_F_ZZ] + dt_half*( z2 * rot[K_F_XZ] - z1 * rot[K_F_YZ] );
|
||||
|
||||
|
||||
// 2) solve for new rotation tensor via gauss elimination.
|
||||
// forward elimination -
|
||||
|
||||
const ScalarCompact a12 = - dt_half * z3;
|
||||
const ScalarCompact a13 = dt_half * z2;
|
||||
ScalarCompact b32 = - dt_half * z1;
|
||||
const ScalarCompact a22inv = 1.0 / (1.0 + a12 * a12);
|
||||
|
||||
const ScalarCompact a13a12 = a13*a12;
|
||||
const ScalarCompact a23 = b32 + a13a12;
|
||||
|
||||
r_YX += r_XX * a12;
|
||||
r_YY += r_XY * a12;
|
||||
r_YZ += r_XZ * a12;
|
||||
|
||||
b32 = (b32 - a13a12) * a22inv;
|
||||
|
||||
r_ZX += r_XX * a13 + r_YX * b32;
|
||||
r_ZY += r_XY * a13 + r_YY * b32;
|
||||
r_ZZ += r_XZ * a13 + r_YZ * b32;
|
||||
|
||||
// backward substitution -
|
||||
|
||||
const ScalarCompact a33inv = 1.0 / (1.0 + a13 * a13 + a23 * b32);
|
||||
|
||||
rot[K_F_ZX] = r_ZX * a33inv;
|
||||
rot[K_F_ZY] = r_ZY * a33inv;
|
||||
rot[K_F_ZZ] = r_ZZ * a33inv;
|
||||
rot[K_F_YX] = ( r_YX - rot[K_F_ZX] * a23 ) * a22inv;
|
||||
rot[K_F_YY] = ( r_YY - rot[K_F_ZY] * a23 ) * a22inv;
|
||||
rot[K_F_YZ] = ( r_YZ - rot[K_F_ZZ] * a23 ) * a22inv;
|
||||
rot[K_F_XX] = r_XX - rot[K_F_ZX] * a13 - rot[K_F_YX] * a12;
|
||||
rot[K_F_XY] = r_XY - rot[K_F_ZY] * a13 - rot[K_F_YY] * a12;
|
||||
rot[K_F_XZ] = r_XZ - rot[K_F_ZZ] * a13 - rot[K_F_YZ] * a12;
|
||||
}
|
||||
|
||||
// update stretch tensor in the new configuration -
|
||||
|
||||
const ScalarCompact a1 = str_ten[K_S_XY] + vort[K_V_XY];
|
||||
const ScalarCompact a2 = str_ten[K_S_YZ] + vort[K_V_YZ];
|
||||
const ScalarCompact a3 = str_ten[K_S_ZX] + vort[K_V_ZX];
|
||||
const ScalarCompact b1 = str_ten[K_S_ZX] - vort[K_V_ZX];
|
||||
const ScalarCompact b2 = str_ten[K_S_XY] - vort[K_V_XY];
|
||||
const ScalarCompact b3 = str_ten[K_S_YZ] - vort[K_V_YZ];
|
||||
|
||||
const ScalarCompact s_XX = stretch[K_S_XX];
|
||||
const ScalarCompact s_YY = stretch[K_S_YY];
|
||||
const ScalarCompact s_ZZ = stretch[K_S_ZZ];
|
||||
const ScalarCompact s_XY = stretch[K_S_XY];
|
||||
const ScalarCompact s_YZ = stretch[K_S_YZ];
|
||||
const ScalarCompact s_ZX = stretch[K_S_ZX];
|
||||
|
||||
stretch[K_S_XX] += dt * (str_ten[K_S_XX] * s_XX + ( a1 + z3 ) * s_XY + ( b1 - z2 ) * s_ZX);
|
||||
stretch[K_S_YY] += dt * (str_ten[K_S_YY] * s_YY + ( a2 + z1 ) * s_YZ + ( b2 - z3 ) * s_XY);
|
||||
stretch[K_S_ZZ] += dt * (str_ten[K_S_ZZ] * s_ZZ + ( a3 + z2 ) * s_ZX + ( b3 - z1 ) * s_YZ);
|
||||
stretch[K_S_XY] += dt * (str_ten[K_S_XX] * s_XY + ( a1 ) * s_YY + ( b1 ) * s_YZ - z3 * s_XX + z1 * s_ZX);
|
||||
stretch[K_S_YZ] += dt * (str_ten[K_S_YY] * s_YZ + ( a2 ) * s_ZZ + ( b2 ) * s_ZX - z1 * s_YY + z2 * s_XY);
|
||||
stretch[K_S_ZX] += dt * (str_ten[K_S_ZZ] * s_ZX + ( a3 ) * s_XX + ( b3 ) * s_XY - z2 * s_ZZ + z3 * s_YZ);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarCompact >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
void rotate_tensor( const ScalarCompact str_ten[] ,
|
||||
const ScalarCompact rot[] ,
|
||||
ScalarCompact rot_str[] )
|
||||
{
|
||||
ScalarCompact t[9];
|
||||
|
||||
t[0] = str_ten[K_S_XX]*rot[K_F_XX] + str_ten[K_S_XY]*rot[K_F_YX] + str_ten[K_S_XZ]*rot[K_F_ZX];
|
||||
t[1] = str_ten[K_S_YX]*rot[K_F_XX] + str_ten[K_S_YY]*rot[K_F_YX] + str_ten[K_S_YZ]*rot[K_F_ZX];
|
||||
t[2] = str_ten[K_S_ZX]*rot[K_F_XX] + str_ten[K_S_ZY]*rot[K_F_YX] + str_ten[K_S_ZZ]*rot[K_F_ZX];
|
||||
|
||||
t[3] = str_ten[K_S_XX]*rot[K_F_XY] + str_ten[K_S_XY]*rot[K_F_YY] + str_ten[K_S_XZ]*rot[K_F_ZY];
|
||||
t[4] = str_ten[K_S_YX]*rot[K_F_XY] + str_ten[K_S_YY]*rot[K_F_YY] + str_ten[K_S_YZ]*rot[K_F_ZY];
|
||||
t[5] = str_ten[K_S_ZX]*rot[K_F_XY] + str_ten[K_S_ZY]*rot[K_F_YY] + str_ten[K_S_ZZ]*rot[K_F_ZY];
|
||||
|
||||
t[6] = str_ten[K_S_XX]*rot[K_F_XZ] + str_ten[K_S_XY]*rot[K_F_YZ] + str_ten[K_S_XZ]*rot[K_F_ZZ];
|
||||
t[7] = str_ten[K_S_YX]*rot[K_F_XZ] + str_ten[K_S_YY]*rot[K_F_YZ] + str_ten[K_S_YZ]*rot[K_F_ZZ];
|
||||
t[8] = str_ten[K_S_ZX]*rot[K_F_XZ] + str_ten[K_S_ZY]*rot[K_F_YZ] + str_ten[K_S_ZZ]*rot[K_F_ZZ];
|
||||
|
||||
|
||||
rot_str[ K_S_XX ] = rot[K_F_XX] * t[0] + rot[K_F_YX] * t[1] + rot[K_F_ZX] * t[2];
|
||||
rot_str[ K_S_YY ] = rot[K_F_XY] * t[3] + rot[K_F_YY] * t[4] + rot[K_F_ZY] * t[5];
|
||||
rot_str[ K_S_ZZ ] = rot[K_F_XZ] * t[6] + rot[K_F_YZ] * t[7] + rot[K_F_ZZ] * t[8];
|
||||
|
||||
rot_str[ K_S_XY ] = rot[K_F_XX] * t[3] + rot[K_F_YX] * t[4] + rot[K_F_ZX] * t[5];
|
||||
rot_str[ K_S_YZ ] = rot[K_F_XY] * t[6] + rot[K_F_YY] * t[7] + rot[K_F_ZY] * t[8];
|
||||
rot_str[ K_S_ZX ] = rot[K_F_XZ] * t[0] + rot[K_F_YZ] * t[1] + rot[K_F_ZZ] * t[2];
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
template< class ScalarPrecise ,
|
||||
class ScalarCompact >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
void rotate_tensor_backward( const ScalarPrecise stress[] ,
|
||||
const ScalarCompact rot[] ,
|
||||
ScalarCompact rot_stress[] )
|
||||
{
|
||||
ScalarCompact t[9] ;
|
||||
|
||||
t[0] = stress[K_S_XX]*rot[K_F_XX]+ stress[K_S_XY]*rot[K_F_XY]+ stress[K_S_XZ]*rot[K_F_XZ];
|
||||
t[1] = stress[K_S_YX]*rot[K_F_XX]+ stress[K_S_YY]*rot[K_F_XY]+ stress[K_S_YZ]*rot[K_F_XZ];
|
||||
t[2] = stress[K_S_ZX]*rot[K_F_XX]+ stress[K_S_ZY]*rot[K_F_XY]+ stress[K_S_ZZ]*rot[K_F_XZ];
|
||||
t[3] = stress[K_S_XX]*rot[K_F_YX]+ stress[K_S_XY]*rot[K_F_YY]+ stress[K_S_XZ]*rot[K_F_YZ];
|
||||
t[4] = stress[K_S_YX]*rot[K_F_YX]+ stress[K_S_YY]*rot[K_F_YY]+ stress[K_S_YZ]*rot[K_F_YZ];
|
||||
t[5] = stress[K_S_ZX]*rot[K_F_YX]+ stress[K_S_ZY]*rot[K_F_YY]+ stress[K_S_ZZ]*rot[K_F_YZ];
|
||||
t[6] = stress[K_S_XX]*rot[K_F_ZX]+ stress[K_S_XY]*rot[K_F_ZY]+ stress[K_S_XZ]*rot[K_F_ZZ];
|
||||
t[7] = stress[K_S_YX]*rot[K_F_ZX]+ stress[K_S_YY]*rot[K_F_ZY]+ stress[K_S_YZ]*rot[K_F_ZZ];
|
||||
t[8] = stress[K_S_ZX]*rot[K_F_ZX]+ stress[K_S_ZY]*rot[K_F_ZY]+ stress[K_S_ZZ]*rot[K_F_ZZ];
|
||||
|
||||
rot_stress[ K_S_XX ] = rot[K_F_XX]*t[0] + rot[K_F_XY]*t[1] + rot[K_F_XZ]*t[2];
|
||||
rot_stress[ K_S_YY ] = rot[K_F_YX]*t[3] + rot[K_F_YY]*t[4] + rot[K_F_YZ]*t[5];
|
||||
rot_stress[ K_S_ZZ ] = rot[K_F_ZX]*t[6] + rot[K_F_ZY]*t[7] + rot[K_F_ZZ]*t[8];
|
||||
|
||||
rot_stress[ K_S_XY ] = rot[K_F_XX]*t[3] + rot[K_F_XY]*t[4] + rot[K_F_XZ]*t[5];
|
||||
rot_stress[ K_S_YZ ] = rot[K_F_YX]*t[6] + rot[K_F_YY]*t[7] + rot[K_F_YZ]*t[8];
|
||||
rot_stress[ K_S_ZX ] = rot[K_F_ZX]*t[0] + rot[K_F_ZY]*t[1] + rot[K_F_ZZ]*t[2];
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
template< class ScalarPrecise ,
|
||||
class ScalarCompact >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void update_stress( const float dt ,
|
||||
const float two_mu ,
|
||||
const float bulk_modulus ,
|
||||
const ScalarCompact rot_str[] ,
|
||||
ScalarPrecise stress[] )
|
||||
{
|
||||
const ScalarCompact e = rot_str[ K_S_XX ] + rot_str[ K_S_YY ] + rot_str[ K_S_ZZ ] ;
|
||||
const ScalarCompact eb = e * bulk_modulus ;
|
||||
const ScalarCompact e3 = e / 3.0 ;
|
||||
|
||||
stress[K_S_XX] += dt * ( two_mu * ( rot_str[K_S_XX] - e3 ) + eb );
|
||||
stress[K_S_YY] += dt * ( two_mu * ( rot_str[K_S_YY] - e3 ) + eb );
|
||||
stress[K_S_ZZ] += dt * ( two_mu * ( rot_str[K_S_ZZ] - e3 ) + eb );
|
||||
|
||||
stress[K_S_XY] += dt * two_mu * rot_str[K_S_XY];
|
||||
stress[K_S_YZ] += dt * two_mu * rot_str[K_S_YZ];
|
||||
stress[K_S_ZX] += dt * two_mu * rot_str[K_S_ZX];
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
template< class ScalarPrecise ,
|
||||
class ScalarCompact >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
void comp_force( const ScalarPrecise vx[] ,
|
||||
const ScalarPrecise vy[] ,
|
||||
const ScalarPrecise vz[] ,
|
||||
const ScalarCompact grad_x[] ,
|
||||
const ScalarCompact grad_y[] ,
|
||||
const ScalarCompact grad_z[] ,
|
||||
const ScalarCompact total_stress12th[] ,
|
||||
ScalarCompact force[][ SpatialDim ] ,
|
||||
ScalarCompact & energy )
|
||||
{
|
||||
ScalarPrecise internal_energy = 0 ;
|
||||
|
||||
for ( unsigned inode = 0; inode < ElemNodeCount ; ++inode ) {
|
||||
|
||||
force[inode][0] = total_stress12th[K_S_XX] * grad_x[inode] +
|
||||
total_stress12th[K_S_XY] * grad_y[inode] +
|
||||
total_stress12th[K_S_XZ] * grad_z[inode] ;
|
||||
|
||||
force[inode][1] = total_stress12th[K_S_YX] * grad_x[inode] +
|
||||
total_stress12th[K_S_YY] * grad_y[inode] +
|
||||
total_stress12th[K_S_YZ] * grad_z[inode] ;
|
||||
|
||||
force[inode][2] = total_stress12th[K_S_ZX] * grad_x[inode] +
|
||||
total_stress12th[K_S_ZY] * grad_y[inode] +
|
||||
total_stress12th[K_S_ZZ] * grad_z[inode] ;
|
||||
|
||||
internal_energy += force[inode][0] * vx[inode] +
|
||||
force[inode][1] * vy[inode] +
|
||||
force[inode][2] * vz[inode] ;
|
||||
}
|
||||
|
||||
energy = internal_energy ;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
};
|
||||
|
||||
} // namespace Explicit
|
||||
|
||||
#endif /* #ifndef KOKKOS_HEXEXPLICITFUNCTIONS_HPP */
|
||||
|
||||
@ -1,341 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef HYBRIDFEM_IMPLICIT_HPP
|
||||
#define HYBRIDFEM_IMPLICIT_HPP
|
||||
|
||||
#include <algorithm>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <string>
#include <utility>

#include <Kokkos_Core.hpp>
#include <SparseLinearSystem.hpp>
#include <SparseLinearSystemFill.hpp>
#include <ImplicitFunctors.hpp>
#include <FEMesh.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace HybridFEM {
|
||||
namespace Implicit {
|
||||
|
||||
// Timings, in seconds, of the phases of one implicit solve.
// All fields start at zero.
struct PerformanceData {
  double mesh_time ;
  double graph_time ;
  double elem_time ;
  double matrix_gather_fill_time ;
  double matrix_boundary_condition_time ;
  double cg_iteration_time ;

  PerformanceData()
    : mesh_time( 0 )
    , graph_time( 0 )
    , elem_time( 0 )
    , matrix_gather_fill_time( 0 )
    , matrix_boundary_condition_time( 0 )
    , cg_iteration_time( 0 )
  {}

  // Keep, field by field, the smaller of this object's value and rhs's.
  // A field is replaced only when rhs is strictly smaller.
  void best( const PerformanceData & rhs )
  {
    if ( rhs.mesh_time < mesh_time ) {
      mesh_time = rhs.mesh_time ;
    }
    if ( rhs.graph_time < graph_time ) {
      graph_time = rhs.graph_time ;
    }
    if ( rhs.elem_time < elem_time ) {
      elem_time = rhs.elem_time ;
    }
    if ( rhs.matrix_gather_fill_time < matrix_gather_fill_time ) {
      matrix_gather_fill_time = rhs.matrix_gather_fill_time ;
    }
    if ( rhs.matrix_boundary_condition_time < matrix_boundary_condition_time ) {
      matrix_boundary_condition_time = rhs.matrix_boundary_condition_time ;
    }
    if ( rhs.cg_iteration_time < cg_iteration_time ) {
      cg_iteration_time = rhs.cg_iteration_time ;
    }
  }
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar , class FixtureType >
|
||||
PerformanceData run( const typename FixtureType::FEMeshType & mesh ,
|
||||
const int , // global_max_x ,
|
||||
const int , // global_max_y ,
|
||||
const int global_max_z ,
|
||||
const bool print_sample )
|
||||
{
|
||||
typedef Scalar scalar_type ;
|
||||
typedef FixtureType fixture_type ;
|
||||
typedef typename fixture_type::execution_space execution_space;
|
||||
//typedef typename execution_space::size_type size_type ; // unused
|
||||
|
||||
typedef typename fixture_type::FEMeshType mesh_type ;
|
||||
typedef typename fixture_type::coordinate_scalar_type coordinate_scalar_type ;
|
||||
|
||||
enum { ElementNodeCount = fixture_type::element_node_count };
|
||||
|
||||
const comm::Machine machine = mesh.parallel_data_map.machine ;
|
||||
|
||||
const size_t element_count = mesh.elem_node_ids.dimension_0();
|
||||
|
||||
const size_t iteration_limit = 200 ;
|
||||
const double residual_tolerance = 1e-14 ;
|
||||
|
||||
size_t iteration_count = 0 ;
|
||||
double residual_norm = 0 ;
|
||||
|
||||
PerformanceData perf_data ;
|
||||
|
||||
//------------------------------------
|
||||
// Sparse linear system types:
|
||||
|
||||
typedef Kokkos::View< scalar_type* , execution_space > vector_type ;
|
||||
typedef Kokkos::CrsMatrix< scalar_type , execution_space > matrix_type ;
|
||||
typedef typename matrix_type::graph_type matrix_graph_type ;
|
||||
typedef typename matrix_type::coefficients_type matrix_coefficients_type ;
|
||||
|
||||
typedef GraphFactory< matrix_graph_type , mesh_type > graph_factory ;
|
||||
|
||||
//------------------------------------
|
||||
// Problem setup types:
|
||||
|
||||
typedef ElementComputation< scalar_type , scalar_type , execution_space > ElementFunctor ;
|
||||
typedef DirichletBoundary< scalar_type , scalar_type , execution_space > BoundaryFunctor ;
|
||||
|
||||
typedef typename ElementFunctor::elem_matrices_type elem_matrices_type ;
|
||||
typedef typename ElementFunctor::elem_vectors_type elem_vectors_type ;
|
||||
|
||||
typedef GatherFill< matrix_type ,
|
||||
mesh_type ,
|
||||
elem_matrices_type ,
|
||||
elem_vectors_type > GatherFillFunctor ;
|
||||
|
||||
//------------------------------------
|
||||
|
||||
const scalar_type elem_coeff_K = 2 ;
|
||||
const scalar_type elem_load_Q = 1 ;
|
||||
|
||||
matrix_type linsys_matrix ;
|
||||
vector_type linsys_rhs ;
|
||||
vector_type linsys_solution ;
|
||||
|
||||
typename graph_factory::element_map_type element_map ;
|
||||
|
||||
Kokkos::Impl::Timer wall_clock ;
|
||||
|
||||
//------------------------------------
|
||||
// Generate sparse matrix graph and element->graph map.
|
||||
|
||||
graph_factory::create( mesh , linsys_matrix.graph , element_map );
|
||||
|
||||
execution_space::fence();
|
||||
perf_data.graph_time = comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
//------------------------------------
|
||||
// Allocate linear system coefficients and rhs:
|
||||
|
||||
const size_t local_owned_length =
|
||||
linsys_matrix.graph.row_map.dimension_0() - 1 ;
|
||||
|
||||
linsys_matrix.coefficients =
|
||||
matrix_coefficients_type( "coeff" , linsys_matrix.graph.entries.dimension_0() );
|
||||
|
||||
linsys_rhs = vector_type( "rhs" , local_owned_length );
|
||||
linsys_solution = vector_type( "solution" , local_owned_length );
|
||||
|
||||
//------------------------------------
|
||||
// Fill linear system
|
||||
{
|
||||
elem_matrices_type elem_matrices ;
|
||||
elem_vectors_type elem_vectors ;
|
||||
|
||||
if ( element_count ) {
|
||||
elem_matrices = elem_matrices_type( std::string("elem_matrices"), element_count );
|
||||
elem_vectors = elem_vectors_type ( std::string("elem_vectors"), element_count );
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
// Compute element matrices and vectors:
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
ElementFunctor::apply( mesh ,
|
||||
elem_matrices , elem_vectors ,
|
||||
elem_coeff_K , elem_load_Q );
|
||||
|
||||
execution_space::fence();
|
||||
perf_data.elem_time = comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
//------------------------------------
|
||||
// Fill linear system coefficients:
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
GatherFillFunctor::apply( linsys_matrix , linsys_rhs ,
|
||||
mesh , element_map , elem_matrices , elem_vectors );
|
||||
|
||||
execution_space::fence();
|
||||
perf_data.matrix_gather_fill_time = comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
// Apply boundary conditions:
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
BoundaryFunctor::apply( linsys_matrix , linsys_rhs , mesh ,
|
||||
0 , global_max_z , 0 , global_max_z );
|
||||
|
||||
execution_space::fence();
|
||||
perf_data.matrix_boundary_condition_time = comm::max( machine , wall_clock.seconds() );
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
// Solve linear sytem
|
||||
|
||||
cgsolve( mesh.parallel_data_map ,
|
||||
linsys_matrix , linsys_rhs , linsys_solution ,
|
||||
iteration_count , residual_norm ,
|
||||
perf_data.cg_iteration_time ,
|
||||
iteration_limit , residual_tolerance );
|
||||
|
||||
//------------------------------------
|
||||
|
||||
if ( print_sample ) {
|
||||
|
||||
typename mesh_type::node_coords_type::HostMirror coords_h =
|
||||
Kokkos::create_mirror( mesh.node_coords );
|
||||
|
||||
typename vector_type::HostMirror X_h =
|
||||
Kokkos::create_mirror( linsys_solution );
|
||||
|
||||
Kokkos::deep_copy( coords_h , mesh.node_coords );
|
||||
Kokkos::deep_copy( X_h , linsys_solution );
|
||||
|
||||
for ( size_t i = 0 ; i < mesh.parallel_data_map.count_owned ; ++i ) {
|
||||
const coordinate_scalar_type x = coords_h(i,0);
|
||||
const coordinate_scalar_type y = coords_h(i,1);
|
||||
const coordinate_scalar_type z = coords_h(i,2);
|
||||
|
||||
if ( x <= 0 && y <= 0 ) {
|
||||
std::cout << " node( " << x << " " << y << " " << z << " ) = "
|
||||
<< X_h(i) << std::endl ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return perf_data ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar , class Device >
|
||||
void driver( const char * const label ,
|
||||
comm::Machine machine ,
|
||||
const int gang_count ,
|
||||
const int elem_count_beg ,
|
||||
const int elem_count_end ,
|
||||
const int runs )
|
||||
{
|
||||
typedef Scalar scalar_type ;
|
||||
typedef Device execution_space ;
|
||||
typedef double coordinate_scalar_type ;
|
||||
typedef FixtureElementHex8 fixture_element_type ;
|
||||
|
||||
typedef BoxMeshFixture< coordinate_scalar_type ,
|
||||
execution_space ,
|
||||
fixture_element_type > fixture_type ;
|
||||
|
||||
typedef typename fixture_type::FEMeshType mesh_type ;
|
||||
|
||||
const size_t proc_count = comm::size( machine );
|
||||
const size_t proc_rank = comm::rank( machine );
|
||||
|
||||
if ( elem_count_beg == 0 || elem_count_end == 0 || runs == 0 ) return ;
|
||||
|
||||
if ( comm::rank( machine ) == 0 ) {
|
||||
std::cout << std::endl ;
|
||||
std::cout << "\"Kokkos::HybridFE::Implicit " << label << "\"" << std::endl;
|
||||
std::cout << "\"Size\" , \"Graphing\" , \"Element\" , \"Fill\" , \"Boundary\" , \"CG-Iter\"" << std::endl
|
||||
<< "\"elems\" , \"millisec\" , \"millisec\" , \"millisec\" , \"millisec\" , \"millisec\"" << std::endl ;
|
||||
}
|
||||
|
||||
for(int i = elem_count_beg ; i < elem_count_end ; i *= 2 )
|
||||
{
|
||||
const int ix = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) );
|
||||
const int iy = ix + 1 ;
|
||||
const int iz = 2 * iy ;
|
||||
const int n = ix * iy * iz ;
|
||||
|
||||
mesh_type mesh =
|
||||
fixture_type::create( proc_count , proc_rank , gang_count ,
|
||||
ix , iy , iz );
|
||||
|
||||
mesh.parallel_data_map.machine = machine ;
|
||||
|
||||
PerformanceData perf_data , perf_best ;
|
||||
|
||||
for(int j = 0; j < runs; j++){
|
||||
|
||||
perf_data = run<scalar_type,fixture_type>(mesh,ix,iy,iz, false );
|
||||
|
||||
if( j == 0 ) {
|
||||
perf_best = perf_data ;
|
||||
}
|
||||
else {
|
||||
perf_best.best( perf_data );
|
||||
}
|
||||
}
|
||||
|
||||
if ( comm::rank( machine ) == 0 ) {
|
||||
|
||||
std::cout << std::setw(8) << n << " , "
|
||||
<< std::setw(10) << perf_best.graph_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.elem_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.matrix_gather_fill_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.matrix_boundary_condition_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.cg_iteration_time * 1000
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace Implicit */
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
|
||||
#endif /* #ifndef HYBRIDFEM_IMPLICIT_HPP */
|
||||
|
||||
@ -1,585 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
|
||||
namespace HybridFEM {
|
||||
namespace Implicit {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Gauss-Legendre integration rules on the reference interval [-1,1] and
// their tensor products on the reference cube [-1,1]^3.
//
//   TensorIntegration<Scalar,1,Order> : 'Order' points 'pts' and weights 'wts'
//   TensorIntegration<Scalar,3,Order> : Order^3 points 'pts[n][3]' and weights
//                                       'wts[n]' built from the 1D rule.
//
// The 1D constants are written to full double precision.  Truncating them
// (e.g. 0.577350269 for 1/sqrt(3)) limits the accuracy of every integral
// computed with these rules to roughly 1e-9.
template< typename Scalar , unsigned Dim , unsigned N >
struct TensorIntegration ;

// One-point rule: the midpoint with weight 2.
template< typename Scalar >
struct TensorIntegration<Scalar,1,1> {
  Scalar pts[1] ;
  Scalar wts[1] ;

  TensorIntegration() { pts[0] = 0 ; wts[0] = 2 ; }
};

// Two-point rule: points -/+ 1/sqrt(3), unit weights.
template< typename Scalar >
struct TensorIntegration<Scalar,1,2>
{
  Scalar pts[2] ;
  Scalar wts[2] ;

  TensorIntegration()
  {
    const Scalar x2 = 0.57735026918962576451 ; // 1/sqrt(3)
    pts[0] = -x2; wts[0] = 1.0;
    pts[1] =  x2; wts[1] = 1.0;
  }
};

// Three-point rule: points -sqrt(3/5), 0, +sqrt(3/5); weights 5/9, 8/9, 5/9.
template< typename Scalar >
struct TensorIntegration<Scalar,1,3>
{
  Scalar pts[3] ;
  Scalar wts[3] ;

  TensorIntegration()
  {
    const Scalar x3 = 0.77459666924148337704 ; // sqrt(3/5)
    const Scalar w1 = 5.0 / 9.0 ;
    const Scalar w2 = 8.0 / 9.0 ;
    pts[0] = -x3 ; wts[0] = w1 ;
    pts[1] =  0  ; wts[1] = w2 ;
    pts[2] =  x3 ; wts[2] = w1 ;
  }
};

// Tensor product of the 1D rule of the given order in three dimensions.
// Point index n runs with the first coordinate fastest and the third
// coordinate slowest; the weight is the product of the three 1D weights.
template< typename Scalar , unsigned Order >
struct TensorIntegration<Scalar,3,Order>
{
  static const unsigned N = Order * Order * Order ;

  Scalar pts[N][3] ;
  Scalar wts[N];

  TensorIntegration()
  {
    TensorIntegration<Scalar,1,Order> oneD ;

    unsigned n = 0 ;
    for ( unsigned k = 0 ; k < Order ; ++k ) {
      for ( unsigned j = 0 ; j < Order ; ++j ) {
        for ( unsigned i = 0 ; i < Order ; ++i , ++n ) {
          pts[n][0] = oneD.pts[i] ;
          pts[n][1] = oneD.pts[j] ;
          pts[n][2] = oneD.pts[k] ;
          wts[n] = oneD.wts[i] * oneD.wts[j] * oneD.wts[k] ;
        }
      }
    }
  }
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar >
|
||||
struct ShapeFunctionEvaluation {
|
||||
|
||||
static const unsigned FunctionCount = 8 ;
|
||||
static const unsigned SpatialDimension = 3 ;
|
||||
static const unsigned IntegrationOrder = 2 ;
|
||||
|
||||
typedef TensorIntegration< Scalar , SpatialDimension , IntegrationOrder >
|
||||
TensorIntegrationType ;
|
||||
|
||||
static const unsigned PointCount = TensorIntegrationType::N ;
|
||||
|
||||
Scalar value [ PointCount ][ FunctionCount ] ;
|
||||
Scalar gradient[ PointCount ][ FunctionCount * SpatialDimension ];
|
||||
Scalar weight [ PointCount ];
|
||||
|
||||
ShapeFunctionEvaluation()
|
||||
{
|
||||
const TensorIntegration< Scalar , SpatialDimension , IntegrationOrder >
|
||||
integration ;
|
||||
|
||||
const Scalar ONE8TH = 0.125 ;
|
||||
|
||||
for ( unsigned i = 0 ; i < PointCount ; ++i ) {
|
||||
|
||||
const Scalar u = 1.0 - integration.pts[i][0];
|
||||
const Scalar v = 1.0 - integration.pts[i][1];
|
||||
const Scalar w = 1.0 - integration.pts[i][2];
|
||||
|
||||
const Scalar up1 = 1.0 + integration.pts[i][0];
|
||||
const Scalar vp1 = 1.0 + integration.pts[i][1];
|
||||
const Scalar wp1 = 1.0 + integration.pts[i][2];
|
||||
|
||||
weight[i] = integration.wts[i] ;
|
||||
|
||||
// Vaues:
|
||||
value[i][0] = ONE8TH * u * v * w ;
|
||||
value[i][1] = ONE8TH * up1 * v * w ;
|
||||
value[i][2] = ONE8TH * up1 * vp1 * w ;
|
||||
value[i][3] = ONE8TH * u * vp1 * w ;
|
||||
|
||||
value[i][4] = ONE8TH * u * v * wp1 ;
|
||||
value[i][5] = ONE8TH * up1 * v * wp1 ;
|
||||
value[i][6] = ONE8TH * up1 * vp1 * wp1 ;
|
||||
value[i][7] = ONE8TH * u * vp1 * wp1 ;
|
||||
|
||||
//fn 0 = u * v * w
|
||||
gradient[i][ 0] = ONE8TH * -1 * v * w ;
|
||||
gradient[i][ 1] = ONE8TH * u * -1 * w ;
|
||||
gradient[i][ 2] = ONE8TH * u * v * -1 ;
|
||||
|
||||
//fn 1 = up1 * v * w
|
||||
gradient[i][ 3] = ONE8TH * 1 * v * w ;
|
||||
gradient[i][ 4] = ONE8TH * up1 * -1 * w ;
|
||||
gradient[i][ 5] = ONE8TH * up1 * v * -1 ;
|
||||
|
||||
//fn 2 = up1 * vp1 * w
|
||||
gradient[i][ 6] = ONE8TH * 1 * vp1 * w ;
|
||||
gradient[i][ 7] = ONE8TH * up1 * 1 * w ;
|
||||
gradient[i][ 8] = ONE8TH * up1 * vp1 * -1 ;
|
||||
|
||||
//fn 3 = u * vp1 * w
|
||||
gradient[i][ 9] = ONE8TH * -1 * vp1 * w ;
|
||||
gradient[i][10] = ONE8TH * u * 1 * w ;
|
||||
gradient[i][11] = ONE8TH * u * vp1 * -1 ;
|
||||
|
||||
//fn 4 = u * v * wp1
|
||||
gradient[i][12] = ONE8TH * -1 * v * wp1 ;
|
||||
gradient[i][13] = ONE8TH * u * -1 * wp1 ;
|
||||
gradient[i][14] = ONE8TH * u * v * 1 ;
|
||||
|
||||
//fn 5 = up1 * v * wp1
|
||||
gradient[i][15] = ONE8TH * 1 * v * wp1 ;
|
||||
gradient[i][16] = ONE8TH * up1 * -1 * wp1 ;
|
||||
gradient[i][17] = ONE8TH * up1 * v * 1 ;
|
||||
|
||||
//fn 6 = up1 * vp1 * wp1
|
||||
gradient[i][18] = ONE8TH * 1 * vp1 * wp1 ;
|
||||
gradient[i][19] = ONE8TH * up1 * 1 * wp1 ;
|
||||
gradient[i][20] = ONE8TH * up1 * vp1 * 1 ;
|
||||
|
||||
//fn 7 = u * vp1 * wp1
|
||||
gradient[i][21] = ONE8TH * -1 * vp1 * wp1 ;
|
||||
gradient[i][22] = ONE8TH * u * 1 * wp1 ;
|
||||
gradient[i][23] = ONE8TH * u * vp1 * 1 ;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarType , typename ScalarCoordType , class DeviceType >
|
||||
struct ElementComputation
|
||||
{
|
||||
typedef DeviceType execution_space;
|
||||
typedef ScalarType scalar_type ;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
|
||||
static const size_type ElementNodeCount = 8 ;
|
||||
|
||||
typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;
|
||||
typedef Kokkos::View< scalar_type[][ElementNodeCount][ElementNodeCount] , execution_space > elem_matrices_type ;
|
||||
typedef Kokkos::View< scalar_type[][ElementNodeCount] , execution_space > elem_vectors_type ;
|
||||
|
||||
typedef ShapeFunctionEvaluation< scalar_type > shape_function_data ;
|
||||
|
||||
static const unsigned SpatialDim = shape_function_data::SpatialDimension ;
|
||||
static const unsigned FunctionCount = shape_function_data::FunctionCount ;
|
||||
|
||||
private:
|
||||
|
||||
const shape_function_data shape_eval ;
|
||||
typename mesh_type::elem_node_ids_type elem_node_ids ;
|
||||
typename mesh_type::node_coords_type node_coords ;
|
||||
elem_matrices_type element_matrices ;
|
||||
elem_vectors_type element_vectors ;
|
||||
scalar_type coeff_K ;
|
||||
scalar_type coeff_Q ;
|
||||
|
||||
ElementComputation( const mesh_type & arg_mesh ,
|
||||
const elem_matrices_type & arg_element_matrices ,
|
||||
const elem_vectors_type & arg_element_vectors ,
|
||||
const scalar_type arg_coeff_K ,
|
||||
const scalar_type arg_coeff_Q )
|
||||
: shape_eval()
|
||||
, elem_node_ids( arg_mesh.elem_node_ids )
|
||||
, node_coords( arg_mesh.node_coords )
|
||||
, element_matrices( arg_element_matrices )
|
||||
, element_vectors( arg_element_vectors )
|
||||
, coeff_K( arg_coeff_K )
|
||||
, coeff_Q( arg_coeff_Q )
|
||||
{}
|
||||
|
||||
public:
|
||||
|
||||
static void apply( const mesh_type & mesh ,
|
||||
const elem_matrices_type & elem_matrices ,
|
||||
const elem_vectors_type & elem_vectors ,
|
||||
const scalar_type elem_coeff_K ,
|
||||
const scalar_type elem_coeff_Q )
|
||||
{
|
||||
ElementComputation comp( mesh , elem_matrices , elem_vectors , elem_coeff_K , elem_coeff_Q );
|
||||
const size_t elem_count = mesh.elem_node_ids.dimension_0();
|
||||
|
||||
parallel_for( elem_count , comp );
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
static const unsigned FLOPS_jacobian =
|
||||
FunctionCount * SpatialDim * SpatialDim * 2 ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void jacobian( const ScalarCoordType * x,
|
||||
const ScalarCoordType * y,
|
||||
const ScalarCoordType * z,
|
||||
const scalar_type * grad_vals,
|
||||
scalar_type * J) const
|
||||
{
|
||||
int i_grad = 0 ;
|
||||
|
||||
for( unsigned i = 0; i < ElementNodeCount ; ++i , i_grad += SpatialDim ) {
|
||||
const scalar_type g0 = grad_vals[ i_grad ];
|
||||
const scalar_type g1 = grad_vals[ i_grad + 1 ];
|
||||
const scalar_type g2 = grad_vals[ i_grad + 2 ];
|
||||
const scalar_type x0 = x[i] ;
|
||||
const scalar_type x1 = y[i] ;
|
||||
const scalar_type x2 = z[i] ;
|
||||
|
||||
J[0] += g0 * x0 ;
|
||||
J[1] += g0 * x1 ;
|
||||
J[2] += g0 * x2 ;
|
||||
|
||||
J[3] += g1 * x0 ;
|
||||
J[4] += g1 * x1 ;
|
||||
J[5] += g1 * x2 ;
|
||||
|
||||
J[6] += g2 * x0 ;
|
||||
J[7] += g2 * x1 ;
|
||||
J[8] += g2 * x2 ;
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
static const unsigned FLOPS_inverse_and_det = 46 ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
scalar_type inverse_and_determinant3x3( scalar_type * const J ) const
|
||||
{
|
||||
const scalar_type J00 = J[0];
|
||||
const scalar_type J01 = J[1];
|
||||
const scalar_type J02 = J[2];
|
||||
|
||||
const scalar_type J10 = J[3];
|
||||
const scalar_type J11 = J[4];
|
||||
const scalar_type J12 = J[5];
|
||||
|
||||
const scalar_type J20 = J[6];
|
||||
const scalar_type J21 = J[7];
|
||||
const scalar_type J22 = J[8];
|
||||
|
||||
const scalar_type term0 = J22*J11 - J21*J12;
|
||||
const scalar_type term1 = J22*J01 - J21*J02;
|
||||
const scalar_type term2 = J12*J01 - J11*J02;
|
||||
|
||||
const scalar_type detJ = J00*term0 - J10*term1 + J20*term2;
|
||||
const scalar_type inv_detJ = 1.0/detJ;
|
||||
|
||||
J[0] = term0*inv_detJ;
|
||||
J[1] = -term1*inv_detJ;
|
||||
J[2] = term2*inv_detJ;
|
||||
|
||||
J[3] = -(J22*J10 - J20*J12)*inv_detJ;
|
||||
J[4] = (J22*J00 - J20*J02)*inv_detJ;
|
||||
J[5] = -(J12*J00 - J10*J02)*inv_detJ;
|
||||
|
||||
J[6] = (J21*J10 - J20*J11)*inv_detJ;
|
||||
J[7] = -(J21*J00 - J20*J01)*inv_detJ;
|
||||
J[8] = (J11*J00 - J10*J01)*inv_detJ;
|
||||
|
||||
return detJ ;
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void matTransMat3x3_X_3xn( const scalar_type * A, int n,
|
||||
const scalar_type * B,
|
||||
scalar_type * C ) const
|
||||
{
|
||||
//A is 3x3, B is 3xn. So C is also 3xn.
|
||||
//A,B,C are all assumed to be ordered such that columns are contiguous.
|
||||
|
||||
scalar_type * Cj = C;
|
||||
const scalar_type * Bj = B;
|
||||
|
||||
for(int j=0; j<n; ++j) {
|
||||
Cj[0] = A[0]*Bj[0] + A[1]*Bj[1] + A[2]*Bj[2];
|
||||
Cj[1] = A[3]*Bj[0] + A[4]*Bj[1] + A[5]*Bj[2];
|
||||
Cj[2] = A[6]*Bj[0] + A[7]*Bj[1] + A[8]*Bj[2];
|
||||
Bj += 3;
|
||||
Cj += 3;
|
||||
}
|
||||
|
||||
}
|
||||
//------------------------------------
|
||||
|
||||
static const unsigned FLOPS_contributeDiffusionMatrix = FunctionCount * ( 3 * 5 + FunctionCount * 7 ) ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void contributeDiffusionMatrix(
|
||||
const scalar_type weight ,
|
||||
const scalar_type grad_vals[] ,
|
||||
const scalar_type invJ[] ,
|
||||
scalar_type elem_mat[][8] ) const
|
||||
{
|
||||
scalar_type dpsidx[8], dpsidy[8], dpsidz[8];
|
||||
|
||||
int i_grad = 0 ;
|
||||
for( unsigned i = 0; i < FunctionCount ; ++i , i_grad += 3 ) {
|
||||
const scalar_type g0 = grad_vals[i_grad+0];
|
||||
const scalar_type g1 = grad_vals[i_grad+1];
|
||||
const scalar_type g2 = grad_vals[i_grad+2];
|
||||
|
||||
dpsidx[i] = g0 * invJ[0] + g1 * invJ[1] + g2 * invJ[2];
|
||||
dpsidy[i] = g0 * invJ[3] + g1 * invJ[4] + g2 * invJ[5];
|
||||
dpsidz[i] = g0 * invJ[6] + g1 * invJ[7] + g2 * invJ[8];
|
||||
}
|
||||
|
||||
for( unsigned m = 0; m < FunctionCount; m++) {
|
||||
for( unsigned n = 0; n < FunctionCount; n++) {
|
||||
|
||||
elem_mat[m][n] += weight *
|
||||
((dpsidx[m] * dpsidx[n]) +
|
||||
(dpsidy[m] * dpsidy[n]) +
|
||||
(dpsidz[m] * dpsidz[n]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
static const unsigned FLOPS_contributeSourceVector = FunctionCount * 2 ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void contributeSourceVector( const scalar_type term ,
|
||||
const scalar_type psi[] ,
|
||||
scalar_type elem_vec[] ) const
|
||||
{
|
||||
for( unsigned i=0; i< FunctionCount ; ++i) {
|
||||
elem_vec[i] += psi[i] * term ;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static const unsigned FLOPS_operator =
|
||||
shape_function_data::PointCount * ( 3
|
||||
+ FLOPS_jacobian
|
||||
+ FLOPS_inverse_and_det
|
||||
+ FLOPS_contributeDiffusionMatrix
|
||||
+ FLOPS_contributeSourceVector ) ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int ielem )const {
|
||||
|
||||
scalar_type elem_vec[8] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 };
|
||||
scalar_type elem_mat[8][8] =
|
||||
{ { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
|
||||
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
|
||||
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
|
||||
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
|
||||
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
|
||||
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
|
||||
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
|
||||
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } };
|
||||
|
||||
ScalarCoordType x[8], y[8], z[8];
|
||||
|
||||
for ( int i = 0 ; i < 8 ; ++i ) {
|
||||
const int node_index = elem_node_ids( ielem , i );
|
||||
x[i] = node_coords( node_index , 0 );
|
||||
y[i] = node_coords( node_index , 1 );
|
||||
z[i] = node_coords( node_index , 2 );
|
||||
}
|
||||
|
||||
// This loop could be parallelized; however,
|
||||
// it would require additional per-thread temporaries
|
||||
// of 'elem_vec' and 'elem_mat' which would
|
||||
// consume more local memory and have to be reduced.
|
||||
|
||||
for ( unsigned i = 0 ; i < shape_function_data::PointCount ; ++i ) {
|
||||
|
||||
scalar_type J[SpatialDim*SpatialDim] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
|
||||
jacobian( x, y, z, shape_eval.gradient[i] , J );
|
||||
|
||||
// Overwrite J with its inverse to save scratch memory space.
|
||||
const scalar_type detJ_w = shape_eval.weight[i] * inverse_and_determinant3x3(J);
|
||||
const scalar_type k_detJ_w = coeff_K * detJ_w ;
|
||||
const scalar_type Q_detJ_w = coeff_Q * detJ_w ;
|
||||
|
||||
contributeDiffusionMatrix( k_detJ_w , shape_eval.gradient[i] , J , elem_mat );
|
||||
|
||||
contributeSourceVector( Q_detJ_w , shape_eval.value[i] , elem_vec );
|
||||
}
|
||||
|
||||
for( size_type i=0; i< ElementNodeCount ; ++i) {
|
||||
element_vectors(ielem, i) = elem_vec[i] ;
|
||||
}
|
||||
|
||||
for( size_type i = 0; i < ElementNodeCount ; i++){
|
||||
for( size_type j = 0; j < ElementNodeCount ; j++){
|
||||
element_matrices(ielem, i, j) = elem_mat[i][j] ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}; /* ElementComputation */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarType , typename ScalarCoordType , class DeviceType >
|
||||
struct DirichletBoundary
|
||||
{
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
|
||||
static const size_type ElementNodeCount = 8 ;
|
||||
|
||||
typedef Kokkos::CrsMatrix< ScalarType , execution_space > matrix_type ;
|
||||
typedef Kokkos::View< ScalarType[] , execution_space > vector_type ;
|
||||
|
||||
typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;
|
||||
|
||||
typename mesh_type::node_coords_type node_coords ;
|
||||
matrix_type matrix ;
|
||||
vector_type rhs ;
|
||||
ScalarCoordType bc_lower_z ;
|
||||
ScalarCoordType bc_upper_z ;
|
||||
ScalarType bc_lower_value ;
|
||||
ScalarType bc_upper_value ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( size_type inode ) const
|
||||
{
|
||||
// Apply a dirichlet boundary condition to 'irow'
|
||||
// to maintain the symmetry of the original
|
||||
// global stiffness matrix, zero out the columns
|
||||
// that correspond to boundary conditions, and
|
||||
// adjust the load vector accordingly
|
||||
|
||||
const size_type iBeg = matrix.graph.row_map[inode];
|
||||
const size_type iEnd = matrix.graph.row_map[inode+1];
|
||||
|
||||
const ScalarCoordType z = node_coords(inode,2);
|
||||
const bool bc_lower = z <= bc_lower_z ;
|
||||
const bool bc_upper = bc_upper_z <= z ;
|
||||
|
||||
if ( bc_lower || bc_upper ) {
|
||||
const ScalarType bc_value = bc_lower ? bc_lower_value
|
||||
: bc_upper_value ;
|
||||
|
||||
rhs(inode) = bc_value ; // set the rhs vector
|
||||
|
||||
// zero each value on the row, and leave a one
|
||||
// on the diagonal
|
||||
|
||||
for( size_type i = iBeg ; i < iEnd ; i++) {
|
||||
matrix.coefficients(i) =
|
||||
(int) inode == matrix.graph.entries(i) ? 1 : 0 ;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Find any columns that are boundary conditions.
|
||||
// Clear them and adjust the load vector
|
||||
|
||||
for( size_type i = iBeg ; i < iEnd ; i++ ) {
|
||||
const size_type cnode = matrix.graph.entries(i) ;
|
||||
|
||||
const ScalarCoordType zc = node_coords(cnode,2);
|
||||
const bool c_bc_lower = zc <= bc_lower_z ;
|
||||
const bool c_bc_upper = bc_upper_z <= zc ;
|
||||
|
||||
if ( c_bc_lower || c_bc_upper ) {
|
||||
|
||||
const ScalarType c_bc_value = c_bc_lower ? bc_lower_value
|
||||
: bc_upper_value ;
|
||||
|
||||
rhs( inode ) -= c_bc_value * matrix.coefficients(i);
|
||||
|
||||
matrix.coefficients(i) = 0 ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void apply( const matrix_type & linsys_matrix ,
|
||||
const vector_type & linsys_rhs ,
|
||||
const mesh_type & mesh ,
|
||||
const ScalarCoordType bc_lower_z ,
|
||||
const ScalarCoordType bc_upper_z ,
|
||||
const ScalarType bc_lower_value ,
|
||||
const ScalarType bc_upper_value )
|
||||
{
|
||||
const size_t row_count = linsys_matrix.graph.row_map.dimension_0() - 1 ;
|
||||
DirichletBoundary op ;
|
||||
op.node_coords = mesh.node_coords ;
|
||||
op.matrix = linsys_matrix ;
|
||||
op.rhs = linsys_rhs ;
|
||||
op.bc_lower_z = bc_lower_z ;
|
||||
op.bc_upper_z = bc_upper_z ;
|
||||
op.bc_lower_value = bc_lower_value ;
|
||||
op.bc_upper_value = bc_upper_value ;
|
||||
parallel_for( row_count , op );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace Implicit */
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
@ -1,567 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef USESCASES_LINALG_BLAS_HPP
|
||||
#define USESCASES_LINALG_BLAS_HPP
|
||||
|
||||
#include <cmath>
|
||||
#include <utility>
|
||||
#include <ParallelComm.hpp>
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
namespace Impl {

// Forward declarations of the functors behind the vector operations
// declared in this header.

// Reductions.
template< class Scalar , class Layout , class DeviceType >
struct Dot ;

template< class Scalar , class Layout , class DeviceType >
struct Dot1 ;

// Single-vector updates.
template< typename ScalarA , typename ScalarY ,
          class Layout , class Device >
struct Scale ;

template< typename ScalarA , typename ScalarY ,
          class Layout , class Device >
struct Fill ;

// Two-vector updates.
template< typename ScalarA , typename ScalarX , typename ScalarY ,
          class Layout , class Device >
struct AXPY ;

template< typename ScalarX , typename ScalarB , typename ScalarY ,
          class Layout , class Device >
struct XPBY ;

// Three-vector update.
template< typename ScalarA , typename ScalarX ,
          typename ScalarB , typename ScalarY ,
          typename ScalarW ,
          class Layout , class Device >
struct WAXPBY ;

} /* namespace Impl */
} /* namespace Kokkos */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
template< typename ScalarX /* Allow mix of const and non-const */ ,
|
||||
typename ScalarY /* Allow mix of const and non-const */ ,
|
||||
class L , class D ,
|
||||
class MX /* Allow any management type */ ,
|
||||
class MY /* Allow any management type */ >
|
||||
inline
|
||||
double dot( const size_t n ,
|
||||
const View< ScalarX * , L , D , MX > & x ,
|
||||
const View< ScalarY * , L , D , MY > & y ,
|
||||
comm::Machine machine )
|
||||
{
|
||||
double global_result = 0 ;
|
||||
double local_result = 0 ;
|
||||
|
||||
Impl::Dot< ScalarX , L , D >( n , x , y , local_result );
|
||||
|
||||
MPI_Allreduce( & local_result , & global_result , 1 ,
|
||||
MPI_DOUBLE , MPI_SUM , machine.mpi_comm );
|
||||
|
||||
return global_result ;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template< typename ScalarX /* Allow mix of const and non-const */ ,
|
||||
typename ScalarY /* Allow mix of const and non-const */ ,
|
||||
class L , class D ,
|
||||
class MX /* Allow any management type */ ,
|
||||
class MY /* Allow any management type */ >
|
||||
inline
|
||||
double dot( const size_t n ,
|
||||
const View< ScalarX * , L , D , MX > & x ,
|
||||
const View< ScalarY * , L , D , MY > & y ,
|
||||
comm::Machine )
|
||||
{
|
||||
double global_result = 0 ;
|
||||
|
||||
Impl::Dot< ScalarX , L , D >( n , x , y , global_result );
|
||||
|
||||
return global_result ;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
template< typename ScalarX /* Allow mix of const and non-const */ ,
|
||||
class L , class D ,
|
||||
class MX /* Allow any management type */ >
|
||||
inline
|
||||
double dot( const size_t n ,
|
||||
const View< ScalarX * , L , D , MX > & x ,
|
||||
comm::Machine machine )
|
||||
{
|
||||
double global_result = 0 ;
|
||||
double local_result = 0 ;
|
||||
|
||||
Impl::Dot1< ScalarX , L , D >( n , x , local_result );
|
||||
|
||||
MPI_Allreduce( & local_result , & global_result , 1 ,
|
||||
MPI_DOUBLE , MPI_SUM , machine.mpi_comm );
|
||||
|
||||
return global_result ;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template< typename ScalarX /* Allow mix of const and non-const */ ,
|
||||
class L , class D ,
|
||||
class MX /* Allow any management type */ >
|
||||
inline
|
||||
double dot( const size_t n ,
|
||||
const View< ScalarX * , L , D , MX > & x ,
|
||||
comm::Machine )
|
||||
{
|
||||
double global_result = 0 ;
|
||||
|
||||
Impl::Dot1< ScalarX , L , D >( n , x , global_result );
|
||||
|
||||
return global_result ;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarX /* Allow mix of const and non-const */ ,
|
||||
class L , class D ,
|
||||
class MX /* Allow any management type */ >
|
||||
inline
|
||||
double norm2( const size_t n ,
|
||||
const View< ScalarX * , L , D , MX > & x ,
|
||||
comm::Machine machine )
|
||||
{
|
||||
return std::sqrt( dot( n , x , machine ) );
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarA ,
|
||||
typename ScalarX ,
|
||||
class L ,
|
||||
class D ,
|
||||
class MX >
|
||||
void scale( const size_t n ,
|
||||
const ScalarA & alpha ,
|
||||
const View< ScalarX * , L , D , MX > & x )
|
||||
{
|
||||
Impl::Scale< ScalarA , ScalarX , L , D >( n , alpha , x );
|
||||
}
|
||||
|
||||
template< typename ScalarA ,
|
||||
typename ScalarX ,
|
||||
class L ,
|
||||
class D ,
|
||||
class MX >
|
||||
void fill( const size_t n ,
|
||||
const ScalarA & alpha ,
|
||||
const View< ScalarX * , L , D , MX > & x )
|
||||
{
|
||||
Impl::Fill< ScalarA , ScalarX , L , D >( n , alpha , x );
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarA ,
|
||||
typename ScalarX ,
|
||||
typename ScalarY ,
|
||||
class L ,
|
||||
class D ,
|
||||
class MX ,
|
||||
class MY >
|
||||
void axpy( const size_t n ,
|
||||
const ScalarA & alpha ,
|
||||
const View< ScalarX *, L , D , MX > & x ,
|
||||
const View< ScalarY *, L , D , MY > & y )
|
||||
{
|
||||
Impl::AXPY< ScalarA, ScalarX, ScalarY , L , D >( n, alpha, x, y );
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarX ,
|
||||
typename ScalarB ,
|
||||
typename ScalarY ,
|
||||
class L ,
|
||||
class D ,
|
||||
class MX ,
|
||||
class MY >
|
||||
void xpby( const size_t n ,
|
||||
const View< ScalarX *, L , D , MX > & x ,
|
||||
const ScalarB & beta ,
|
||||
const View< ScalarY *, L , D , MY > & y )
|
||||
{
|
||||
Impl::XPBY< ScalarX, ScalarB, ScalarY , L , D >( n, x, beta, y );
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// w = alpha * x + beta * y
|
||||
|
||||
template< typename ScalarA ,
|
||||
typename ScalarX ,
|
||||
typename ScalarB ,
|
||||
typename ScalarY ,
|
||||
typename ScalarW ,
|
||||
class L , class D ,
|
||||
class MX , class MY , class MW >
|
||||
void waxpby( const size_t n ,
|
||||
const ScalarA & alpha ,
|
||||
const View< ScalarX * , L , D , MX > & x ,
|
||||
const ScalarB & beta ,
|
||||
const View< ScalarY * , L , D , MY > & y ,
|
||||
const View< ScalarW * , L , D , MW > & w )
|
||||
{
|
||||
Impl::WAXPBY<ScalarA,ScalarX,ScalarB,ScalarY,ScalarW,L,D>
|
||||
( n , alpha , x , beta , y , w );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< typename Scalar , class L , class D >
|
||||
struct Dot
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< const Scalar*, L, D, MemoryUnmanaged > vector_const_type ;
|
||||
|
||||
const vector_const_type x ;
|
||||
const vector_const_type y ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename vector_const_type::execution_space execution_space ; // Manycore device
|
||||
typedef double value_type ; // Reduction value
|
||||
|
||||
template< class ArgX , class ArgY >
|
||||
inline
|
||||
Dot( const size_t n , const ArgX & arg_x , const ArgY & arg_y , double & result )
|
||||
: x( arg_x ), y( arg_y )
|
||||
{
|
||||
parallel_reduce( n , *this , result );
|
||||
}
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const iType & i , value_type & update ) const
|
||||
{ update += x(i) * y(i); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join( volatile value_type & update ,
|
||||
const volatile value_type & source )
|
||||
{ update += source; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init( value_type & update )
|
||||
{ update = 0 ; }
|
||||
}; // Dot
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar , class L , class D >
|
||||
struct Dot1
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< const Scalar*, L, D , MemoryUnmanaged > vector_const_type ;
|
||||
|
||||
const vector_const_type x ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename vector_const_type::execution_space execution_space ; // Manycore device
|
||||
typedef double value_type ; // Reduction value
|
||||
|
||||
template< class ArgX >
|
||||
inline
|
||||
Dot1( const size_t n , const ArgX & arg_x , double & result )
|
||||
: x( arg_x )
|
||||
{
|
||||
parallel_reduce( n , *this , result );
|
||||
}
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const iType & i , value_type & update ) const
|
||||
{ update += x(i) * x(i) ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join( volatile value_type & update ,
|
||||
const volatile value_type & source )
|
||||
{ update += source ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init( value_type & update )
|
||||
{ update = 0 ; }
|
||||
}; // Dot
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template < typename ScalarA ,
|
||||
typename ScalarX ,
|
||||
typename ScalarB ,
|
||||
typename ScalarY ,
|
||||
typename ScalarW ,
|
||||
class L , class D >
|
||||
struct WAXPBY
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
|
||||
typedef View< const ScalarX *, L , D , MemoryUnmanaged > ViewX ;
|
||||
typedef View< const ScalarY *, L , D , MemoryUnmanaged > ViewY ;
|
||||
|
||||
const ViewW w ;
|
||||
const ViewX x ;
|
||||
const ViewY y ;
|
||||
const ScalarA alpha ;
|
||||
const ScalarB beta ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename ViewW::execution_space execution_space ;
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const iType inode ) const
|
||||
{
|
||||
w(inode) = alpha * x(inode) + beta * y(inode);
|
||||
}
|
||||
|
||||
template< class ArgX , class ArgY , class ArgW >
|
||||
inline
|
||||
WAXPBY( const size_t n ,
|
||||
const ScalarA & arg_alpha ,
|
||||
const ArgX & arg_x ,
|
||||
const ScalarB & arg_beta ,
|
||||
const ArgY & arg_y ,
|
||||
const ArgW & arg_w )
|
||||
: w( arg_w ), x( arg_x ), y( arg_y )
|
||||
, alpha( arg_alpha ), beta( arg_beta )
|
||||
{
|
||||
parallel_for( n , *this );
|
||||
}
|
||||
}; // WAXPBY
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template < typename ScalarB ,
|
||||
typename ScalarW ,
|
||||
class L , class D >
|
||||
struct Scale
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
|
||||
const ViewW w ;
|
||||
const ScalarB beta ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename ViewW::execution_space execution_space ;
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const iType & i ) const
|
||||
{ w(i) *= beta ; }
|
||||
|
||||
template< class ArgW >
|
||||
inline
|
||||
Scale( const size_t n , const ScalarB & arg_beta , const ArgW & arg_w )
|
||||
: w( arg_w )
|
||||
, beta( arg_beta )
|
||||
{
|
||||
parallel_for( n , *this );
|
||||
}
|
||||
};
|
||||
|
||||
template < typename ScalarB ,
|
||||
typename ScalarW ,
|
||||
class L , class D >
|
||||
struct Fill
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
|
||||
const ViewW w ;
|
||||
const ScalarB beta ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename ViewW::execution_space execution_space ;
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const iType & i ) const
|
||||
{ w(i) = beta ; }
|
||||
|
||||
template< class ArgW >
|
||||
inline
|
||||
Fill( const size_t n , const ScalarB & arg_beta , const ArgW & arg_w )
|
||||
: w( arg_w )
|
||||
, beta( arg_beta )
|
||||
{
|
||||
parallel_for( n , *this );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template < typename ScalarA ,
|
||||
typename ScalarX ,
|
||||
typename ScalarW ,
|
||||
class L , class D >
|
||||
struct AXPY
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
|
||||
typedef View< const ScalarX *, L , D , MemoryUnmanaged > ViewX ;
|
||||
|
||||
const ViewW w ;
|
||||
const ViewX x ;
|
||||
const ScalarA alpha ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename ViewW::execution_space execution_space ;
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const iType & i ) const
|
||||
{ w(i) += alpha * x(i); }
|
||||
|
||||
template< class ArgX , class ArgW >
|
||||
inline
|
||||
AXPY( const size_t n ,
|
||||
const ScalarA & arg_alpha ,
|
||||
const ArgX & arg_x ,
|
||||
const ArgW & arg_w )
|
||||
: w( arg_w ), x( arg_x )
|
||||
, alpha( arg_alpha )
|
||||
{
|
||||
parallel_for( n , *this );
|
||||
}
|
||||
}; // AXPY
|
||||
|
||||
template< typename ScalarX ,
|
||||
typename ScalarB ,
|
||||
typename ScalarW ,
|
||||
class L , class D >
|
||||
struct XPBY
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
|
||||
typedef View< const ScalarX *, L , D , MemoryUnmanaged > ViewX ;
|
||||
|
||||
const ViewW w ;
|
||||
const ViewX x ;
|
||||
const ScalarB beta ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename ViewW::execution_space execution_space ;
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const iType & i ) const
|
||||
{ w(i) = x(i) + beta * w(i); }
|
||||
|
||||
template< class ArgX , class ArgW >
|
||||
inline
|
||||
XPBY( const size_t n ,
|
||||
const ArgX & arg_x ,
|
||||
const ScalarB & arg_beta ,
|
||||
const ArgW & arg_w )
|
||||
: w( arg_w ), x( arg_x )
|
||||
, beta( arg_beta )
|
||||
{
|
||||
parallel_for( n , *this );
|
||||
}
|
||||
}; // XPBY
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef USESCASES_LINALG_BLAS_HPP */
|
||||
|
||||
|
||||
@ -1,53 +0,0 @@
|
||||
# Build one example executable from every *.cpp file that sits next to this
# Makefile.  The executable is named after the source directory, with a
# ".cuda" suffix when KOKKOS_DEVICES contains "Cuda" and ".host" otherwise.

KOKKOS_PATH ?= ../..

# Location of this Makefile.  This must be evaluated before any 'include',
# because $(lastword $(MAKEFILE_LIST)) names the most recently read makefile.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
SRC_DIR := $(dir $(MAKEFILE_PATH))

SRC = $(wildcard $(SRC_DIR)/*.cpp)
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)

#SRC = $(wildcard *.cpp)
#OBJ = $(SRC:%.cpp=%.o)

# None of these targets produces a file of the same name; declaring them
# phony keeps them working even if a file called 'build' or 'clean' exists.
.PHONY: default build clean

# Note: this recipe runs after the 'build' prerequisite has been made.
default: build
	echo "Start Build"

# use installed Makefile.kokkos
include $(KOKKOS_PATH)/Makefile.kokkos

ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(NVCC_WRAPPER)
CXXFLAGS = -I$(SRC_DIR) -I$(CUDA_PATH) -O3
LINK = $(CXX)
LINKFLAGS = -L$(CUDA_PATH)/lib64 -lcusparse
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "Cuda,OpenMP"
#KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "OpenMP"
#KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

LIB =


build: $(EXE)

# Link step
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

clean:
	rm -f *.a *.o *.cuda *.host

# Compilation rules

%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
@ -1,573 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef HYBRIDFEM_NONLINEAR_HPP
|
||||
#define HYBRIDFEM_NONLINEAR_HPP
|
||||
|
||||
#include <algorithm>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <utility>

#include <Kokkos_Core.hpp>
#include <SparseLinearSystem.hpp>
#include <SparseLinearSystemFill.hpp>
#include <NonlinearFunctors.hpp>

#include <FEMesh.hpp>
#include <HexElement.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace HybridFEM {
|
||||
namespace Nonlinear {
|
||||
|
||||
/** \brief  Timings and iteration counts collected for one nonlinear solve.
 *
 *  All members start at zero.  best() merges another record into this one
 *  by keeping, member by member, the smaller of the two values.
 */
struct PerformanceData {

  double mesh_time ;
  double graph_time ;
  double elem_time ;
  double matrix_gather_fill_time ;
  double matrix_boundary_condition_time ;
  double cg_iteration_time ;
  size_t cg_iteration_count ;
  size_t newton_iteration_count ;
  double error_max ;

  /** \brief  Zero every timing, counter and the error. */
  PerformanceData()
    : mesh_time( 0 )
    , graph_time( 0 )
    , elem_time( 0 )
    , matrix_gather_fill_time( 0 )
    , matrix_boundary_condition_time( 0 )
    , cg_iteration_time( 0 )
    , cg_iteration_count( 0 )
    , newton_iteration_count( 0 )
    , error_max( 0 )
    {}

  /** \brief  Keep the member-wise minimum of this record and 'rhs'. */
  void best( const PerformanceData & rhs )
    {
      keep_smaller( mesh_time                      , rhs.mesh_time );
      keep_smaller( graph_time                     , rhs.graph_time );
      keep_smaller( elem_time                      , rhs.elem_time );
      keep_smaller( matrix_gather_fill_time        , rhs.matrix_gather_fill_time );
      keep_smaller( matrix_boundary_condition_time , rhs.matrix_boundary_condition_time );
      keep_smaller( cg_iteration_time              , rhs.cg_iteration_time );
      keep_smaller( cg_iteration_count             , rhs.cg_iteration_count );
      keep_smaller( newton_iteration_count         , rhs.newton_iteration_count );
      keep_smaller( error_max                      , rhs.error_max );
    }

private:

  // Overwrite 'current' only when 'candidate' is strictly smaller; this is
  // the same selection that std::min( current , candidate ) makes.
  template< typename ValueType >
  static void keep_smaller( ValueType & current , const ValueType & candidate )
    {
      if ( candidate < current ) { current = candidate ; }
    }
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
/** \brief  Analytic ("manufactured") solution of a one dimensional
 *          nonlinear PDE, used as the reference for the computed solution.
 *
 *  The coefficients a, b and K are derived once, in the constructor, from
 *  the interval [zmin,zmax] and the boundary values T(zmin), T(zmax).
 *  operator() evaluates T(z).
 */
class ManufacturedSolution {
public:

  // Manufactured solution for one dimensional nonlinear PDE
  //
  //  -K T_zz + T^2 = 0 ; T(zmin) = T_zmin ; T(zmax) = T_zmax
  //
  //  Has an analytic solution of the form:
  //
  //    T(z) = ( a ( z - zmin ) + b )^(-2) where K = 1 / ( 6 a^2 )
  //
  //  Given T_0 and T_L compute K for this analytic solution.
  //
  //  Two analytic solutions:
  //
  //    Solution with singularity:
  //    , a( ( 1.0 / sqrt(T_zmax) + 1.0 / sqrt(T_zmin) ) / ( zmax - zmin ) )
  //    , b( -1.0 / sqrt(T_zmin) )
  //
  //    Solution without singularity (the one implemented below):
  //    , a( ( 1.0 / sqrt(T_zmax) - 1.0 / sqrt(T_zmin) ) / ( zmax - zmin ) )
  //    , b( 1.0 / sqrt(T_zmin) )

  const double zmin ;
  const double zmax ;
  const double T_zmin ;
  const double T_zmax ;
  const double a ;
  const double b ;
  const double K ;

  /** \brief  Derive a, b and K from the interval and boundary values.
   *
   *  \param arg_zmin    lower end of the interval
   *  \param arg_zmax    upper end of the interval
   *  \param arg_T_zmin  boundary value T(zmin)
   *  \param arg_T_zmax  boundary value T(zmax)
   *
   *  The arguments are not validated.  The formulas take the square root
   *  of both boundary values and divide by ( zmax - zmin ) and by a^2 .
   *
   *  The initializers of a, b and K read members declared earlier in the
   *  class; this is safe because members are initialized in declaration
   *  order.
   */
  ManufacturedSolution( const double arg_zmin ,
                        const double arg_zmax ,
                        const double arg_T_zmin ,
                        const double arg_T_zmax )
    : zmin( arg_zmin )
    , zmax( arg_zmax )
    , T_zmin( arg_T_zmin )
    , T_zmax( arg_T_zmax )
    , a( ( 1.0 / std::sqrt(T_zmax) - 1.0 / std::sqrt(T_zmin) ) / ( zmax - zmin ) )
    , b( 1.0 / std::sqrt(T_zmin) )
    , K( 1.0 / ( 6.0 * a * a ) )
    {}

  /** \brief  Evaluate  T(z) = ( a ( z - zmin ) + b )^(-2) . */
  double operator()( const double z ) const
  {
    const double tmp = a * ( z - zmin ) + b ;
    return 1.0 / ( tmp * tmp );
  }
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar , class FixtureType >
|
||||
PerformanceData run( const typename FixtureType::FEMeshType & mesh ,
|
||||
const int , // global_max_x ,
|
||||
const int , // global_max_y ,
|
||||
const int global_max_z ,
|
||||
const bool print_error )
|
||||
{
|
||||
typedef Scalar scalar_type ;
|
||||
typedef FixtureType fixture_type ;
|
||||
typedef typename fixture_type::execution_space execution_space;
|
||||
//typedef typename execution_space::size_type size_type ; // unused
|
||||
|
||||
typedef typename fixture_type::FEMeshType mesh_type ;
|
||||
typedef typename fixture_type::coordinate_scalar_type coordinate_scalar_type ;
|
||||
|
||||
enum { ElementNodeCount = fixture_type::element_node_count };
|
||||
|
||||
const comm::Machine machine = mesh.parallel_data_map.machine ;
|
||||
|
||||
const size_t element_count = mesh.elem_node_ids.dimension_0();
|
||||
|
||||
//------------------------------------
|
||||
// The amount of nonlinearity is proportional to the ratio
|
||||
// between T(zmax) and T(zmin). For the manufactured solution
|
||||
// 0 < T(zmin) and 0 < T(zmax)
|
||||
|
||||
const ManufacturedSolution
|
||||
exact_solution( /* zmin */ 0 ,
|
||||
/* zmax */ global_max_z ,
|
||||
/* T(zmin) */ 1 ,
|
||||
/* T(zmax) */ 20 );
|
||||
|
||||
//-----------------------------------
|
||||
// Convergence Criteria and perf data:
|
||||
|
||||
const size_t cg_iteration_limit = 200 ;
|
||||
const double cg_tolerance = 1e-14 ;
|
||||
|
||||
const size_t newton_iteration_limit = 150 ;
|
||||
const double newton_tolerance = 1e-14 ;
|
||||
|
||||
size_t cg_iteration_count_total = 0 ;
|
||||
double cg_iteration_time = 0 ;
|
||||
|
||||
size_t newton_iteration_count = 0 ;
|
||||
double residual_norm_init = 0 ;
|
||||
double residual_norm = 0 ;
|
||||
|
||||
PerformanceData perf_data ;
|
||||
|
||||
//------------------------------------
|
||||
// Sparse linear system types:
|
||||
|
||||
typedef Kokkos::View< scalar_type* , execution_space > vector_type ;
|
||||
typedef Kokkos::CrsMatrix< scalar_type , execution_space > matrix_type ;
|
||||
typedef typename matrix_type::graph_type matrix_graph_type ;
|
||||
typedef typename matrix_type::coefficients_type matrix_coefficients_type ;
|
||||
|
||||
typedef GraphFactory< matrix_graph_type , mesh_type > graph_factory ;
|
||||
|
||||
//------------------------------------
|
||||
// Problem setup types:
|
||||
|
||||
typedef ElementComputation < mesh_type , scalar_type > ElementFunctor ;
|
||||
typedef DirichletSolution < mesh_type , scalar_type > DirichletSolutionFunctor ;
|
||||
typedef DirichletResidual < mesh_type , scalar_type > DirichletResidualFunctor ;
|
||||
|
||||
typedef typename ElementFunctor::elem_matrices_type elem_matrices_type ;
|
||||
typedef typename ElementFunctor::elem_vectors_type elem_vectors_type ;
|
||||
|
||||
typedef GatherFill< matrix_type ,
|
||||
mesh_type ,
|
||||
elem_matrices_type ,
|
||||
elem_vectors_type > GatherFillFunctor ;
|
||||
|
||||
//------------------------------------
|
||||
|
||||
matrix_type jacobian ;
|
||||
vector_type residual ;
|
||||
vector_type delta ;
|
||||
vector_type nodal_solution ;
|
||||
|
||||
typename graph_factory::element_map_type element_map ;
|
||||
|
||||
//------------------------------------
|
||||
// Generate mesh and corresponding sparse matrix graph
|
||||
|
||||
Kokkos::Impl::Timer wall_clock ;
|
||||
|
||||
//------------------------------------
|
||||
// Generate sparse matrix graph and element->graph map.
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
graph_factory::create( mesh , jacobian.graph , element_map );
|
||||
|
||||
execution_space::fence();
|
||||
|
||||
perf_data.graph_time = comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
//------------------------------------
|
||||
// Allocate linear system coefficients and rhs:
|
||||
|
||||
const size_t local_owned_length = jacobian.graph.row_map.dimension_0() - 1 ;
|
||||
const size_t local_total_length = mesh.node_coords.dimension_0();
|
||||
|
||||
jacobian.coefficients =
|
||||
matrix_coefficients_type( "jacobian_coeff" , jacobian.graph.entries.dimension_0() );
|
||||
|
||||
// Nonlinear residual for owned nodes:
|
||||
residual = vector_type( "residual" , local_owned_length );
|
||||
|
||||
// Nonlinear solution for owned and ghosted nodes:
|
||||
nodal_solution = vector_type( "solution" , local_total_length );
|
||||
|
||||
// Nonlinear solution update for owned nodes:
|
||||
delta = vector_type( "delta" , local_owned_length );
|
||||
|
||||
//------------------------------------
|
||||
// Allocation of arrays to fill the linear system
|
||||
|
||||
elem_matrices_type elem_matrices ; // Jacobian matrices
|
||||
elem_vectors_type elem_vectors ; // Residual vectors
|
||||
|
||||
if ( element_count ) {
|
||||
elem_matrices = elem_matrices_type( std::string("elem_matrices"), element_count );
|
||||
elem_vectors = elem_vectors_type( std::string("elem_vectors"), element_count );
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
// For boundary condition set the correct values in the solution vector
|
||||
// The 'zmin' face is assigned to 'T_zmin'.
|
||||
// The 'zmax' face is assigned to 'T_zmax'.
|
||||
// The resulting solution is one dimensional along the 'Z' axis.
|
||||
|
||||
DirichletSolutionFunctor::apply( nodal_solution , mesh ,
|
||||
exact_solution.zmin ,
|
||||
exact_solution.zmax ,
|
||||
exact_solution.T_zmin ,
|
||||
exact_solution.T_zmax );
|
||||
|
||||
for(;;) { // Nonlinear loop
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
{ //------------------------------------
|
||||
// Import off-processor nodal solution values
|
||||
// for residual and jacobian computations
|
||||
|
||||
Kokkos::AsyncExchange< typename vector_type::value_type , execution_space ,
|
||||
Kokkos::ParallelDataMap >
|
||||
exchange( mesh.parallel_data_map , 1 );
|
||||
|
||||
Kokkos::PackArray< vector_type >
|
||||
::pack( exchange.buffer() ,
|
||||
mesh.parallel_data_map.count_interior ,
|
||||
mesh.parallel_data_map.count_send ,
|
||||
nodal_solution );
|
||||
|
||||
exchange.setup();
|
||||
|
||||
exchange.send_receive();
|
||||
|
||||
Kokkos::UnpackArray< vector_type >
|
||||
::unpack( nodal_solution , exchange.buffer() ,
|
||||
mesh.parallel_data_map.count_owned ,
|
||||
mesh.parallel_data_map.count_receive );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//------------------------------------
|
||||
// Compute element matrices and vectors:
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
ElementFunctor( mesh ,
|
||||
elem_matrices ,
|
||||
elem_vectors ,
|
||||
nodal_solution ,
|
||||
exact_solution.K );
|
||||
|
||||
execution_space::fence();
|
||||
perf_data.elem_time += comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
//------------------------------------
|
||||
// Fill linear system coefficients:
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
fill( jacobian.coefficients.dimension_0(), 0 , jacobian.coefficients );
|
||||
fill( residual.dimension_0() , 0 , residual );
|
||||
|
||||
GatherFillFunctor::apply( jacobian ,
|
||||
residual ,
|
||||
mesh ,
|
||||
element_map ,
|
||||
elem_matrices ,
|
||||
elem_vectors );
|
||||
|
||||
execution_space::fence();
|
||||
perf_data.matrix_gather_fill_time += comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
// Apply boundary conditions:
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
// Updates jacobian matrix to 1 on the diagonal, zero elsewhere,
|
||||
// and 0 in the residual due to the solution vector having the correct value
|
||||
DirichletResidualFunctor::apply( jacobian, residual, mesh ,
|
||||
exact_solution.zmin ,
|
||||
exact_solution.zmax );
|
||||
|
||||
execution_space::fence();
|
||||
perf_data.matrix_boundary_condition_time +=
|
||||
comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
//------------------------------------
|
||||
// Has the residual converged?
|
||||
|
||||
residual_norm = norm2( mesh.parallel_data_map.count_owned,
|
||||
residual,
|
||||
mesh.parallel_data_map.machine );
|
||||
|
||||
if ( 0 == newton_iteration_count ) {
|
||||
residual_norm_init = residual_norm ;
|
||||
}
|
||||
|
||||
if ( residual_norm / residual_norm_init < newton_tolerance ) {
|
||||
break ;
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
// Solve linear sytem
|
||||
|
||||
size_t cg_iteration_count = 0 ;
|
||||
double cg_residual_norm = 0 ;
|
||||
|
||||
cgsolve( mesh.parallel_data_map ,
|
||||
jacobian , residual , delta ,
|
||||
cg_iteration_count ,
|
||||
cg_residual_norm ,
|
||||
cg_iteration_time ,
|
||||
cg_iteration_limit , cg_tolerance ) ;
|
||||
|
||||
perf_data.cg_iteration_time += cg_iteration_time ;
|
||||
cg_iteration_count_total += cg_iteration_count ;
|
||||
|
||||
// Update non-linear solution with delta...
|
||||
// delta is : - Dx = [Jacobian]^1 * Residual which is the negative update
|
||||
// LaTeX:
|
||||
// \vec {x}_{n+1} = \vec {x}_{n} - ( - \Delta \vec{x}_{n} )
|
||||
// text:
|
||||
// x[n+1] = x[n] + Dx
|
||||
|
||||
axpy( mesh.parallel_data_map.count_owned ,
|
||||
-1.0, delta, nodal_solution);
|
||||
|
||||
++newton_iteration_count ;
|
||||
|
||||
if ( newton_iteration_limit < newton_iteration_count ) {
|
||||
break ;
|
||||
}
|
||||
};
|
||||
|
||||
if ( newton_iteration_count ) {
|
||||
perf_data.elem_time /= newton_iteration_count ;
|
||||
perf_data.matrix_gather_fill_time /= newton_iteration_count ;
|
||||
perf_data.matrix_boundary_condition_time /= newton_iteration_count ;
|
||||
}
|
||||
|
||||
if ( cg_iteration_count_total ) {
|
||||
perf_data.cg_iteration_time /= cg_iteration_count_total ;
|
||||
}
|
||||
|
||||
perf_data.newton_iteration_count = newton_iteration_count ;
|
||||
perf_data.cg_iteration_count = cg_iteration_count_total ;
|
||||
|
||||
//------------------------------------
|
||||
|
||||
{
|
||||
// For extracting the nodal solution and its coordinates:
|
||||
|
||||
typename mesh_type::node_coords_type::HostMirror node_coords_host =
|
||||
Kokkos::create_mirror( mesh.node_coords );
|
||||
|
||||
typename vector_type::HostMirror nodal_solution_host =
|
||||
Kokkos::create_mirror( nodal_solution );
|
||||
|
||||
Kokkos::deep_copy( node_coords_host , mesh.node_coords );
|
||||
Kokkos::deep_copy( nodal_solution_host , nodal_solution );
|
||||
|
||||
double tmp = 0 ;
|
||||
|
||||
for ( size_t i = 0 ; i < mesh.parallel_data_map.count_owned ; ++i ) {
|
||||
const coordinate_scalar_type x = node_coords_host(i,0);
|
||||
const coordinate_scalar_type y = node_coords_host(i,1);
|
||||
const coordinate_scalar_type z = node_coords_host(i,2);
|
||||
|
||||
const double Tx = exact_solution(z);
|
||||
const double Ts = nodal_solution_host(i);
|
||||
const double Te = std::abs( Tx - Ts ) / std::abs( Tx );
|
||||
|
||||
tmp = std::max( tmp , Te );
|
||||
|
||||
if ( print_error && 0.02 < Te ) {
|
||||
std::cout << " node( " << x << " " << y << " " << z << " ) = "
|
||||
<< Ts << " != exact_solution " << Tx
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
perf_data.error_max = comm::max( machine , tmp );
|
||||
}
|
||||
|
||||
return perf_data ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar , class Device , class FixtureElement >
|
||||
void driver( const char * const label ,
|
||||
comm::Machine machine ,
|
||||
const int gang_count ,
|
||||
const int elem_count_beg ,
|
||||
const int elem_count_end ,
|
||||
const int runs )
|
||||
{
|
||||
typedef Scalar scalar_type ;
|
||||
typedef Device execution_space ;
|
||||
typedef double coordinate_scalar_type ;
|
||||
typedef FixtureElement fixture_element_type ;
|
||||
|
||||
typedef BoxMeshFixture< coordinate_scalar_type ,
|
||||
execution_space ,
|
||||
fixture_element_type > fixture_type ;
|
||||
|
||||
typedef typename fixture_type::FEMeshType mesh_type ;
|
||||
|
||||
const size_t proc_count = comm::size( machine );
|
||||
const size_t proc_rank = comm::rank( machine );
|
||||
|
||||
if ( elem_count_beg == 0 || elem_count_end == 0 || runs == 0 ) return ;
|
||||
|
||||
if ( comm::rank( machine ) == 0 ) {
|
||||
std::cout << std::endl ;
|
||||
std::cout << "\"Kokkos::HybridFE::Nonlinear " << label << "\"" << std::endl;
|
||||
std::cout
|
||||
<< "\"Size\" , \"Size\" , \"Graphing\" , \"Element\" , \"Fill\" , \"Boundary\" , \"CG-Iter\" , \"CG-Iter\" , \"Newton-Iter\" , \"Max-node-error\""
|
||||
<< std::endl
|
||||
<< "\"elems\" , \"nodes\" , \"millisec\" , \"millisec\" , \"millisec\" , \"millisec\" , \"millisec\" , \"total-count\" , \"total-count\" , \"ratio\""
|
||||
<< std::endl ;
|
||||
}
|
||||
|
||||
const bool print_sample = 0 ;
|
||||
const double x_curve = 1.0 ;
|
||||
const double y_curve = 1.0 ;
|
||||
const double z_curve = 0.8 ;
|
||||
|
||||
for(int i = elem_count_beg ; i < elem_count_end ; i *= 2 )
|
||||
{
|
||||
const int ix = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) );
|
||||
const int iy = 1 + ix ;
|
||||
const int iz = 2 * iy ;
|
||||
const int global_elem_count = ix * iy * iz ;
|
||||
const int global_node_count = ( 2 * ix + 1 ) *
|
||||
( 2 * iy + 1 ) *
|
||||
( 2 * iz + 1 );
|
||||
|
||||
mesh_type mesh =
|
||||
fixture_type::create( proc_count , proc_rank , gang_count ,
|
||||
ix , iy , iz ,
|
||||
x_curve , y_curve , z_curve );
|
||||
|
||||
mesh.parallel_data_map.machine = machine ;
|
||||
|
||||
|
||||
PerformanceData perf_data , perf_best ;
|
||||
|
||||
for(int j = 0; j < runs; j++){
|
||||
|
||||
perf_data = run<scalar_type,fixture_type>(mesh,ix,iy,iz, print_sample );
|
||||
|
||||
if( j == 0 ) {
|
||||
perf_best = perf_data ;
|
||||
}
|
||||
else {
|
||||
perf_best.best( perf_data );
|
||||
}
|
||||
}
|
||||
|
||||
if ( comm::rank( machine ) == 0 ) {
|
||||
|
||||
std::cout << std::setw(8) << global_elem_count << " , "
|
||||
<< std::setw(8) << global_node_count << " , "
|
||||
<< std::setw(10) << perf_best.graph_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.elem_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.matrix_gather_fill_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.matrix_boundary_condition_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.cg_iteration_time * 1000 << " , "
|
||||
<< std::setw(7) << perf_best.cg_iteration_count << " , "
|
||||
<< std::setw(3) << perf_best.newton_iteration_count << " , "
|
||||
<< std::setw(10) << perf_best.error_max
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace Nonlinear */
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
|
||||
#endif /* #ifndef HYBRIDFEM_NONLINEAR_HPP */
|
||||
|
||||
@ -1,390 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <HexElement.hpp>
|
||||
#include <FEMesh.hpp>
|
||||
|
||||
namespace HybridFEM {
|
||||
namespace Nonlinear {
|
||||
|
||||
// Primary template, declared but not defined here.  The element computation
// is supplied by specializations, such as the one that follows for
// FEMesh< double , 27 , Kokkos::Cuda > with 'double' scalars.
template< class MeshType , typename ScalarType > struct ElementComputation ;
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<>
|
||||
struct ElementComputation< FEMesh< double , 27 , Kokkos::Cuda > , double >
|
||||
{
|
||||
typedef Kokkos::Cuda execution_space ;
|
||||
|
||||
static const unsigned ElementNodeCount = 27 ;
|
||||
|
||||
typedef HexElement_Data< ElementNodeCount > element_data_type ;
|
||||
typedef FEMesh< double , ElementNodeCount , execution_space > mesh_type ;
|
||||
|
||||
static const unsigned SpatialDim = element_data_type::spatial_dimension ;
|
||||
static const unsigned FunctionCount = element_data_type::function_count ;
|
||||
static const unsigned IntegrationCount = element_data_type::integration_count ;
|
||||
static const unsigned TensorDim = SpatialDim * SpatialDim ;
|
||||
|
||||
typedef Kokkos::View< double[][FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
|
||||
typedef Kokkos::View< double[][FunctionCount] , execution_space > elem_vectors_type ;
|
||||
typedef Kokkos::View< double[] , execution_space > value_vector_type ;
|
||||
|
||||
private:
|
||||
|
||||
const element_data_type elem_data ;
|
||||
const typename mesh_type::elem_node_ids_type elem_node_ids ;
|
||||
const typename mesh_type::node_coords_type node_coords ;
|
||||
const value_vector_type nodal_values ;
|
||||
const elem_matrices_type element_matrices ;
|
||||
const elem_vectors_type element_vectors ;
|
||||
const float coeff_K ;
|
||||
const unsigned elem_count ;
|
||||
unsigned invJacIndex[9][4] ;
|
||||
|
||||
static const unsigned j11 = 0 , j12 = 1 , j13 = 2 ,
|
||||
j21 = 3 , j22 = 4 , j23 = 5 ,
|
||||
j31 = 6 , j32 = 7 , j33 = 8 ;
|
||||
|
||||
// Can only handle up to 16 warps:
|
||||
static const unsigned BlockDimX = 32 ;
|
||||
static const unsigned BlockDimY = 7 ;
|
||||
|
||||
struct WorkSpace {
|
||||
double sum[ BlockDimY ][ BlockDimX ];
|
||||
|
||||
double value_at_integ[ IntegrationCount ];
|
||||
double gradx_at_integ[ IntegrationCount ];
|
||||
double grady_at_integ[ IntegrationCount ];
|
||||
double gradz_at_integ[ IntegrationCount ];
|
||||
|
||||
float spaceJac[ BlockDimY ][ 9 ];
|
||||
float spaceInvJac[ BlockDimY ][ 9 ];
|
||||
|
||||
float detJweight[ IntegrationCount ];
|
||||
|
||||
float dpsidx[ FunctionCount ][ IntegrationCount ];
|
||||
float dpsidy[ FunctionCount ][ IntegrationCount ];
|
||||
float dpsidz[ FunctionCount ][ IntegrationCount ];
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
ElementComputation ( const mesh_type & arg_mesh ,
|
||||
const elem_matrices_type & arg_element_matrices ,
|
||||
const elem_vectors_type & arg_element_vectors ,
|
||||
const value_vector_type & arg_nodal_values ,
|
||||
const float arg_coeff_K )
|
||||
: elem_data()
|
||||
, elem_node_ids( arg_mesh.elem_node_ids )
|
||||
, node_coords( arg_mesh.node_coords )
|
||||
, nodal_values( arg_nodal_values )
|
||||
, element_matrices( arg_element_matrices )
|
||||
, element_vectors( arg_element_vectors )
|
||||
, coeff_K( arg_coeff_K )
|
||||
, elem_count( arg_mesh.elem_node_ids.dimension_0() )
|
||||
{
|
||||
const unsigned jInvJ[9][4] =
|
||||
{ { j22 , j33 , j23 , j32 } ,
|
||||
{ j13 , j32 , j12 , j33 } ,
|
||||
{ j12 , j23 , j13 , j22 } ,
|
||||
|
||||
{ j23 , j31 , j21 , j33 } ,
|
||||
{ j11 , j33 , j13 , j31 } ,
|
||||
{ j13 , j21 , j11 , j23 } ,
|
||||
|
||||
{ j21 , j32 , j22 , j31 } ,
|
||||
{ j12 , j31 , j11 , j32 } ,
|
||||
{ j11 , j22 , j12 , j21 } };
|
||||
|
||||
for ( unsigned i = 0 ; i < 9 ; ++i ) {
|
||||
for ( unsigned j = 0 ; j < 4 ; ++j ) {
|
||||
invJacIndex[i][j] = jInvJ[i][j] ;
|
||||
}
|
||||
}
|
||||
|
||||
const unsigned shmem = sizeof(WorkSpace);
|
||||
const unsigned grid_max = 65535 ;
|
||||
const unsigned grid_count = std::min( grid_max , elem_count );
|
||||
|
||||
// For compute capability 2.x up to 1024 threads per block
|
||||
const dim3 block( BlockDimX , BlockDimY , 1 );
|
||||
const dim3 grid( grid_count , 1 , 1 );
|
||||
|
||||
Kokkos::Impl::CudaParallelLaunch< ElementComputation >( *this , grid , block , shmem );
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
//------------------------------------
|
||||
// Sum among the threadIdx.x
|
||||
|
||||
template< typename Type >
|
||||
__device__ inline static
|
||||
void sum_x( Type & result , const double value )
|
||||
{
|
||||
extern __shared__ WorkSpace work_data[] ;
|
||||
|
||||
volatile double * const base_sum =
|
||||
& work_data->sum[ threadIdx.y ][ threadIdx.x ] ;
|
||||
|
||||
base_sum[ 0] = value ;
|
||||
|
||||
if ( threadIdx.x < 16 ) {
|
||||
base_sum[0] += base_sum[16];
|
||||
base_sum[0] += base_sum[ 8];
|
||||
base_sum[0] += base_sum[ 4];
|
||||
base_sum[0] += base_sum[ 2];
|
||||
base_sum[0] += base_sum[ 1];
|
||||
}
|
||||
|
||||
if ( 0 == threadIdx.x ) {
|
||||
result = base_sum[0] ;
|
||||
}
|
||||
}
|
||||
|
||||
// Zeroes the calling thread's slot of the sum_x() reduction scratch.
__device__ inline static
void sum_x_clear()
{
  extern __shared__ WorkSpace work_data[] ;

  double & my_slot = work_data->sum[ threadIdx.y ][ threadIdx.x ] ;

  my_slot = 0 ;
}
|
||||
|
||||
//------------------------------------
|
||||
//------------------------------------
|
||||
|
||||
// For element 'ielem', fills the shared WorkSpace with everything
// contributeResidualJacobian() consumes: detJ * weight, the transformed basis
// gradients, and the solution value / gradient at every integration point.
//
// Work split: threadIdx.y selects the integration point (strided by
// blockDim.y) and threadIdx.x selects the basis function / element node.
//
// NOTE(review): the inverse-Jacobian step is nested inside the
// 'lane < FunctionCount' guard, so it only runs on lanes below
// min(FunctionCount, TensorDim). If FunctionCount were smaller than TensorDim
// some inverse entries would never be written - confirm against the element
// types this is instantiated for.
__device__ inline
void evaluateFunctions( const unsigned ielem ) const
{
  extern __shared__ WorkSpace work_data[] ;

  const unsigned lane   = threadIdx.x ;
  const bool     active = lane < FunctionCount ;

  //------------------------------------
  // Gather: rows 0..2 of the 'sum' scratch receive the node's x, y, z
  // coordinates and row 3 its solution value; the rows are shared out
  // over threadIdx.y.

  if ( active ) {

    const unsigned node = elem_node_ids( ielem , lane );

    for ( unsigned row = threadIdx.y ; row < 4 ; row += blockDim.y ) {
      if ( row < 3 ) {
        work_data->sum[row][lane] = node_coords( node , row );
      }
      else {
        work_data->sum[3][lane] = nodal_values( node );
      }
    }
  }

  __syncthreads(); // every row of gathered data is now visible

  // Private copies of this lane's node data:

  const double x       = work_data->sum[0][ lane ];
  const double y       = work_data->sum[1][ lane ];
  const double z       = work_data->sum[2][ lane ];
  const double dof_val = work_data->sum[3][ lane ];

  __syncthreads(); // all lanes have read before the scratch is reused

  sum_x_clear(); // reduction scratch must start at zero

  //------------------------------------
  // From here each threadIdx.y row works on its own integration points
  // without further block-wide barriers until the end.

  if ( active ) {

    float * const jac     = work_data->spaceJac[ threadIdx.y ];
    float * const jac_inv = work_data->spaceInvJac[ threadIdx.y ];

    for ( unsigned q = threadIdx.y ; q < IntegrationCount ; q += blockDim.y ) {

      const float val = elem_data.values[q][lane] ;
      const float gx  = elem_data.gradients[q][0][lane] ;
      const float gy  = elem_data.gradients[q][1][lane] ;
      const float gz  = elem_data.gradients[q][2][lane] ;

      // Jacobian: reference gradients contracted with nodal coordinates.

      sum_x( jac[j11], gx * x );
      sum_x( jac[j12], gx * y );
      sum_x( jac[j13], gx * z );

      sum_x( jac[j21], gy * x );
      sum_x( jac[j22], gy * y );
      sum_x( jac[j23], gy * z );

      sum_x( jac[j31], gz * x );
      sum_x( jac[j32], gz * y );
      sum_x( jac[j33], gz * z );

      // Inverse Jacobian: one entry per lane, so only TensorDim lanes
      // have work here.

      if ( lane < TensorDim ) {

        jac_inv[ lane ] =
          jac[ invJacIndex[lane][0] ] * jac[ invJacIndex[lane][1] ] -
          jac[ invJacIndex[lane][2] ] * jac[ invJacIndex[lane][3] ] ;

        // Each participating lane forms the determinant for itself
        // from the first three unscaled inverse entries.

        const float detJ = jac[j11] * jac_inv[j11] +
                           jac[j21] * jac_inv[j12] +
                           jac[j31] * jac_inv[j13] ;

        jac_inv[ lane ] /= detJ ;

        if ( 0 == lane ) {
          work_data->detJweight[ q ] = detJ * elem_data.weights[ q ] ;
        }
      }

      // Transform the reference gradients with the inverse Jacobian.

      const float dx = gx * jac_inv[j11] + gy * jac_inv[j12] + gz * jac_inv[j13];
      const float dy = gx * jac_inv[j21] + gy * jac_inv[j22] + gz * jac_inv[j23];
      const float dz = gx * jac_inv[j31] + gy * jac_inv[j32] + gz * jac_inv[j33];

      work_data->dpsidx[lane][q] = dx ;
      work_data->dpsidy[lane][q] = dy ;
      work_data->dpsidz[lane][q] = dz ;

      // Solution gradient and value at this integration point.

      sum_x( work_data->gradx_at_integ[q] , dof_val * dx );
      sum_x( work_data->grady_at_integ[q] , dof_val * dy );
      sum_x( work_data->gradz_at_integ[q] , dof_val * dz );
      sum_x( work_data->value_at_integ[q] , dof_val * val );
    }
  }

  __syncthreads(); // shared data is fully populated when this returns
}
|
||||
|
||||
// Accumulates the residual vector and Jacobian matrix of element 'ielem'
// from the shared data prepared by evaluateFunctions().
//
// $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
// $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$
//
// Work split: threadIdx.x selects the integration point, threadIdx.y the
// matrix row (strided by blockDim.y); sum_x() adds the integration-point
// contributions together and stores each total into the output views.
__device__ inline
void contributeResidualJacobian( const unsigned ielem ) const
{
  extern __shared__ WorkSpace work_data[] ;

  sum_x_clear(); // reduction scratch must start at zero

  const unsigned q = threadIdx.x ;

  if ( q < IntegrationCount ) {

    // Solution state at this integration point.
    const double T      = work_data->value_at_integ[ q ] ;
    const double dT_dx  = work_data->gradx_at_integ[ q ] ;
    const double dT_dy  = work_data->grady_at_integ[ q ] ;
    const double dT_dz  = work_data->gradz_at_integ[ q ] ;

    const float w   = work_data->detJweight[ q ] ;
    const float k_w = coeff_K * w ;

    for ( unsigned row = threadIdx.y ;
          row < FunctionCount ; row += blockDim.y ) {

      // Row basis function and gradient, pre-scaled by the weights.
      const float phi_row = elem_data.values[ q ][ row ] * w ;
      const float gx_row  = work_data->dpsidx[ row ][ q ] * k_w ;
      const float gy_row  = work_data->dpsidy[ row ][ q ] * k_w ;
      const float gz_row  = work_data->dpsidz[ row ][ q ] * k_w ;

      const double diffusion_res = gx_row * dT_dx +
                                   gy_row * dT_dy +
                                   gz_row * dT_dz ;

      const double reaction_res      = T * T * phi_row ;
      const double reaction_jac_row  = 2 * T * phi_row ;

      sum_x( element_vectors( ielem , row ) , diffusion_res + reaction_res );

      for ( unsigned col = 0 ; col < FunctionCount ; ++col ) {

        const float diffusion_jac =
          gx_row * work_data->dpsidx[col][q] +
          gy_row * work_data->dpsidy[col][q] +
          gz_row * work_data->dpsidz[col][q] ;

        const double reaction_jac =
          reaction_jac_row * elem_data.values[ q ][ col ] ;

        sum_x( element_matrices( ielem , row , col ) , diffusion_jac + reaction_jac );
      }
    }
  }

  __syncthreads(); // every thread is done before the shared data is refilled
}
|
||||
|
||||
// Kernel body: each block starts at element blockIdx.x and advances by
// gridDim.x until all elem_count elements are processed. For every element
// the shared workspace is first filled, then reduced into the output views.
//
// The shared workspace itself is declared inside the two helpers, which are
// the only code that touches it, so no declaration is needed here.
__device__ inline
void operator()(void) const
{
  for ( unsigned ielem = blockIdx.x ; ielem < elem_count ; ielem += gridDim.x ) {

    evaluateFunctions( ielem );

    contributeResidualJacobian( ielem );
  }
}
|
||||
|
||||
}; /* ElementComputation */
|
||||
|
||||
} /* namespace Nonlinear */
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
@ -1,482 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_NONLINEARFUNCTORS_HPP
|
||||
#define KOKKOS_NONLINEARFUNCTORS_HPP
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
|
||||
// Primary templates for the nonlinear-problem functors. They are declared
// here without a definition; the partial specializations for FEMesh follow
// below (and, when compiling with CUDA, in NonlinearElement_Cuda.hpp).
namespace HybridFEM {
|
||||
namespace Nonlinear {
|
||||
|
||||
// Computes per-element vectors and matrices.
template< class MeshType , typename ScalarType > struct ElementComputation ;
|
||||
// Imposes Dirichlet boundary values on a solution vector.
template< class MeshType , typename ScalarType > struct DirichletSolution ;
|
||||
// Imposes Dirichlet boundary conditions on a linear system (matrix and rhs).
template< class MeshType , typename ScalarType > struct DirichletResidual ;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/* A Cuda-specific specialization for the element computation functor. */
|
||||
#if defined( __CUDACC__ )
|
||||
#include <NonlinearElement_Cuda.hpp>
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace HybridFEM {
|
||||
namespace Nonlinear {
|
||||
|
||||
template< typename ScalarCoordType , unsigned ElemNode , class DeviceType ,
|
||||
typename ScalarType >
|
||||
struct ElementComputation<
|
||||
FEMesh< ScalarCoordType , ElemNode , DeviceType > , ScalarType >
|
||||
{
|
||||
typedef DeviceType execution_space;
|
||||
typedef ScalarType scalar_type ;
|
||||
|
||||
static const unsigned ElementNodeCount = ElemNode ;
|
||||
|
||||
typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;
|
||||
|
||||
typedef HexElement_Data< ElementNodeCount > element_data_type ;
|
||||
|
||||
static const unsigned SpatialDim = element_data_type::spatial_dimension ;
|
||||
static const unsigned FunctionCount = element_data_type::function_count ;
|
||||
static const unsigned IntegrationCount = element_data_type::integration_count ;
|
||||
static const unsigned TensorDim = SpatialDim * SpatialDim ;
|
||||
|
||||
typedef Kokkos::View< scalar_type[][FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
|
||||
typedef Kokkos::View< scalar_type[][FunctionCount] , execution_space > elem_vectors_type ;
|
||||
typedef Kokkos::View< scalar_type[] , execution_space > value_vector_type ;
|
||||
|
||||
|
||||
private:
|
||||
|
||||
const element_data_type elem_data ;
|
||||
typename mesh_type::elem_node_ids_type elem_node_ids ;
|
||||
typename mesh_type::node_coords_type node_coords ;
|
||||
value_vector_type nodal_values ;
|
||||
elem_matrices_type element_matrices ;
|
||||
elem_vectors_type element_vectors ;
|
||||
scalar_type coeff_K ;
|
||||
|
||||
public:
|
||||
|
||||
ElementComputation( const mesh_type & arg_mesh ,
|
||||
const elem_matrices_type & arg_element_matrices ,
|
||||
const elem_vectors_type & arg_element_vectors ,
|
||||
const value_vector_type & arg_nodal_values ,
|
||||
const scalar_type arg_coeff_K )
|
||||
: elem_data()
|
||||
, elem_node_ids( arg_mesh.elem_node_ids )
|
||||
, node_coords( arg_mesh.node_coords )
|
||||
, nodal_values( arg_nodal_values )
|
||||
, element_matrices( arg_element_matrices )
|
||||
, element_vectors( arg_element_vectors )
|
||||
, coeff_K( arg_coeff_K )
|
||||
{
|
||||
const size_t elem_count = arg_mesh.elem_node_ids.dimension_0();
|
||||
|
||||
parallel_for( elem_count , *this );
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
static const unsigned FLOPS_transform_gradients =
|
||||
/* Jacobian */ FunctionCount * TensorDim * 2 +
|
||||
/* Inverse jacobian */ TensorDim * 6 + 6 +
|
||||
/* Gradient transform */ FunctionCount * 15 ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
float transform_gradients(
|
||||
const float grad[][ FunctionCount ] , // Gradient of bases master element
|
||||
const double x[] ,
|
||||
const double y[] ,
|
||||
const double z[] ,
|
||||
float dpsidx[] ,
|
||||
float dpsidy[] ,
|
||||
float dpsidz[] ) const
|
||||
{
|
||||
enum { j11 = 0 , j12 = 1 , j13 = 2 ,
|
||||
j21 = 3 , j22 = 4 , j23 = 5 ,
|
||||
j31 = 6 , j32 = 7 , j33 = 8 };
|
||||
|
||||
// Jacobian accumulation:
|
||||
|
||||
double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
|
||||
for( unsigned i = 0; i < FunctionCount ; ++i ) {
|
||||
const double x1 = x[i] ;
|
||||
const double x2 = y[i] ;
|
||||
const double x3 = z[i] ;
|
||||
|
||||
const float g1 = grad[0][i] ;
|
||||
const float g2 = grad[1][i] ;
|
||||
const float g3 = grad[2][i] ;
|
||||
|
||||
J[j11] += g1 * x1 ;
|
||||
J[j12] += g1 * x2 ;
|
||||
J[j13] += g1 * x3 ;
|
||||
|
||||
J[j21] += g2 * x1 ;
|
||||
J[j22] += g2 * x2 ;
|
||||
J[j23] += g2 * x3 ;
|
||||
|
||||
J[j31] += g3 * x1 ;
|
||||
J[j32] += g3 * x2 ;
|
||||
J[j33] += g3 * x3 ;
|
||||
}
|
||||
|
||||
// Inverse jacobian:
|
||||
|
||||
float invJ[ TensorDim ] = {
|
||||
static_cast<float>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
|
||||
static_cast<float>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
|
||||
static_cast<float>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
|
||||
|
||||
static_cast<float>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
|
||||
static_cast<float>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
|
||||
static_cast<float>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
|
||||
|
||||
static_cast<float>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
|
||||
static_cast<float>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
|
||||
static_cast<float>( J[j11] * J[j22] - J[j12] * J[j21] ) };
|
||||
|
||||
const float detJ = J[j11] * invJ[j11] +
|
||||
J[j21] * invJ[j12] +
|
||||
J[j31] * invJ[j13] ;
|
||||
|
||||
const float detJinv = 1.0 / detJ ;
|
||||
|
||||
for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
|
||||
|
||||
// Transform gradients:
|
||||
|
||||
for( unsigned i = 0; i < FunctionCount ; ++i ) {
|
||||
const float g0 = grad[0][i];
|
||||
const float g1 = grad[1][i];
|
||||
const float g2 = grad[2][i];
|
||||
|
||||
dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
|
||||
dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
|
||||
dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
|
||||
}
|
||||
|
||||
return detJ ;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void contributeResidualJacobian(
|
||||
const float coeff_k ,
|
||||
const double dof_values[] ,
|
||||
const float dpsidx[] ,
|
||||
const float dpsidy[] ,
|
||||
const float dpsidz[] ,
|
||||
const float detJ ,
|
||||
const float integ_weight ,
|
||||
const float bases_vals[] ,
|
||||
double elem_res[] ,
|
||||
double elem_mat[][ FunctionCount ] ) const
|
||||
{
|
||||
double value_at_pt = 0 ;
|
||||
double gradx_at_pt = 0 ;
|
||||
double grady_at_pt = 0 ;
|
||||
double gradz_at_pt = 0 ;
|
||||
|
||||
for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
|
||||
value_at_pt += dof_values[m] * bases_vals[m] ;
|
||||
gradx_at_pt += dof_values[m] * dpsidx[m] ;
|
||||
grady_at_pt += dof_values[m] * dpsidy[m] ;
|
||||
gradz_at_pt += dof_values[m] * dpsidz[m] ;
|
||||
}
|
||||
|
||||
const scalar_type k_detJ_weight = coeff_k * detJ * integ_weight ;
|
||||
const double res_val = value_at_pt * value_at_pt * detJ * integ_weight ;
|
||||
const double mat_val = 2.0 * value_at_pt * detJ * integ_weight ;
|
||||
|
||||
// $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
|
||||
// $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$
|
||||
|
||||
for ( unsigned m = 0; m < FunctionCount; m++) {
|
||||
double * const mat = elem_mat[m] ;
|
||||
const float bases_val_m = bases_vals[m];
|
||||
const float dpsidx_m = dpsidx[m] ;
|
||||
const float dpsidy_m = dpsidy[m] ;
|
||||
const float dpsidz_m = dpsidz[m] ;
|
||||
|
||||
elem_res[m] += k_detJ_weight * ( dpsidx_m * gradx_at_pt +
|
||||
dpsidy_m * grady_at_pt +
|
||||
dpsidz_m * gradz_at_pt ) +
|
||||
res_val * bases_val_m ;
|
||||
|
||||
for( unsigned n = 0; n < FunctionCount; n++) {
|
||||
|
||||
mat[n] += k_detJ_weight * ( dpsidx_m * dpsidx[n] +
|
||||
dpsidy_m * dpsidy[n] +
|
||||
dpsidz_m * dpsidz[n] ) +
|
||||
mat_val * bases_val_m * bases_vals[n];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const unsigned ielem ) const
|
||||
{
|
||||
// Gather nodal coordinates and solution vector:
|
||||
|
||||
double x[ FunctionCount ] ;
|
||||
double y[ FunctionCount ] ;
|
||||
double z[ FunctionCount ] ;
|
||||
double val[ FunctionCount ] ;
|
||||
|
||||
for ( unsigned i = 0 ; i < ElementNodeCount ; ++i ) {
|
||||
const unsigned node_index = elem_node_ids( ielem , i );
|
||||
|
||||
x[i] = node_coords( node_index , 0 );
|
||||
y[i] = node_coords( node_index , 1 );
|
||||
z[i] = node_coords( node_index , 2 );
|
||||
|
||||
val[i] = nodal_values( node_index );
|
||||
}
|
||||
|
||||
double elem_vec[ FunctionCount ] ;
|
||||
double elem_mat[ FunctionCount ][ FunctionCount ] ;
|
||||
|
||||
for( unsigned i = 0; i < FunctionCount ; i++ ) {
|
||||
elem_vec[i] = 0 ;
|
||||
for( unsigned j = 0; j < FunctionCount ; j++){
|
||||
elem_mat[i][j] = 0 ;
|
||||
}
|
||||
}
|
||||
|
||||
for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
|
||||
float dpsidx[ FunctionCount ] ;
|
||||
float dpsidy[ FunctionCount ] ;
|
||||
float dpsidz[ FunctionCount ] ;
|
||||
|
||||
const float detJ =
|
||||
transform_gradients( elem_data.gradients[i] , x , y , z ,
|
||||
dpsidx , dpsidy , dpsidz );
|
||||
|
||||
contributeResidualJacobian( coeff_K ,
|
||||
val , dpsidx , dpsidy , dpsidz ,
|
||||
detJ ,
|
||||
elem_data.weights[i] ,
|
||||
elem_data.values[i] ,
|
||||
elem_vec , elem_mat );
|
||||
}
|
||||
|
||||
for( unsigned i = 0; i < FunctionCount ; i++){
|
||||
element_vectors(ielem, i) = elem_vec[i] ;
|
||||
for( unsigned j = 0; j < FunctionCount ; j++){
|
||||
element_matrices(ielem, i, j) = elem_mat[i][j] ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}; /* ElementComputation */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarCoordType , unsigned ElemNode , class DeviceType ,
|
||||
typename ScalarType >
|
||||
struct DirichletSolution<
|
||||
FEMesh< ScalarCoordType , ElemNode , DeviceType > ,
|
||||
ScalarType >
|
||||
{
|
||||
typedef DeviceType execution_space;
|
||||
|
||||
static const unsigned ElementNodeCount = ElemNode ;
|
||||
|
||||
typedef Kokkos::View< ScalarType[] , execution_space > vector_type ;
|
||||
|
||||
typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;
|
||||
|
||||
typename mesh_type::node_coords_type node_coords ;
|
||||
|
||||
vector_type solution ;
|
||||
ScalarCoordType bc_lower_z ;
|
||||
ScalarCoordType bc_upper_z ;
|
||||
ScalarType bc_lower_value ;
|
||||
ScalarType bc_upper_value ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const unsigned inode ) const
|
||||
{
|
||||
|
||||
// Apply dirichlet boundary condition on the Solution vector.
|
||||
// Define boundary node values to be either bc_lower_value or
|
||||
// bc_upper_value, depending on which boundary face they lie on.
|
||||
// Non-boundary terms will be left at their previous value.
|
||||
|
||||
const ScalarCoordType z = node_coords(inode,2);
|
||||
const bool bc_lower = z <= bc_lower_z ;
|
||||
const bool bc_upper = bc_upper_z <= z ;
|
||||
|
||||
if ( bc_lower || bc_upper ) {
|
||||
const ScalarType bc_value = bc_lower ? bc_lower_value
|
||||
: bc_upper_value ;
|
||||
|
||||
solution(inode) = bc_value ; // set the solution vector
|
||||
}
|
||||
}
|
||||
|
||||
static void apply( const vector_type & solution ,
|
||||
const mesh_type & mesh ,
|
||||
const ScalarCoordType bc_lower_z ,
|
||||
const ScalarCoordType bc_upper_z ,
|
||||
const ScalarType bc_lower_value ,
|
||||
const ScalarType bc_upper_value )
|
||||
{
|
||||
DirichletSolution op ;
|
||||
op.node_coords = mesh.node_coords ;
|
||||
op.solution = solution ;
|
||||
op.bc_lower_z = bc_lower_z ;
|
||||
op.bc_upper_z = bc_upper_z ;
|
||||
op.bc_lower_value = bc_lower_value ;
|
||||
op.bc_upper_value = bc_upper_value ;
|
||||
parallel_for( solution.dimension_0() , op );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarCoordType , unsigned ElemNode , class DeviceType ,
|
||||
typename ScalarType >
|
||||
struct DirichletResidual<
|
||||
FEMesh< ScalarCoordType , ElemNode , DeviceType > , ScalarType >
|
||||
{
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
|
||||
static const unsigned ElementNodeCount = ElemNode ;
|
||||
|
||||
typedef Kokkos::CrsMatrix< ScalarType , execution_space > matrix_type ;
|
||||
typedef Kokkos::View< ScalarType[] , execution_space > vector_type ;
|
||||
|
||||
typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;
|
||||
|
||||
typename mesh_type::node_coords_type node_coords ;
|
||||
matrix_type matrix ;
|
||||
vector_type rhs ;
|
||||
ScalarCoordType bc_lower_z ;
|
||||
ScalarCoordType bc_upper_z ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const unsigned inode ) const
|
||||
{
|
||||
// Apply a dirichlet boundary condition to 'irow'
|
||||
// to maintain the symmetry of the original
|
||||
// global stiffness matrix, zero out the columns
|
||||
// that correspond to boundary conditions, and
|
||||
// adjust the load vector accordingly
|
||||
|
||||
const size_type iBeg = matrix.graph.row_map[inode];
|
||||
const size_type iEnd = matrix.graph.row_map[inode+1];
|
||||
|
||||
const ScalarCoordType z = node_coords(inode,2);
|
||||
const bool bc_lower = z <= bc_lower_z ;
|
||||
const bool bc_upper = bc_upper_z <= z ;
|
||||
|
||||
if ( bc_lower || bc_upper ) {
|
||||
rhs(inode) = 0 ; // set the residual vector
|
||||
|
||||
// zero each value on the row, and leave a one
|
||||
// on the diagonal
|
||||
|
||||
for( size_type i = iBeg ; i < iEnd ; i++) {
|
||||
matrix.coefficients(i) =
|
||||
(int) inode == matrix.graph.entries(i) ? 1 : 0 ;
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
||||
// Find any columns that are boundary conditions.
|
||||
// Clear them and adjust the load vector
|
||||
|
||||
for( size_type i = iBeg ; i < iEnd ; i++ ) {
|
||||
const size_type cnode = matrix.graph.entries(i) ;
|
||||
|
||||
const ScalarCoordType zc = node_coords(cnode,2);
|
||||
const bool c_bc_lower = zc <= bc_lower_z ;
|
||||
const bool c_bc_upper = bc_upper_z <= zc ;
|
||||
|
||||
if ( c_bc_lower || c_bc_upper ) {
|
||||
|
||||
matrix.coefficients(i) = 0 ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void apply( const matrix_type & linsys_matrix ,
|
||||
const vector_type & linsys_rhs ,
|
||||
const mesh_type & mesh ,
|
||||
const ScalarCoordType bc_lower_z ,
|
||||
const ScalarCoordType bc_upper_z)
|
||||
{
|
||||
const size_t row_count = linsys_matrix.graph.row_map.dimension_0() - 1 ;
|
||||
|
||||
DirichletResidual op ;
|
||||
op.node_coords = mesh.node_coords ;
|
||||
op.matrix = linsys_matrix ;
|
||||
op.rhs = linsys_rhs ;
|
||||
op.bc_lower_z = bc_lower_z ;
|
||||
op.bc_upper_z = bc_upper_z ;
|
||||
parallel_for( row_count , op );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace Nonlinear */
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
#endif /* #ifndef KOKKOS_NONLINEARFUNCTORS_HPP */
|
||||
|
||||
@ -1,167 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef PARALLELCOMM_HPP
|
||||
#define PARALLELCOMM_HPP
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
#include <mpi.h>
|
||||
#include <string>
|
||||
|
||||
namespace comm {
|
||||
|
||||
struct Machine {
|
||||
MPI_Comm mpi_comm ;
|
||||
|
||||
Machine() : mpi_comm( MPI_COMM_NULL ) {}
|
||||
|
||||
Machine( const Machine & rhs )
|
||||
: mpi_comm( rhs.mpi_comm ) {}
|
||||
|
||||
Machine( MPI_Comm c ) : mpi_comm( c ) {}
|
||||
|
||||
static Machine init( int * argc , char *** argv )
|
||||
{
|
||||
MPI_Init( argc , argv );
|
||||
return Machine( MPI_COMM_WORLD );
|
||||
}
|
||||
|
||||
static void finalize() { MPI_Finalize(); }
|
||||
};
|
||||
|
||||
inline
|
||||
unsigned size( Machine machine )
|
||||
{
|
||||
int np ; MPI_Comm_size( machine.mpi_comm , & np ); return np ;
|
||||
}
|
||||
|
||||
inline
|
||||
unsigned rank( Machine machine )
|
||||
{
|
||||
int ip ; MPI_Comm_rank( machine.mpi_comm , & ip ); return ip ;
|
||||
}
|
||||
|
||||
inline
|
||||
double max( Machine machine , double local )
|
||||
{
|
||||
double global = 0;
|
||||
MPI_Allreduce( & local , & global , 1 , MPI_DOUBLE , MPI_MAX , machine.mpi_comm );
|
||||
return global ;
|
||||
}
|
||||
|
||||
inline
|
||||
std::string command_line( Machine machine , const int argc , const char * const * const argv )
|
||||
{
|
||||
std::string argline ;
|
||||
|
||||
if ( 0 == rank( machine ) ) {
|
||||
for ( int i = 1 ; i < argc ; ++i ) {
|
||||
argline.append(" ").append( argv[i] );
|
||||
}
|
||||
}
|
||||
|
||||
int length = argline.length();
|
||||
MPI_Bcast( & length , 1 , MPI_INT , 0 , machine.mpi_comm );
|
||||
argline.resize( length , ' ' );
|
||||
MPI_Bcast( (void*) argline.data() , length , MPI_CHAR , 0 , machine.mpi_comm );
|
||||
|
||||
return argline ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else /* ! defined( KOKKOS_HAVE_MPI ) */
|
||||
|
||||
#include <string>
|
||||
|
||||
// Serial stand-in for the 'comm' interface, used when MPI is not enabled:
// a single process with rank 0.
namespace comm {

/** Empty handle; there is no communicator in a serial build. */
struct Machine {
  /** Arguments are ignored; returns a default handle. */
  static Machine init( int * , char *** )
  {
    return Machine();
  }

  /** Nothing to shut down. */
  static void finalize() {}
};

/** A serial run always consists of exactly one process. */
inline
unsigned size( Machine )
{
  return 1 ;
}

/** The only process is rank 0. */
inline
unsigned rank( Machine )
{
  return 0 ;
}

/** With a single process the global maximum is the local value. */
inline
double max( Machine , double local )
{
  return local ;
}

/** Join argv[1..argc-1] into one string, each argument preceded by a blank. */
inline
std::string command_line( Machine machine , const int argc , const char * const * const argv )
{
  std::string joined ;

  if ( 0 != rank( machine ) ) {
    return joined ;
  }

  for ( int k = 1 ; k < argc ; ++k ) {
    joined += " " ;
    joined += argv[k] ;
  }

  return joined ;
}

}
|
||||
|
||||
#endif /* ! defined( KOKKOS_HAVE_MPI ) */
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef PARALLELCOMM_HPP */
|
||||
|
||||
|
||||
@ -1,517 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_PARALLELDATAMAP_HPP
|
||||
#define KOKKOS_PARALLELDATAMAP_HPP
|
||||
|
||||
#include <utility>
|
||||
#include <limits>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <ParallelComm.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief Parallel distributed data mapping
|
||||
*
|
||||
* ordering { interior : { owned items not sent elsewhere }
|
||||
* send : { owned items sent }
|
||||
* receive : { not-owned items received } }
|
||||
*
|
||||
* recv { { N ghosted items from process P : ( P , N ) } }
|
||||
*
|
||||
* send { { N send items to process P : ( P , N ) } }
|
||||
*
|
||||
* send_item { send item offsets within 'send' range }
|
||||
*/
|
||||
struct ParallelDataMap {
|
||||
typedef View< unsigned*[2], HostSpace > host_recv_type ;
|
||||
typedef View< unsigned*[2], HostSpace > host_send_type ;
|
||||
typedef View< unsigned* , HostSpace > host_send_item_type ;
|
||||
|
||||
comm::Machine machine ;
|
||||
host_recv_type host_recv ;
|
||||
host_send_type host_send ;
|
||||
host_send_item_type host_send_item ;
|
||||
unsigned count_interior ;
|
||||
unsigned count_send ;
|
||||
unsigned count_owned ; // = count_interior + count_send
|
||||
unsigned count_receive ;
|
||||
|
||||
void assign( const unsigned arg_count_interior ,
|
||||
const unsigned arg_count_owned ,
|
||||
const unsigned arg_count_total ,
|
||||
const unsigned arg_recv_msg ,
|
||||
const unsigned arg_send_msg ,
|
||||
const unsigned arg_send_count )
|
||||
{
|
||||
const std::string label("Kokkos::ParallelDataMap buffer");
|
||||
|
||||
count_interior = arg_count_interior ;
|
||||
count_owned = arg_count_owned ;
|
||||
count_send = arg_count_owned - arg_count_interior ;
|
||||
count_receive = arg_count_total - arg_count_owned ;
|
||||
|
||||
host_recv = host_recv_type( label , arg_recv_msg );
|
||||
host_send = host_send_type( label , arg_send_msg );
|
||||
host_send_item = host_send_item_type( label , arg_send_count );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//PackArray
|
||||
//----------------------------------------------------------------------------
|
||||
template< class ArrayType , class Rank = void >
|
||||
struct PackArray ;
|
||||
|
||||
template< typename DeviceType, typename ValueType >
|
||||
struct PackArray< View< ValueType* , DeviceType > , void >
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename DeviceType::size_type size_type ;
|
||||
typedef View< ValueType* , execution_space > array_type ;
|
||||
typedef View< ValueType* , execution_space > buffer_type ;
|
||||
|
||||
private:
|
||||
|
||||
buffer_type output ;
|
||||
array_type input ;
|
||||
size_type base ;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i ) const
|
||||
{ output[i] = input(base+i); }
|
||||
|
||||
inline
|
||||
static
|
||||
void pack( const buffer_type & arg_output ,
|
||||
const size_type arg_begin ,
|
||||
const size_type arg_count ,
|
||||
const array_type & arg_input )
|
||||
{
|
||||
PackArray op ;
|
||||
op.output = arg_output ;
|
||||
op.input = arg_input ;
|
||||
op.base = arg_begin ;
|
||||
parallel_for( arg_count , op );
|
||||
}
|
||||
};
|
||||
|
||||
template< typename DeviceType, typename ValueType , unsigned N1 >
|
||||
struct PackArray< View< ValueType*[N1] , DeviceType > , void >
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename DeviceType::size_type size_type ;
|
||||
typedef View< ValueType*[N1] , execution_space > array_type ;
|
||||
typedef View< ValueType* , execution_space > buffer_type ;
|
||||
|
||||
private:
|
||||
|
||||
buffer_type output ;
|
||||
array_type input ;
|
||||
size_type base ;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i ) const
|
||||
{
|
||||
for ( size_type j = 0 , k = i * N1 ; j < N1 ; ++j , ++k ) {
|
||||
output[k] = input(base+i,j);
|
||||
}
|
||||
}
|
||||
|
||||
inline static
|
||||
void pack( const buffer_type & arg_output ,
|
||||
const size_type arg_begin ,
|
||||
const size_type arg_count ,
|
||||
const array_type & arg_input )
|
||||
{
|
||||
if ( arg_count ) {
|
||||
PackArray op ;
|
||||
op.output = arg_output ;
|
||||
op.input = arg_input ;
|
||||
op.base = arg_begin ;
|
||||
parallel_for( arg_count , op );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//UnpackArray
|
||||
//----------------------------------------------------------------------------
|
||||
template< class ArrayType , class Rank = void > struct UnpackArray ;
|
||||
|
||||
template< typename DeviceType, typename ValueType >
|
||||
struct UnpackArray< View< ValueType* , DeviceType > , void >
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename DeviceType::size_type size_type ;
|
||||
typedef View< ValueType* , execution_space > array_type ;
|
||||
typedef View< ValueType* , execution_space > buffer_type ;
|
||||
|
||||
private:
|
||||
|
||||
array_type output ;
|
||||
buffer_type input ;
|
||||
size_type base ;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i ) const
|
||||
{ output(base+i) = input[i]; }
|
||||
|
||||
inline
|
||||
static
|
||||
void unpack( const array_type & arg_output ,
|
||||
const buffer_type & arg_input ,
|
||||
const size_type arg_begin ,
|
||||
const size_type arg_count )
|
||||
{
|
||||
UnpackArray op ;
|
||||
op.output = arg_output ;
|
||||
op.input = arg_input ;
|
||||
op.base = arg_begin ;
|
||||
parallel_for( arg_count , op );
|
||||
}
|
||||
};
|
||||
|
||||
template< typename DeviceType, typename ValueType , unsigned N1 >
|
||||
struct UnpackArray< View< ValueType*[N1] , DeviceType > , void >
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename DeviceType::size_type size_type ;
|
||||
typedef View< ValueType* , execution_space > buffer_type ;
|
||||
typedef View< ValueType*[N1] , execution_space > array_type ;
|
||||
|
||||
private:
|
||||
|
||||
array_type output ;
|
||||
buffer_type input ;
|
||||
size_type base ;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i ) const
|
||||
{
|
||||
for ( size_type j = 0 , k = i * N1 ; j < N1 ; ++j , ++k ) {
|
||||
output(base+i,j) = input(k);
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
static
|
||||
void unpack( const array_type & arg_output ,
|
||||
const buffer_type & arg_input ,
|
||||
const size_type arg_begin ,
|
||||
const size_type arg_count )
|
||||
{
|
||||
if ( arg_count ) {
|
||||
UnpackArray op ;
|
||||
op.output = arg_output ;
|
||||
op.input = arg_input ;
|
||||
op.base = arg_begin ;
|
||||
parallel_for( arg_count , op );
|
||||
}
|
||||
}
|
||||
};
|
||||
//----------------------------------------------------------------------------
|
||||
template< class ValueType , class Device , class DataMap >
|
||||
class AsyncExchange ;
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Application call procedure:
|
||||
//
|
||||
// construct: AsyncExchange object
|
||||
// * pack send buffer on device
|
||||
// initiate: copy send buffer from device to host
|
||||
// * dispatch asynchronous local work
|
||||
// complete: send/receive on host, copy receive buffer to device
|
||||
// * unpack receive buffer on device
|
||||
// destroy: AsyncExchange object
|
||||
//
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#ifdef KOKKOS_HAVE_MPI
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< class ValueType , class Device >
|
||||
class AsyncExchange< ValueType, Device , Kokkos::ParallelDataMap > {
|
||||
public:
|
||||
|
||||
typedef Device execution_space ;
|
||||
typedef Kokkos::ParallelDataMap data_map_type ;
|
||||
typedef Kokkos::View< ValueType* , execution_space > buffer_dev_type ;
|
||||
typedef typename buffer_dev_type::HostMirror buffer_host_type ;
|
||||
|
||||
private:
|
||||
|
||||
static const int mpi_tag = 11 ;
|
||||
|
||||
const data_map_type data_map ;
|
||||
unsigned chunk_size ;
|
||||
unsigned send_count_max ;
|
||||
buffer_host_type host_recv_buffer ;
|
||||
buffer_host_type host_send_buffer ;
|
||||
buffer_host_type send_msg_buffer ;
|
||||
buffer_dev_type dev_buffer ;
|
||||
buffer_dev_type dev_send_buffer ; // Subview for send
|
||||
buffer_dev_type dev_recv_buffer ; // Subview for receive
|
||||
std::vector< MPI_Request > recv_request ;
|
||||
|
||||
public:
|
||||
|
||||
const buffer_dev_type & buffer() const { return dev_buffer ; }
|
||||
|
||||
AsyncExchange( const data_map_type & arg_data_map ,
|
||||
const size_t arg_chunk )
|
||||
: data_map( arg_data_map )
|
||||
, chunk_size( arg_chunk )
|
||||
, send_count_max( 0 )
|
||||
, host_recv_buffer()
|
||||
, host_send_buffer()
|
||||
, send_msg_buffer()
|
||||
, dev_buffer()
|
||||
, dev_send_buffer()
|
||||
, dev_recv_buffer()
|
||||
, recv_request()
|
||||
{
|
||||
const size_t send_msg_count = arg_data_map.host_send.dimension_0();
|
||||
const size_t recv_msg_count = arg_data_map.host_recv.dimension_0();
|
||||
|
||||
const size_t send_msg_length = arg_chunk * arg_data_map.count_send ;
|
||||
const size_t recv_msg_length = arg_chunk * arg_data_map.count_receive ;
|
||||
|
||||
for ( size_t i = 0 ; i < send_msg_count ; ++i ) {
|
||||
send_count_max = std::max( send_count_max ,
|
||||
(unsigned) arg_data_map.host_send(i,1) );
|
||||
}
|
||||
|
||||
// A single shared buffer on the device can be used for
|
||||
// send and receive message buffers.
|
||||
dev_buffer = buffer_dev_type(
|
||||
std::string("AsyncExchange dev_buffer") ,
|
||||
std::max( send_msg_length , recv_msg_length ) );
|
||||
|
||||
// Total send subview of the device buffer
|
||||
dev_send_buffer =
|
||||
Kokkos::subview( dev_buffer , std::pair<size_t,size_t>( 0 , send_msg_length ) );
|
||||
|
||||
// Total receive subview of the device buffer
|
||||
dev_recv_buffer =
|
||||
Kokkos::subview( dev_buffer , std::pair<size_t,size_t>( 0 , recv_msg_length ) );
|
||||
|
||||
// Total receive message buffer on the host:
|
||||
host_recv_buffer = buffer_host_type(
|
||||
std::string("AsyncExchange host_recv_buffer") ,
|
||||
recv_msg_length );
|
||||
|
||||
// Total send message buffer on the host:
|
||||
host_send_buffer = buffer_host_type(
|
||||
std::string("AsyncExchange host_send_buffer") ,
|
||||
send_msg_length );
|
||||
|
||||
// Individual send message buffer on the host:
|
||||
send_msg_buffer = buffer_host_type(
|
||||
std::string("AsyncExchange send_msg_buffer") ,
|
||||
arg_chunk * send_count_max );
|
||||
|
||||
// MPI asynchronous receive request handles:
|
||||
recv_request.assign( recv_msg_count , MPI_REQUEST_NULL );
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
void setup()
|
||||
{
|
||||
{ // Post receives:
|
||||
const size_t recv_msg_count = data_map.host_recv.dimension_0();
|
||||
|
||||
ValueType * ptr = host_recv_buffer.ptr_on_device();
|
||||
|
||||
for ( size_t i = 0 ; i < recv_msg_count ; ++i ) {
|
||||
const int proc = data_map.host_recv(i,0);
|
||||
const int count = data_map.host_recv(i,1) * chunk_size ;
|
||||
|
||||
MPI_Irecv( ptr , count * sizeof(ValueType) , MPI_BYTE ,
|
||||
proc , mpi_tag , data_map.machine.mpi_comm ,
|
||||
& recv_request[i] );
|
||||
|
||||
ptr += count ;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy send buffer from the device to host memory for sending
|
||||
|
||||
Kokkos::deep_copy( host_send_buffer , dev_send_buffer );
|
||||
|
||||
// Done with the device until communication is complete.
|
||||
// Application can dispatch asynchronous work on the device.
|
||||
}
|
||||
|
||||
// Application can dispatch local work to device ...
|
||||
// No communication progress until main thread calls 'send_receive'
|
||||
|
||||
void send_receive()
|
||||
{
|
||||
const size_t recv_msg_count = data_map.host_recv.dimension_0();
|
||||
const size_t send_msg_count = data_map.host_send.dimension_0();
|
||||
|
||||
// Pack and send:
|
||||
|
||||
for ( size_t i = 0 , j = 0 ; i < send_msg_count ; ++i ) {
|
||||
const int proc = data_map.host_send(i,0);
|
||||
const int count = data_map.host_send(i,1);
|
||||
|
||||
for ( int k = 0 , km = 0 ; k < count ; ++k , ++j ) {
|
||||
const int km_end = km + chunk_size ;
|
||||
int ki = chunk_size * data_map.host_send_item(j);
|
||||
|
||||
for ( ; km < km_end ; ++km , ++ki ) {
|
||||
send_msg_buffer[km] = host_send_buffer[ki];
|
||||
}
|
||||
}
|
||||
|
||||
// MPI_Ssend blocks until
|
||||
// (1) a receive is matched for the message and
|
||||
// (2) the send buffer can be re-used.
|
||||
//
|
||||
// It is suggested that MPI_Ssend will have the best performance:
|
||||
// http://www.mcs.anl.gov/research/projects/mpi/sendmode.html .
|
||||
|
||||
MPI_Ssend( send_msg_buffer.ptr_on_device(),
|
||||
count * chunk_size * sizeof(ValueType) , MPI_BYTE ,
|
||||
proc , mpi_tag , data_map.machine.mpi_comm );
|
||||
}
|
||||
|
||||
// Wait for receives and verify:
|
||||
|
||||
for ( size_t i = 0 ; i < recv_msg_count ; ++i ) {
|
||||
MPI_Status recv_status ;
|
||||
int recv_which = 0 ;
|
||||
int recv_size = 0 ;
|
||||
|
||||
MPI_Waitany( recv_msg_count , & recv_request[0] ,
|
||||
& recv_which , & recv_status );
|
||||
|
||||
const int recv_proc = recv_status.MPI_SOURCE ;
|
||||
|
||||
MPI_Get_count( & recv_status , MPI_BYTE , & recv_size );
|
||||
|
||||
// Verify message properly received:
|
||||
|
||||
const int expected_proc = data_map.host_recv(recv_which,0);
|
||||
const int expected_size = data_map.host_recv(recv_which,1) *
|
||||
chunk_size * sizeof(ValueType);
|
||||
|
||||
if ( ( expected_proc != recv_proc ) ||
|
||||
( expected_size != recv_size ) ) {
|
||||
std::ostringstream msg ;
|
||||
msg << "AsyncExchange error:"
|
||||
<< " P" << comm::rank( data_map.machine )
|
||||
<< " received from P" << recv_proc
|
||||
<< " size " << recv_size
|
||||
<< " expected " << expected_size
|
||||
<< " from P" << expected_proc ;
|
||||
throw std::runtime_error( msg.str() );
|
||||
}
|
||||
}
|
||||
|
||||
// Copy received data to device memory.
|
||||
|
||||
Kokkos::deep_copy( dev_recv_buffer , host_recv_buffer );
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#else /* ! #ifdef KOKKOS_HAVE_MPI */
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< class ValueType , class Device >
|
||||
class AsyncExchange< ValueType, Device , Kokkos::ParallelDataMap > {
|
||||
public:
|
||||
|
||||
typedef Device execution_space ;
|
||||
typedef Kokkos::ParallelDataMap data_map_type ;
|
||||
typedef Kokkos::View< ValueType* , execution_space > buffer_dev_type ;
|
||||
typedef typename buffer_dev_type::HostMirror buffer_host_type ;
|
||||
|
||||
buffer_dev_type dev_buffer ;
|
||||
|
||||
public:
|
||||
|
||||
const buffer_dev_type & buffer() const { return dev_buffer ; }
|
||||
|
||||
AsyncExchange( const data_map_type & , const size_t )
|
||||
: dev_buffer()
|
||||
{ }
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
void setup() { }
|
||||
|
||||
void send_receive() { }
|
||||
};
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* ! #ifdef KOKKOS_HAVE_MPI */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_PARALLELDATAMAP_HPP */
|
||||
|
||||
|
||||
@ -1,178 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if 0
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <ParallelMachine.hpp>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if ! defined( KOKKOS_HAVE_MPI )
|
||||
#define MPI_COMM_NULL 0
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
namespace Parallel {
|
||||
|
||||
/** \brief  Coordinated initialization of Cuda, MPI and the Threads device.
 *
 *  The command line is scanned for the words:
 *    mpi_cuda : (Cuda builds) select and initialize the Cuda device before
 *               MPI_Init, using the node-local rank taken from the
 *               MV2_COMM_WORLD_LOCAL_RANK or SLURM_LOCALID environment
 *               variable when one of them is set.
 *    host     : initialize Kokkos::Threads, by default with the hwloc
 *               detected NUMA count times threads per NUMA region; two
 *               numbers following 'host' replace those two values.
 *    cuda     : (Cuda builds) initialize the Cuda device after MPI_Init,
 *               selected by the MPI rank, unless already initialized.
 *
 *  m_gpu_arch is not queried here; it is set to zero so that gpu_arch()
 *  returns a defined value.  Cuda initialization is skipped when no
 *  device is detected, instead of dividing by a zero device count.
 */
Machine::Machine( int * argc , char *** argv )
  : m_mpi_comm( MPI_COMM_NULL )
  , m_mpi_size(0)
  , m_mpi_rank(0)
  , m_mpi_gpu(0)
  , m_gpu_arch(0)
{

#if defined( KOKKOS_HAVE_CUDA )
  //------------------------------------
  // Might be using a Cuda aware version of MPI.
  // Must select Cuda device before initializing MPI.
  {
    int i = 1 ;
    for ( ; i < *argc && strcmp((*argv)[i],"mpi_cuda") ; ++i );

    if ( i < *argc ) {
      // Determine, if possible, what will be the node-local
      // rank of the MPI process once MPI has been initialized.
      // This rank is needed to set the Cuda device before 'mvapich'
      // is initialized.

      const char * const mvapich_local_rank = getenv("MV2_COMM_WORLD_LOCAL_RANK");
      const char * const slurm_local_rank   = getenv("SLURM_LOCALID");

      const int pre_mpi_local_rank =
        0 != mvapich_local_rank ? atoi( mvapich_local_rank ) : (
        0 != slurm_local_rank   ? atoi( slurm_local_rank ) : (
        -1 ) );

      if ( 0 <= pre_mpi_local_rank ) {

        const int ngpu = Kokkos::Cuda::detect_device_count();

        // Without a visible device there is nothing to select,
        // and 'rank % 0' would be undefined.
        if ( 0 < ngpu ) {

          const int cuda_device_rank = pre_mpi_local_rank % ngpu ;

          Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( cuda_device_rank ) );

          m_mpi_gpu = 1 ;
        }
      }
    }
  }
#endif

  //------------------------------------

#if defined( KOKKOS_HAVE_MPI )
  MPI_Init( argc , argv );
  m_mpi_comm = MPI_COMM_WORLD ;
  MPI_Comm_size( m_mpi_comm , & m_mpi_size );
  MPI_Comm_rank( m_mpi_comm , & m_mpi_rank );
#endif

  // Query hwloc after MPI initialization to allow MPI binding:
  //------------------------------------
  // Request to use host device:
  {
    int i = 1 ;
    for ( ; i < *argc && strcmp((*argv)[i],"host") ; ++i );

    if ( i < *argc ) {

      unsigned team_count       = Kokkos::hwloc::get_available_numa_count();
      unsigned threads_per_team = Kokkos::hwloc::get_available_cores_per_numa() *
                                  Kokkos::hwloc::get_available_threads_per_core();

      // Two arguments following 'host' replace the detected values.
      if ( i + 2 < *argc ) {
        team_count       = atoi( (*argv)[i+1] );
        threads_per_team = atoi( (*argv)[i+2] );
      }

      Kokkos::Threads::initialize( team_count * threads_per_team );
    }
  }

#if defined( KOKKOS_HAVE_CUDA )
  //------------------------------------
  // Request to use Cuda device and not already initialized.
  if ( ! m_mpi_gpu ) {
    int i = 1 ;
    for ( ; i < *argc && strcmp((*argv)[i],"mpi_cuda") && strcmp((*argv)[i],"cuda") ; ++i );

    if ( i < *argc ) {

      const int ngpu = Kokkos::Cuda::detect_device_count();

      // Same guard as above: no device, no selection.
      if ( 0 < ngpu ) {

        const int cuda_device_rank = m_mpi_rank % ngpu ;

        Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( cuda_device_rank ) );
      }
    }
  }
#endif

}
|
||||
|
||||
/** \brief  Finalize the Threads device, then Cuda (Cuda builds),
 *          then MPI (MPI builds), in that order.
 */
Machine::~Machine()
{
  Kokkos::Threads::finalize();

#ifdef KOKKOS_HAVE_CUDA
  Kokkos::Cuda::finalize();
#endif

#ifdef KOKKOS_HAVE_MPI
  MPI_Finalize();
#endif
}
|
||||
|
||||
void Machine::print_configuration( std::ostream & msg ) const
|
||||
{
|
||||
msg << "MPI [ " << m_mpi_rank << " / " << m_mpi_size << " ]" << std::endl ;
|
||||
Kokkos::Threads::print_configuration( msg );
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
Kokkos::Cuda::print_configuration( msg );
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif /* #if 0 */
|
||||
|
||||
@ -1,118 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#error "ParallelMachine"
|
||||
|
||||
#ifndef PARALLELMACHINE_HPP
|
||||
#define PARALLELMACHINE_HPP
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#include <iosfwd>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
#include <mpi.h>
|
||||
#else
|
||||
typedef int MPI_Comm ;
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
namespace Parallel {
|
||||
|
||||
/** \brief Hybrid parallel machine with MPI+Kokkos::Threads or MPI+Kokkos::Cuda.
|
||||
*
|
||||
* Initialization of MPI and Kokkos device has interdependencies which this
|
||||
* class manages. The command line and environment variables are queried to initialize
|
||||
* the Threads or Cuda device:
|
||||
*
|
||||
* 1) cuda : initializes Cuda device
|
||||
* 2) host : initializes Threads device with all hwloc detected cores.
|
||||
* 3) host #gang #worker : initializes Threads with specified
|
||||
*/
|
||||
class Machine {
|
||||
private:
|
||||
|
||||
MPI_Comm m_mpi_comm ;
|
||||
int m_mpi_size ;
|
||||
int m_mpi_rank ;
|
||||
unsigned m_mpi_gpu ;
|
||||
unsigned m_gpu_arch ;
|
||||
|
||||
Machine();
|
||||
Machine( const Machine & );
|
||||
Machine & operator = ( const Machine & );
|
||||
|
||||
public:
|
||||
|
||||
/** \brief Coordinated initialize MPI, Cuda, or Threads devices from 'main'. */
|
||||
Machine( int * argc , char *** argv );
|
||||
|
||||
~Machine();
|
||||
|
||||
MPI_Comm mpi_comm() const { return m_mpi_comm ; }
|
||||
|
||||
int mpi_size() const { return m_mpi_size ; }
|
||||
int mpi_rank() const { return m_mpi_rank ; }
|
||||
|
||||
/** \brief If using MPI that can directly operate on GPU memory */
|
||||
bool mpi_gpu() const { return m_mpi_gpu ; }
|
||||
|
||||
/** \brief If using GPU then what architecture */
|
||||
unsigned gpu_arch() const { return m_gpu_arch ; }
|
||||
|
||||
void print_configuration( std::ostream & ) const ;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef PARALLELMACHINE_HPP */
|
||||
|
||||
|
||||
@ -1,400 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef SPARSELINEARSYSTEM_HPP
|
||||
#define SPARSELINEARSYSTEM_HPP
|
||||
|
||||
#include <cmath>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_StaticCrsGraph.hpp>
|
||||
|
||||
#include <LinAlgBLAS.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< typename ScalarType , class Device >
|
||||
struct CrsMatrix {
|
||||
typedef Device execution_space ;
|
||||
typedef ScalarType value_type ;
|
||||
|
||||
typedef StaticCrsGraph< int , execution_space , void , int > graph_type ;
|
||||
typedef View< value_type* , execution_space > coefficients_type ;
|
||||
|
||||
graph_type graph ;
|
||||
coefficients_type coefficients ;
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Impl {
|
||||
|
||||
template< class Matrix , class OutputVector , class InputVector >
|
||||
struct Multiply ;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< typename AScalarType ,
|
||||
typename VScalarType ,
|
||||
class DeviceType >
|
||||
struct Multiply< CrsMatrix<AScalarType,DeviceType> ,
|
||||
View<VScalarType*,DeviceType > ,
|
||||
View<VScalarType*,DeviceType > >
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
|
||||
typedef View< VScalarType*, execution_space, MemoryUnmanaged > vector_type ;
|
||||
typedef View< const VScalarType*, execution_space, MemoryUnmanaged > vector_const_type ;
|
||||
|
||||
typedef CrsMatrix< AScalarType , execution_space > matrix_type ;
|
||||
|
||||
private:
|
||||
|
||||
matrix_type m_A ;
|
||||
vector_const_type m_x ;
|
||||
vector_type m_y ;
|
||||
|
||||
public:
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type iRow ) const
|
||||
{
|
||||
const size_type iEntryBegin = m_A.graph.row_map[iRow];
|
||||
const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
|
||||
|
||||
double sum = 0 ;
|
||||
|
||||
#if defined( __INTEL_COMPILER )
|
||||
#pragma simd reduction(+:sum)
|
||||
#pragma ivdep
|
||||
for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
|
||||
sum += m_A.coefficients(iEntry) * m_x( m_A.graph.entries(iEntry) );
|
||||
}
|
||||
#else
|
||||
for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
|
||||
sum += m_A.coefficients(iEntry) * m_x( m_A.graph.entries(iEntry) );
|
||||
}
|
||||
#endif
|
||||
|
||||
m_y(iRow) = sum ;
|
||||
}
|
||||
|
||||
Multiply( const matrix_type & A ,
|
||||
const size_type nrow ,
|
||||
const size_type , // ncol ,
|
||||
const vector_type & x ,
|
||||
const vector_type & y )
|
||||
: m_A( A ), m_x( x ), m_y( y )
|
||||
{
|
||||
parallel_for( nrow , *this );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename AScalarType ,
|
||||
typename VScalarType ,
|
||||
class Device >
|
||||
class Operator {
|
||||
typedef CrsMatrix<AScalarType,Device> matrix_type ;
|
||||
typedef View<VScalarType*,Device> vector_type ;
|
||||
|
||||
private:
|
||||
const CrsMatrix<AScalarType,Device> A ;
|
||||
|
||||
ParallelDataMap data_map ;
|
||||
AsyncExchange< VScalarType , Device , ParallelDataMap > exchange ;
|
||||
|
||||
public:
|
||||
|
||||
Operator( const ParallelDataMap & arg_data_map ,
|
||||
const CrsMatrix<AScalarType,Device> & arg_A )
|
||||
: A( arg_A )
|
||||
, data_map( arg_data_map )
|
||||
, exchange( arg_data_map , 1 )
|
||||
{}
|
||||
|
||||
void apply( const View<VScalarType*,Device> & x ,
|
||||
const View<VScalarType*,Device> & y )
|
||||
{
|
||||
// Gather off-processor data for 'x'
|
||||
|
||||
PackArray< vector_type >::pack( exchange.buffer() ,
|
||||
data_map.count_interior ,
|
||||
data_map.count_send , x );
|
||||
|
||||
exchange.setup();
|
||||
|
||||
// If interior & boundary matrices then could launch interior multiply
|
||||
|
||||
exchange.send_receive();
|
||||
|
||||
UnpackArray< vector_type >::unpack( x , exchange.buffer() ,
|
||||
data_map.count_owned ,
|
||||
data_map.count_receive );
|
||||
|
||||
const typename Device::size_type nrow = data_map.count_owned ;
|
||||
const typename Device::size_type ncol = data_map.count_owned +
|
||||
data_map.count_receive ;
|
||||
|
||||
Impl::Multiply<matrix_type,vector_type,vector_type>( A, nrow, ncol, x, y);
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename AScalarType , typename VScalarType , class Device >
|
||||
void cgsolve(
|
||||
const ParallelDataMap data_map ,
|
||||
const CrsMatrix<AScalarType,Device> A ,
|
||||
const View<VScalarType*,Device> b ,
|
||||
const View<VScalarType*,Device> x ,
|
||||
size_t & iteration ,
|
||||
double & normr ,
|
||||
double & iter_time ,
|
||||
const size_t maximum_iteration = 200 ,
|
||||
const double tolerance = std::numeric_limits<VScalarType>::epsilon() )
|
||||
{
|
||||
typedef View<VScalarType*,Device> vector_type ;
|
||||
//typedef View<VScalarType, Device> value_type ; // unused
|
||||
|
||||
const size_t count_owned = data_map.count_owned ;
|
||||
const size_t count_total = data_map.count_owned + data_map.count_receive ;
|
||||
|
||||
Operator<AScalarType,VScalarType,Device> matrix_operator( data_map , A );
|
||||
|
||||
// Need input vector to matvec to be owned + received
|
||||
vector_type pAll ( "cg::p" , count_total );
|
||||
|
||||
vector_type p = Kokkos::subview( pAll , std::pair<size_t,size_t>(0,count_owned) );
|
||||
vector_type r ( "cg::r" , count_owned );
|
||||
vector_type Ap( "cg::Ap", count_owned );
|
||||
|
||||
/* r = b - A * x ; */
|
||||
|
||||
/* p = x */ deep_copy( p , x );
|
||||
/* Ap = A * p */ matrix_operator.apply( pAll , Ap );
|
||||
/* r = b - Ap */ waxpby( count_owned , 1.0 , b , -1.0 , Ap , r );
|
||||
/* p = r */ deep_copy( p , r );
|
||||
|
||||
double old_rdot = dot( count_owned , r , data_map.machine );
|
||||
|
||||
normr = sqrt( old_rdot );
|
||||
iteration = 0 ;
|
||||
|
||||
Kokkos::Impl::Timer wall_clock ;
|
||||
|
||||
while ( tolerance < normr && iteration < maximum_iteration ) {
|
||||
|
||||
/* pAp_dot = dot( p , Ap = A * p ) */
|
||||
|
||||
/* Ap = A * p */ matrix_operator.apply( pAll , Ap );
|
||||
|
||||
const double pAp_dot = dot( count_owned , p , Ap , data_map.machine );
|
||||
const double alpha = old_rdot / pAp_dot ;
|
||||
|
||||
/* x += alpha * p ; */ axpy( count_owned, alpha, p , x );
|
||||
/* r -= alpha * Ap ; */ axpy( count_owned, -alpha, Ap, r );
|
||||
|
||||
const double r_dot = dot( count_owned , r , data_map.machine );
|
||||
const double beta = r_dot / old_rdot ;
|
||||
|
||||
/* p = r + beta * p ; */ xpby( count_owned , r , beta , p );
|
||||
|
||||
normr = sqrt( old_rdot = r_dot );
|
||||
++iteration ;
|
||||
}
|
||||
|
||||
iter_time = wall_clock.seconds();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
|
||||
#if ( CUDA_VERSION < 6000 )
|
||||
#pragma message "cusparse_v2.h"
|
||||
#include <cusparse_v2.h>
|
||||
#else
|
||||
#pragma message "cusparse.h"
|
||||
#include <cusparse.h>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
struct CudaSparseSingleton {
|
||||
cusparseHandle_t handle;
|
||||
cusparseMatDescr_t descra;
|
||||
|
||||
CudaSparseSingleton()
|
||||
{
|
||||
cusparseCreate( & handle );
|
||||
cusparseCreateMatDescr( & descra );
|
||||
cusparseSetMatType( descra , CUSPARSE_MATRIX_TYPE_GENERAL );
|
||||
cusparseSetMatIndexBase( descra , CUSPARSE_INDEX_BASE_ZERO );
|
||||
}
|
||||
|
||||
static CudaSparseSingleton & singleton();
|
||||
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Multiply< CrsMatrix<double,Cuda> ,
|
||||
View<double*,Cuda > ,
|
||||
View<double*,Cuda > >
|
||||
{
|
||||
typedef Cuda execution_space ;
|
||||
typedef execution_space::size_type size_type ;
|
||||
typedef double scalar_type ;
|
||||
typedef View< scalar_type* , execution_space > vector_type ;
|
||||
typedef CrsMatrix< scalar_type , execution_space > matrix_type ;
|
||||
|
||||
public:
|
||||
|
||||
Multiply( const matrix_type & A ,
|
||||
const size_type nrow ,
|
||||
const size_type ncol ,
|
||||
const vector_type & x ,
|
||||
const vector_type & y )
|
||||
{
|
||||
CudaSparseSingleton & s = CudaSparseSingleton::singleton();
|
||||
const scalar_type alpha = 1 , beta = 0 ;
|
||||
|
||||
cusparseStatus_t status =
|
||||
cusparseDcsrmv( s.handle ,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE ,
|
||||
nrow , ncol , A.coefficients.dimension_0() ,
|
||||
&alpha ,
|
||||
s.descra ,
|
||||
A.coefficients.ptr_on_device() ,
|
||||
A.graph.row_map.ptr_on_device() ,
|
||||
A.graph.entries.ptr_on_device() ,
|
||||
x.ptr_on_device() ,
|
||||
&beta ,
|
||||
y.ptr_on_device() );
|
||||
|
||||
if ( CUSPARSE_STATUS_SUCCESS != status ) {
|
||||
throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<>
|
||||
struct Multiply< CrsMatrix<float,Cuda> ,
|
||||
View<float*,Cuda > ,
|
||||
View<float*,Cuda > >
|
||||
{
|
||||
typedef Cuda execution_space ;
|
||||
typedef execution_space::size_type size_type ;
|
||||
typedef float scalar_type ;
|
||||
typedef View< scalar_type* , execution_space > vector_type ;
|
||||
typedef CrsMatrix< scalar_type , execution_space > matrix_type ;
|
||||
|
||||
public:
|
||||
|
||||
Multiply( const matrix_type & A ,
|
||||
const size_type nrow ,
|
||||
const size_type ncol ,
|
||||
const vector_type & x ,
|
||||
const vector_type & y )
|
||||
{
|
||||
CudaSparseSingleton & s = CudaSparseSingleton::singleton();
|
||||
const scalar_type alpha = 1 , beta = 0 ;
|
||||
|
||||
cusparseStatus_t status =
|
||||
cusparseScsrmv( s.handle ,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE ,
|
||||
nrow , ncol , A.coefficients.dimension_0() ,
|
||||
&alpha ,
|
||||
s.descra ,
|
||||
A.coefficients.ptr_on_device() ,
|
||||
A.graph.row_map.ptr_on_device() ,
|
||||
A.graph.entries.ptr_on_device() ,
|
||||
x.ptr_on_device() ,
|
||||
&beta ,
|
||||
y.ptr_on_device() );
|
||||
|
||||
if ( CUSPARSE_STATUS_SUCCESS != status ) {
|
||||
throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef SPARSELINEARSYSTEM_HPP */
|
||||
|
||||
@ -1,276 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef SPARSELINEARSYSTEMFILL_HPP
|
||||
#define SPARSELINEARSYSTEMFILL_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include <FEMesh.hpp>
|
||||
#include <SparseLinearSystem.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace HybridFEM {
|
||||
|
||||
// Primary template is declared only; the specialization for
// Kokkos::CrsMatrix and FEMesh supplies the implementation.
template< class MatrixType ,
          class MeshType ,
          class elem_matrices_type ,
          class elem_vectors_type >
struct GatherFill ;
|
||||
|
||||
|
||||
template< typename ScalarType ,
|
||||
class DeviceType ,
|
||||
unsigned ElemNode ,
|
||||
typename CoordScalarType ,
|
||||
class elem_matrices_type ,
|
||||
class elem_vectors_type >
|
||||
struct GatherFill<
|
||||
Kokkos::CrsMatrix< ScalarType , DeviceType > ,
|
||||
FEMesh< CoordScalarType , ElemNode , DeviceType > ,
|
||||
elem_matrices_type , elem_vectors_type >
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
|
||||
static const size_type ElemNodeCount = ElemNode ;
|
||||
|
||||
typedef Kokkos::CrsMatrix< ScalarType , execution_space > matrix_type ;
|
||||
typedef typename matrix_type::coefficients_type coefficients_type ;
|
||||
typedef Kokkos::View< ScalarType[] , execution_space > vector_type ;
|
||||
typedef Kokkos::View< size_type[][ElemNodeCount][ElemNodeCount] , execution_space > elem_graph_type ;
|
||||
|
||||
typedef FEMesh< CoordScalarType , ElemNodeCount , execution_space > mesh_type ;
|
||||
typedef typename mesh_type::node_elem_ids_type node_elem_ids_type ;
|
||||
|
||||
private:
|
||||
|
||||
node_elem_ids_type node_elem_ids ;
|
||||
elem_graph_type elem_graph ;
|
||||
elem_matrices_type elem_matrices ;
|
||||
elem_vectors_type elem_vectors ;
|
||||
coefficients_type system_coeff ;
|
||||
vector_type system_rhs ;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( size_type irow ) const
|
||||
{
|
||||
const size_type node_elem_begin = node_elem_ids.row_map[irow];
|
||||
const size_type node_elem_end = node_elem_ids.row_map[irow+1];
|
||||
|
||||
// for each element that a node belongs to
|
||||
|
||||
for ( size_type i = node_elem_begin ; i < node_elem_end ; i++ ) {
|
||||
|
||||
const size_type elem_id = node_elem_ids.entries( i, 0);
|
||||
const size_type row_index = node_elem_ids.entries( i, 1);
|
||||
|
||||
system_rhs(irow) += elem_vectors(elem_id, row_index);
|
||||
|
||||
// for each node in a particular related element
|
||||
// gather the contents of the element stiffness
|
||||
// matrix that belong in irow
|
||||
|
||||
for ( size_type j = 0 ; j < ElemNodeCount ; ++j ){
|
||||
const size_type A_index = elem_graph( elem_id , row_index , j );
|
||||
|
||||
system_coeff( A_index ) += elem_matrices( elem_id, row_index, j );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void apply( const matrix_type & matrix ,
|
||||
const vector_type & rhs ,
|
||||
const mesh_type & mesh ,
|
||||
const elem_graph_type & elem_graph ,
|
||||
const elem_matrices_type & elem_matrices ,
|
||||
const elem_vectors_type & elem_vectors )
|
||||
{
|
||||
const size_t row_count = matrix.graph.row_map.dimension_0() - 1 ;
|
||||
GatherFill op ;
|
||||
op.node_elem_ids = mesh.node_elem_ids ;
|
||||
op.elem_graph = elem_graph ;
|
||||
op.elem_matrices = elem_matrices ;
|
||||
op.elem_vectors = elem_vectors ;
|
||||
op.system_coeff = matrix.coefficients ;
|
||||
op.system_rhs = rhs ;
|
||||
|
||||
parallel_for( row_count , op );
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace HybridFEM {
|
||||
|
||||
template< class GraphType , class MeshType >
|
||||
struct GraphFactory {
|
||||
typedef GraphType graph_type ;
|
||||
typedef MeshType mesh_type ;
|
||||
typedef typename graph_type::execution_space execution_space ;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
|
||||
static const unsigned ElemNodeCount = mesh_type::element_node_count ;
|
||||
|
||||
typedef Kokkos::View< size_type[][ElemNodeCount][ElemNodeCount] , execution_space > element_map_type ;
|
||||
|
||||
static
|
||||
void
|
||||
create( const mesh_type & mesh ,
|
||||
graph_type & graph ,
|
||||
element_map_type & elem_map )
|
||||
{
|
||||
typename mesh_type::node_elem_ids_type::HostMirror
|
||||
node_elem_ids = create_mirror( mesh.node_elem_ids );
|
||||
|
||||
typename mesh_type::elem_node_ids_type::HostMirror
|
||||
elem_node_ids = create_mirror( mesh.elem_node_ids );
|
||||
|
||||
typedef typename element_map_type::HostMirror element_map_host_type ;
|
||||
|
||||
deep_copy( elem_node_ids , mesh.elem_node_ids );
|
||||
deep_copy( node_elem_ids.entries , mesh.node_elem_ids.entries );
|
||||
|
||||
const size_t owned_node = mesh.parallel_data_map.count_owned ;
|
||||
const size_t total_elem = mesh.elem_node_ids.dimension_0();
|
||||
|
||||
if ( total_elem ) {
|
||||
elem_map = element_map_type( std::string("element_map"), total_elem );
|
||||
}
|
||||
|
||||
element_map_host_type elem_map_host = create_mirror( elem_map );
|
||||
|
||||
//------------------------------------
|
||||
// Node->node mapping for the CrsMatrix graph
|
||||
|
||||
std::vector< std::vector< unsigned > > node_node_ids( owned_node );
|
||||
std::vector< unsigned > node_node_begin( owned_node );
|
||||
|
||||
size_t offset = 0 ;
|
||||
for ( size_t i = 0 ; i < owned_node ; ++i ) {
|
||||
const size_t j_end = node_elem_ids.row_map[i+1];
|
||||
size_t j = node_elem_ids.row_map[i];
|
||||
|
||||
node_node_begin[i] = offset ;
|
||||
|
||||
std::vector< unsigned > & work = node_node_ids[i] ;
|
||||
|
||||
for ( ; j < j_end ; ++j ) {
|
||||
const size_t elem_id = node_elem_ids.entries(j,0);
|
||||
for ( size_t k = 0 ; k < ElemNodeCount ; ++k ) {
|
||||
work.push_back( elem_node_ids( elem_id , k ) );
|
||||
}
|
||||
}
|
||||
|
||||
std::sort( work.begin() , work.end() );
|
||||
|
||||
work.erase( std::unique( work.begin() , work.end() ) , work.end() );
|
||||
|
||||
offset += work.size();
|
||||
}
|
||||
|
||||
graph = Kokkos::create_staticcrsgraph< graph_type >( "node_node_ids" , node_node_ids );
|
||||
|
||||
//------------------------------------
|
||||
// ( element , node_row , node_column ) -> matrix_crs_column
|
||||
|
||||
for ( size_t elem_id = 0 ; elem_id < total_elem ; ++elem_id ) {
|
||||
for ( size_t i = 0 ; i < ElemNodeCount ; ++i ) {
|
||||
|
||||
const size_t node_row = elem_node_ids( elem_id , i );
|
||||
const size_t node_row_begin = node_node_begin[ node_row ];
|
||||
const std::vector< unsigned > & column = node_node_ids[ node_row ] ;
|
||||
|
||||
if ( owned_node <= node_row ) {
|
||||
for ( unsigned j = 0 ; j < ElemNodeCount ; ++j ) {
|
||||
elem_map_host( elem_id , i , j ) = std::numeric_limits<size_type>::max();
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
||||
for ( unsigned j = 0 ; j < ElemNodeCount ; ++j ) {
|
||||
const size_type node_col = elem_node_ids( elem_id , j );
|
||||
|
||||
int col_search = 0 ;
|
||||
|
||||
for ( int len = column.size() ; 0 < len ; ) {
|
||||
|
||||
const int half = len >> 1;
|
||||
const int middle = col_search + half ;
|
||||
|
||||
if ( column[middle] < node_col ){
|
||||
col_search = middle + 1 ;
|
||||
len -= half + 1 ;
|
||||
}
|
||||
else {
|
||||
len = half ;
|
||||
}
|
||||
}
|
||||
if ( node_col != column[col_search] ) {
|
||||
throw std::runtime_error(std::string("Failed"));
|
||||
}
|
||||
elem_map_host( elem_id , i , j ) = col_search + node_row_begin ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
deep_copy( elem_map , elem_map_host );
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace HybridFEM
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef SPARSELINEARSYSTEMFILL_HPP */
|
||||
|
||||
@ -1,164 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef SPARSELINEARSYSTEM_CUDA_HPP
|
||||
#define SPARSELINEARSYSTEM_CUDA_HPP
|
||||
|
||||
#if defined( BUILD_FROM_CU_FILE )
|
||||
|
||||
#include <cusparse_v2.h>
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
|
||||
struct CudaSparseSingleton {
|
||||
cusparseHandle_t handle;
|
||||
cusparseMatDescr_t descra;
|
||||
|
||||
CudaSparseSingleton()
|
||||
{
|
||||
cusparseCreate( & handle );
|
||||
cusparseCreateMatDescr( & descra );
|
||||
cusparseSetMatType( descra , CUSPARSE_MATRIX_TYPE_GENERAL );
|
||||
cusparseSetMatIndexBase( descra , CUSPARSE_INDEX_BASE_ZERO );
|
||||
}
|
||||
|
||||
static CudaSparseSingleton & singleton();
|
||||
|
||||
};
|
||||
|
||||
// Returns the one shared instance, which is a function-local static and
// is therefore constructed the first time this function is called.
CudaSparseSingleton & CudaSparseSingleton::singleton()
{
  static CudaSparseSingleton instance ;
  return instance ;
}
|
||||
|
||||
|
||||
template<>
|
||||
struct Multiply< CrsMatrix<double,Cuda> ,
|
||||
View<double*,Cuda > ,
|
||||
View<double*,Cuda > >
|
||||
{
|
||||
typedef Cuda execution_space ;
|
||||
typedef execution_space::size_type size_type ;
|
||||
typedef double scalar_type ;
|
||||
typedef View< scalar_type* , execution_space > vector_type ;
|
||||
typedef CrsMatrix< scalar_type , execution_space > matrix_type ;
|
||||
|
||||
public:
|
||||
|
||||
Multiply( const matrix_type & A ,
|
||||
const size_type nrow ,
|
||||
const size_type ncol ,
|
||||
const vector_type & x ,
|
||||
const vector_type & y )
|
||||
{
|
||||
CudaSparseSingleton & s = CudaSparseSingleton::singleton();
|
||||
const scalar_type alpha = 1 , beta = 0 ;
|
||||
|
||||
cusparseStatus_t status =
|
||||
cusparseDcsrmv( s.handle ,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE ,
|
||||
nrow , ncol , A.coefficients.dimension_0() ,
|
||||
&alpha ,
|
||||
s.descra ,
|
||||
A.coefficients.ptr_on_device() ,
|
||||
A.graph.row_map.ptr_on_device() ,
|
||||
A.graph.entries.ptr_on_device() ,
|
||||
x.ptr_on_device() ,
|
||||
&beta ,
|
||||
y.ptr_on_device() );
|
||||
|
||||
if ( CUSPARSE_STATUS_SUCCESS != status ) {
|
||||
throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<>
|
||||
struct Multiply< CrsMatrix<float,Cuda> ,
|
||||
View<float*,Cuda > ,
|
||||
View<float*,Cuda > >
|
||||
{
|
||||
typedef Cuda execution_space ;
|
||||
typedef execution_space::size_type size_type ;
|
||||
typedef float scalar_type ;
|
||||
typedef View< scalar_type* , execution_space > vector_type ;
|
||||
typedef CrsMatrix< scalar_type , execution_space > matrix_type ;
|
||||
|
||||
public:
|
||||
|
||||
Multiply( const matrix_type & A ,
|
||||
const size_type nrow ,
|
||||
const size_type ncol ,
|
||||
const vector_type & x ,
|
||||
const vector_type & y )
|
||||
{
|
||||
CudaSparseSingleton & s = CudaSparseSingleton::singleton();
|
||||
const scalar_type alpha = 1 , beta = 0 ;
|
||||
|
||||
cusparseStatus_t status =
|
||||
cusparseScsrmv( s.handle ,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE ,
|
||||
nrow , ncol , A.coefficients.dimension_0() ,
|
||||
&alpha ,
|
||||
s.descra ,
|
||||
A.coefficients.ptr_on_device() ,
|
||||
A.graph.row_map.ptr_on_device() ,
|
||||
A.graph.entries.ptr_on_device() ,
|
||||
x.ptr_on_device() ,
|
||||
&beta ,
|
||||
y.ptr_on_device() );
|
||||
|
||||
if ( CUSPARSE_STATUS_SUCCESS != status ) {
|
||||
throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #if defined( BUILD_FROM_CU_FILE ) */
|
||||
#endif /* #ifndef SPARSELINEARSYSTEM_CUDA_HPP */
|
||||
|
||||
@ -1,242 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef TESTFEMESHBOXFIXTURE_HPP
|
||||
#define TESTFEMESHBOXFIXTURE_HPP
|
||||
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
#include <BoxMeshFixture.hpp>
|
||||
|
||||
#include <ParallelComm.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace TestFEMesh {
|
||||
|
||||
// Primary template is declared only; the specialization for
// Kokkos::View< T*[3] , Device > supplies the implementation.
template< class ViewType > struct VerifyUnpack ;
|
||||
|
||||
template< typename DeviceType, typename T >
|
||||
struct VerifyUnpack< Kokkos::View< T*[3] , DeviceType > >
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
typedef size_type value_type ;
|
||||
|
||||
typedef Kokkos::View< T* , execution_space > buffer_type ;
|
||||
typedef Kokkos::View< T*[3] , execution_space > array_type ;
|
||||
|
||||
private:
|
||||
|
||||
array_type node_coords ;
|
||||
buffer_type buffer ;
|
||||
size_type node_begin ;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init( value_type & update )
|
||||
{ update = 0 ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join( volatile value_type & update ,
|
||||
const volatile value_type & source )
|
||||
{ update += source ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i , value_type & update ) const
|
||||
{
|
||||
const size_type node_id = i + node_begin ;
|
||||
const size_type k = i * 3 ;
|
||||
|
||||
const long xb = buffer[k];
|
||||
const long yb = buffer[k+1];
|
||||
const long zb = buffer[k+2];
|
||||
const long xn = node_coords(node_id,0);
|
||||
const long yn = node_coords(node_id,1);
|
||||
const long zn = node_coords(node_id,2);
|
||||
|
||||
if ( xb != xn || yb != yn || zb != zn ) {
|
||||
printf("TestFEMesh::VerifyUnpack failed at %d : node %d : { %ld %ld %ld } != { %ld %ld %ld }\n",
|
||||
(int)i,(int)node_id, xb,yb,zb, xn, yn, zn );
|
||||
++update ;
|
||||
}
|
||||
}
|
||||
|
||||
static inline
|
||||
size_type unpack( const array_type & arg_node_coords ,
|
||||
const size_type arg_node_begin ,
|
||||
const size_type arg_node_count ,
|
||||
const buffer_type & arg_buffer )
|
||||
{
|
||||
VerifyUnpack op ;
|
||||
op.node_coords = arg_node_coords ;
|
||||
op.buffer = arg_buffer ;
|
||||
op.node_begin = arg_node_begin ;
|
||||
size_type count = 0 ;
|
||||
Kokkos::parallel_reduce( arg_node_count , op , count );
|
||||
return count ;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#ifdef KOKKOS_HAVE_MPI
|
||||
|
||||
namespace TestFEMesh {
|
||||
|
||||
template< typename coordinate_scalar_type ,
|
||||
unsigned ElemNodeCount ,
|
||||
class Device >
|
||||
void verify_parallel(
|
||||
const HybridFEM::FEMesh< coordinate_scalar_type ,
|
||||
ElemNodeCount ,
|
||||
Device > & mesh )
|
||||
{
|
||||
typedef HybridFEM::FEMesh< coordinate_scalar_type, ElemNodeCount, Device > femesh_type ;
|
||||
typedef typename femesh_type::node_coords_type node_coords_type ;
|
||||
|
||||
comm::Machine machine = mesh.parallel_data_map.machine ;
|
||||
|
||||
// Communicate node coordinates to verify communication and setup.
|
||||
|
||||
const size_t chunk_size = 3 ;
|
||||
|
||||
Kokkos::AsyncExchange< coordinate_scalar_type, Device, Kokkos::ParallelDataMap >
|
||||
exchange( mesh.parallel_data_map , chunk_size );
|
||||
|
||||
const size_t send_begin = mesh.parallel_data_map.count_interior ;
|
||||
const size_t send_count = mesh.parallel_data_map.count_send ;
|
||||
|
||||
const size_t recv_begin = mesh.parallel_data_map.count_owned ;
|
||||
const size_t recv_count = mesh.parallel_data_map.count_receive ;
|
||||
|
||||
typedef Kokkos::PackArray< node_coords_type > pack_type ;
|
||||
|
||||
pack_type::pack( exchange.buffer(), send_begin, send_count, mesh.node_coords );
|
||||
|
||||
exchange.setup();
|
||||
|
||||
// Launch local-action device kernels
|
||||
|
||||
exchange.send_receive();
|
||||
|
||||
unsigned long local[3] ;
|
||||
local[0] = mesh.parallel_data_map.count_owned ;
|
||||
local[1] = mesh.parallel_data_map.count_receive ;
|
||||
local[2] = TestFEMesh::VerifyUnpack< node_coords_type >::unpack( mesh.node_coords, recv_begin, recv_count, exchange.buffer() );
|
||||
|
||||
unsigned long global[3] = { 0 , 0 , 0 };
|
||||
|
||||
MPI_Allreduce( local , global ,
|
||||
3 , MPI_UNSIGNED_LONG , MPI_SUM , machine.mpi_comm );
|
||||
|
||||
if ( 0 == comm::rank( machine ) ) {
|
||||
std::cout << ( global[2] ? "FAILED" : "PASSED" )
|
||||
<< ": TestFEMesh::verify_parallel "
|
||||
<< "NP(" << comm::size( machine )
|
||||
<< ") total_node(" << global[0]
|
||||
<< ") verified_nodes(" << global[1]
|
||||
<< ") failed_nodes(" << global[2]
|
||||
<< ")" << std::endl ;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace TestFEMesh
|
||||
|
||||
#else /* ! #ifdef KOKKOS_HAVE_MPI */
|
||||
|
||||
namespace TestFEMesh {
|
||||
|
||||
template< typename coordinate_scalar_type ,
|
||||
unsigned ElemNodeCount ,
|
||||
class Device >
|
||||
void verify_parallel(
|
||||
const HybridFEM::FEMesh< coordinate_scalar_type ,
|
||||
ElemNodeCount ,
|
||||
Device > & )
|
||||
{}
|
||||
|
||||
} // namespace TestFEMesh
|
||||
|
||||
#endif /* ! #ifdef KOKKOS_HAVE_MPI */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class Device >
|
||||
void test_box_fixture( comm::Machine machine ,
|
||||
const size_t gang_count ,
|
||||
const size_t nodes_nx ,
|
||||
const size_t nodes_ny ,
|
||||
const size_t nodes_nz )
|
||||
{
|
||||
typedef long coordinate_scalar_type ;
|
||||
typedef FixtureElementHex8 fixture_element_type ;
|
||||
|
||||
typedef BoxMeshFixture< coordinate_scalar_type ,
|
||||
Device ,
|
||||
fixture_element_type > fixture_type ;
|
||||
|
||||
typedef typename fixture_type::FEMeshType mesh_type ;
|
||||
|
||||
const size_t proc_count = comm::size( machine );
|
||||
const size_t proc_local = comm::rank( machine ) ;
|
||||
|
||||
mesh_type mesh =
|
||||
fixture_type::create( proc_count , proc_local , gang_count ,
|
||||
nodes_nx - 1 , nodes_ny - 1 , nodes_nz - 1 );
|
||||
|
||||
mesh.parallel_data_map.machine = machine ;
|
||||
|
||||
TestFEMesh::verify_parallel( mesh );
|
||||
}
|
||||
|
||||
#endif /* #ifndef TESTFEMESHBOXFIXTURE_HPP */
|
||||
|
||||
|
||||
@ -1,172 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
#include <BoxMeshPartition.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_box_partition( bool print )
|
||||
{
|
||||
const size_t np_max = 10000 ;
|
||||
|
||||
const BoxBoundsLinear use_box ;
|
||||
|
||||
BoxType root_box ;
|
||||
|
||||
root_box[0][0] = 0 ; root_box[0][1] = 100 ;
|
||||
root_box[1][0] = 0 ; root_box[1][1] = 200 ;
|
||||
root_box[2][0] = 0 ; root_box[2][1] = 300 ;
|
||||
|
||||
const size_t cell_total =
|
||||
( root_box[0][1] - root_box[0][0] ) *
|
||||
( root_box[1][1] - root_box[1][0] ) *
|
||||
( root_box[2][1] - root_box[2][0] );
|
||||
|
||||
for ( size_t np = 2 ; np < np_max ; np = 2 * ( np + 1 ) ) {
|
||||
|
||||
std::vector<BoxType> part_boxes( np );
|
||||
|
||||
box_partition_rcb( root_box , part_boxes );
|
||||
|
||||
size_t cell_goal = ( cell_total + np - 1 ) / np ;
|
||||
size_t cell_max = 0 ;
|
||||
|
||||
for ( size_t i = 0 ; i < np ; ++i ) {
|
||||
cell_max = std::max( cell_max , count( part_boxes[i] ) );
|
||||
}
|
||||
|
||||
if ( print ) {
|
||||
std::cout << std::endl
|
||||
<< "box_part( " << np
|
||||
<< " ) max( " << cell_max
|
||||
<< " ) goal( " << cell_goal
|
||||
<< " ) ratio( " << double(cell_max) / double(cell_goal)
|
||||
<< " )" << std::endl ;
|
||||
}
|
||||
|
||||
const size_t nsample = std::min(np,(size_t)4);
|
||||
const size_t stride = ( np + nsample - 1 ) / nsample ;
|
||||
|
||||
for ( size_t my_part = 0 ; my_part < np ; my_part += stride ) {
|
||||
BoxType my_use_box ;
|
||||
std::vector<size_t> my_use_id_map ;
|
||||
size_t my_count_interior ;
|
||||
size_t my_count_owned ;
|
||||
size_t my_count_uses ;
|
||||
std::vector<size_t> my_recv_counts ;
|
||||
std::vector<std::vector<size_t> > my_send_map ;
|
||||
|
||||
size_t count_verify = 0 ;
|
||||
|
||||
box_partition_maps( root_box , part_boxes ,
|
||||
use_box , my_part ,
|
||||
my_use_box , my_use_id_map ,
|
||||
my_count_interior ,
|
||||
my_count_owned ,
|
||||
my_count_uses ,
|
||||
my_recv_counts ,
|
||||
my_send_map );
|
||||
|
||||
count_verify = my_count_owned ;
|
||||
|
||||
if ( print ) {
|
||||
std::cout << " my_part(" << my_part << ") layout { "
|
||||
<< "P" << my_part
|
||||
<< "(" << my_count_interior
|
||||
<< "," << ( my_count_owned - my_count_interior )
|
||||
<< ")" ;
|
||||
}
|
||||
|
||||
for ( size_t i = 1 ; i < np ; ++i ) {
|
||||
if ( my_recv_counts[i] ) {
|
||||
count_verify += my_recv_counts[i] ;
|
||||
const size_t ip = ( my_part + i ) % np ;
|
||||
|
||||
if ( print ) {
|
||||
std::cout << " P" << ip << "(" << my_recv_counts[i] << ")" ;
|
||||
}
|
||||
|
||||
// Compare recv & send lists
|
||||
|
||||
BoxType ip_use_box ;
|
||||
std::vector<size_t> ip_use_id_map ;
|
||||
size_t ip_count_interior ;
|
||||
size_t ip_count_owned ;
|
||||
size_t ip_count_uses ;
|
||||
std::vector<size_t> ip_recv_counts ;
|
||||
std::vector<std::vector<size_t> > ip_send_map ;
|
||||
|
||||
box_partition_maps( root_box , part_boxes ,
|
||||
use_box , ip ,
|
||||
ip_use_box , ip_use_id_map ,
|
||||
ip_count_interior ,
|
||||
ip_count_owned ,
|
||||
ip_count_uses ,
|
||||
ip_recv_counts ,
|
||||
ip_send_map );
|
||||
|
||||
// Sent by ip, received by my_part:
|
||||
|
||||
const BoxType recv_send = intersect( part_boxes[ip] , my_use_box );
|
||||
const size_t recv_send_count = count( recv_send );
|
||||
|
||||
const size_t j = ( my_part + np - ip ) % np ;
|
||||
|
||||
if ( recv_send_count != my_recv_counts[i] ||
|
||||
recv_send_count != ip_send_map[j].size() ) {
|
||||
throw std::runtime_error( std::string("bad recv/send map") );
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( print ) { std::cout << " }" << std::endl ; }
|
||||
|
||||
if ( count_verify != my_count_uses ) {
|
||||
throw std::runtime_error( std::string("bad partition map") );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,192 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <TestBoxMeshFixture.hpp>
|
||||
#include <Implicit.hpp>
|
||||
#include <Nonlinear.hpp>
|
||||
#include <Explicit.hpp>
|
||||
|
||||
#include <SparseLinearSystem.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
// Returns the single CudaSparseSingleton object, held as a function-local
// static that is constructed on first call.
CudaSparseSingleton & CudaSparseSingleton::singleton()
{
  static CudaSparseSingleton instance ;
  return instance ;
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Prints this rank's number and the Cuda device count reported by
// Kokkos::Cuda::detect_device_count() to std::cout.
void test_cuda_query( comm::Machine machine )
{
  const size_t rank = comm::rank( machine );

  std::cout << "P" << rank
            << ": Cuda device_count = " << Kokkos::Cuda::detect_device_count()
            << std::endl ;
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Runs the box-fixture verification on the Cuda device.
//
// The device index is (rank % device count) when at least one device was
// detected and there are no more devices than ranks; otherwise it is 0.
// The host execution space and Cuda are initialized before the test and
// finalized afterwards, in reverse order.
void test_cuda_fixture( comm::Machine machine ,
                        size_t nx , size_t ny , size_t nz )
{
  const size_t rank         = comm::rank( machine );
  const size_t rank_total   = comm::size( machine );
  const size_t device_total = Kokkos::Cuda::detect_device_count();

  size_t device_index = 0 ;
  if ( device_total != 0 && device_total <= rank_total ) {
    device_index = rank % device_total ;
  }

  const size_t gang_count = 0 ;

  Kokkos::HostSpace::execution_space::initialize();

  Kokkos::Cuda::SelectDevice select_device( device_index );
  Kokkos::Cuda::initialize( select_device );

  test_box_fixture<Kokkos::Cuda>( machine , gang_count , nx , ny , nz );

  Kokkos::Cuda::finalize();
  Kokkos::HostSpace::execution_space::finalize();
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Runs HybridFEM::Implicit::driver<double,Kokkos::Cuda> labelled "Cuda".
//
// The device index is (rank % device count) when at least one device was
// detected and there are no more devices than ranks; otherwise it is 0.
// The host execution space and Cuda are initialized before the driver and
// finalized afterwards, in reverse order.
void test_cuda_implicit( comm::Machine machine ,
                         size_t elem_count_begin ,
                         size_t elem_count_end ,
                         size_t count_run )
{
  const size_t rank         = comm::rank( machine );
  const size_t rank_total   = comm::size( machine );
  const size_t device_total = Kokkos::Cuda::detect_device_count();

  size_t device_index = 0 ;
  if ( device_total != 0 && device_total <= rank_total ) {
    device_index = rank % device_total ;
  }

  const size_t gang_count = 0 ;

  Kokkos::HostSpace::execution_space::initialize();

  Kokkos::Cuda::SelectDevice select_device( device_index );
  Kokkos::Cuda::initialize( select_device );

  HybridFEM::Implicit::driver<double,Kokkos::Cuda>( "Cuda" , machine , gang_count , elem_count_begin , elem_count_end , count_run );

  Kokkos::Cuda::finalize();
  Kokkos::HostSpace::execution_space::finalize();
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Runs Explicit::driver<double,Kokkos::Cuda> labelled "Cuda".
//
// The device index is (rank % device count) when at least one device was
// detected and there are no more devices than ranks; otherwise it is 0.
// The host execution space and Cuda are initialized before the driver and
// finalized afterwards, in reverse order.
void test_cuda_explicit( comm::Machine machine ,
                         size_t elem_count_begin ,
                         size_t elem_count_end ,
                         size_t count_run )
{
  const size_t rank         = comm::rank( machine );
  const size_t rank_total   = comm::size( machine );
  const size_t device_total = Kokkos::Cuda::detect_device_count();

  size_t device_index = 0 ;
  if ( device_total != 0 && device_total <= rank_total ) {
    device_index = rank % device_total ;
  }

  const size_t gang_count = 0 ;

  Kokkos::HostSpace::execution_space::initialize();

  Kokkos::Cuda::SelectDevice select_device( device_index );
  Kokkos::Cuda::initialize( select_device );

  Explicit::driver<double,Kokkos::Cuda>( "Cuda" , machine , gang_count , elem_count_begin , elem_count_end , count_run );

  Kokkos::Cuda::finalize();
  Kokkos::HostSpace::execution_space::finalize();
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Runs HybridFEM::Nonlinear::driver with double scalars, the Cuda device and
// FixtureElementHex8 elements, labelled "Cuda".
//
// The device index is (rank % device count) when at least one device was
// detected and there are no more devices than ranks; otherwise it is 0.
// The host execution space and Cuda are initialized before the driver and
// finalized afterwards, in reverse order.
void test_cuda_nonlinear( comm::Machine machine ,
                          size_t elem_count_begin ,
                          size_t elem_count_end ,
                          size_t count_run )
{
  const size_t rank         = comm::rank( machine );
  const size_t rank_total   = comm::size( machine );
  const size_t device_total = Kokkos::Cuda::detect_device_count();

  size_t device_index = 0 ;
  if ( device_total != 0 && device_total <= rank_total ) {
    device_index = rank % device_total ;
  }

  const size_t gang_count = 0 ;

  Kokkos::HostSpace::execution_space::initialize();

  Kokkos::Cuda::SelectDevice select_device( device_index );
  Kokkos::Cuda::initialize( select_device );

  HybridFEM::Nonlinear::driver<double,Kokkos::Cuda,FixtureElementHex8>( "Cuda" , machine , gang_count , elem_count_begin , elem_count_end , count_run );

  Kokkos::Cuda::finalize();
  Kokkos::HostSpace::execution_space::finalize();
}
|
||||
|
||||
// Runs HybridFEM::Nonlinear::driver with double scalars, the Cuda device and
// FixtureElementHex27 elements, labelled "Cuda".
//
// The device index is (rank % device count) when at least one device was
// detected and there are no more devices than ranks; otherwise it is 0.
// The host execution space and Cuda are initialized before the driver and
// finalized afterwards, in reverse order.
void test_cuda_nonlinear_quadratic( comm::Machine machine ,
                                    size_t elem_count_begin ,
                                    size_t elem_count_end ,
                                    size_t count_run )
{
  const size_t rank         = comm::rank( machine );
  const size_t rank_total   = comm::size( machine );
  const size_t device_total = Kokkos::Cuda::detect_device_count();

  size_t device_index = 0 ;
  if ( device_total != 0 && device_total <= rank_total ) {
    device_index = rank % device_total ;
  }

  const size_t gang_count = 0 ;

  Kokkos::HostSpace::execution_space::initialize();

  Kokkos::Cuda::SelectDevice select_device( device_index );
  Kokkos::Cuda::initialize( select_device );

  HybridFEM::Nonlinear::driver<double,Kokkos::Cuda,FixtureElementHex27>( "Cuda" , machine , gang_count , elem_count_begin , elem_count_end , count_run );

  Kokkos::Cuda::finalize();
  Kokkos::HostSpace::execution_space::finalize();
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
|
||||
@ -1,137 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
// Must be included first on Intel-Phi systems due to
|
||||
// redefinition of SEEK_SET in <mpi.h>.
|
||||
|
||||
#include <ParallelComm.hpp>
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <BoxMeshFixture.hpp>
|
||||
#include <TestBoxMeshFixture.hpp>
|
||||
#include <Implicit.hpp>
|
||||
#include <Nonlinear.hpp>
|
||||
#include <Explicit.hpp>
|
||||
#include <SparseLinearSystem.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Runs the box-fixture verification on the host execution space, which is
// initialized with gang_count * gang_worker_count and finalized afterwards.
void test_host_fixture( comm::Machine machine ,
                        size_t gang_count ,
                        size_t gang_worker_count ,
                        size_t nx , size_t ny , size_t nz )
{
  typedef Kokkos::HostSpace::execution_space host_space ;

  const size_t worker_total = gang_count * gang_worker_count ;

  host_space::initialize( worker_total );

  test_box_fixture<host_space>( machine , gang_count , nx , ny , nz );

  host_space::finalize();
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Runs HybridFEM::Implicit::driver with double scalars on the host execution
// space, labelled "Threads". The execution space is initialized with
// gang_count * gang_worker_count and finalized afterwards.
void test_host_implicit( comm::Machine machine ,
                         size_t gang_count ,
                         size_t gang_worker_count ,
                         size_t elem_count_begin ,
                         size_t elem_count_end ,
                         size_t count_run )
{
  typedef Kokkos::HostSpace::execution_space host_space ;

  const size_t worker_total = gang_count * gang_worker_count ;

  host_space::initialize( worker_total );

  HybridFEM::Implicit::driver<double,host_space>( "Threads" , machine , gang_count , elem_count_begin , elem_count_end , count_run );

  host_space::finalize();
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Runs Explicit::driver with double scalars on the host execution space,
// labelled "Threads". The execution space is initialized with
// gang_count * gang_worker_count and finalized afterwards.
void test_host_explicit( comm::Machine machine ,
                         size_t gang_count ,
                         size_t gang_worker_count ,
                         size_t elem_count_begin ,
                         size_t elem_count_end ,
                         size_t count_run )
{
  typedef Kokkos::HostSpace::execution_space host_space ;

  const size_t worker_total = gang_count * gang_worker_count ;

  host_space::initialize( worker_total );

  Explicit::driver<double,host_space>( "Threads" , machine , gang_count , elem_count_begin , elem_count_end , count_run );

  host_space::finalize();
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
// Runs HybridFEM::Nonlinear::driver with double scalars and FixtureElementHex8
// elements on the host execution space, labelled "Threads". The execution
// space is initialized with gang_count * gang_worker_count and finalized
// afterwards.
void test_host_nonlinear( comm::Machine machine ,
                          size_t gang_count ,
                          size_t gang_worker_count ,
                          size_t elem_count_begin ,
                          size_t elem_count_end ,
                          size_t count_run )
{
  typedef Kokkos::HostSpace::execution_space host_space ;

  const size_t worker_total = gang_count * gang_worker_count ;

  host_space::initialize( worker_total );

  HybridFEM::Nonlinear::driver<double,host_space,FixtureElementHex8>( "Threads" , machine , gang_count , elem_count_begin , elem_count_end , count_run );

  host_space::finalize();
}
|
||||
|
||||
// Runs HybridFEM::Nonlinear::driver with double scalars and
// FixtureElementHex27 elements on the host execution space, labelled
// "Threads". The execution space is initialized with
// gang_count * gang_worker_count and finalized afterwards.
void test_host_nonlinear_quadratic( comm::Machine machine ,
                                    size_t gang_count ,
                                    size_t gang_worker_count ,
                                    size_t elem_count_begin ,
                                    size_t elem_count_end ,
                                    size_t count_run )
{
  typedef Kokkos::HostSpace::execution_space host_space ;

  const size_t worker_total = gang_count * gang_worker_count ;

  host_space::initialize( worker_total );

  HybridFEM::Nonlinear::driver<double,host_space,FixtureElementHex27>( "Threads" , machine , gang_count , elem_count_begin , elem_count_end , count_run );

  host_space::finalize();
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -1,348 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
// Must be included first on Intel-Phi systems due to
|
||||
// redefinition of SEEK_SET in <mpi.h>.
|
||||
|
||||
#include <ParallelComm.hpp>
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <Kokkos_hwloc.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_box_partition( bool print );
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_host_fixture( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t nx , size_t ny , size_t nz );
|
||||
|
||||
void test_host_implicit( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
void test_host_explicit( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
void test_host_nonlinear( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
void test_host_nonlinear_quadratic( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_cuda_query( comm::Machine );
|
||||
|
||||
void test_cuda_fixture( comm::Machine machine ,
|
||||
size_t nx , size_t ny , size_t nz );
|
||||
|
||||
void test_cuda_implicit( comm::Machine machine ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
void test_cuda_explicit( comm::Machine machine ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
void test_cuda_nonlinear( comm:: Machine machine ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
void test_cuda_nonlinear_quadratic( comm::Machine machine ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
|
||||
bool run_host( std::istream & input ,
|
||||
comm::Machine machine ,
|
||||
const size_t host_gang_count ,
|
||||
const size_t host_gang_worker_count )
|
||||
{
|
||||
bool cmd_error = false ;
|
||||
|
||||
std::string which ; input >> which ;
|
||||
|
||||
if ( which == std::string("fixture") ) {
|
||||
|
||||
size_t nx = 0 , ny = 0 , nz = 0 ;
|
||||
input >> nx >> ny >> nz ;
|
||||
test_host_fixture( machine , host_gang_count , host_gang_worker_count , nx , ny , nz );
|
||||
|
||||
}
|
||||
else if ( which == std::string("explicit") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_host_explicit( machine , host_gang_count , host_gang_worker_count , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else if ( which == std::string("implicit") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_host_implicit( machine , host_gang_count , host_gang_worker_count , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else if ( which == std::string("nonlinear") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_host_nonlinear( machine , host_gang_count , host_gang_worker_count , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else if ( which == std::string("nonlinear_quadratic") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_host_nonlinear_quadratic( machine , host_gang_count , host_gang_worker_count , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else {
|
||||
cmd_error = true ;
|
||||
}
|
||||
|
||||
return cmd_error ;
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
bool run_cuda( std::istream & input , comm::Machine machine )
|
||||
{
|
||||
bool cmd_error = false ;
|
||||
|
||||
std::string which ; input >> which ;
|
||||
|
||||
if ( which == std::string("fixture") ) {
|
||||
|
||||
size_t nx = 0 , ny = 0 , nz = 0 ;
|
||||
input >> nx >> ny >> nz ;
|
||||
test_cuda_fixture( machine , nx , ny , nz );
|
||||
|
||||
}
|
||||
else if ( which == std::string("explicit") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_cuda_explicit( machine , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else if ( which == std::string("implicit") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_cuda_implicit( machine , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else if ( which == std::string("nonlinear") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_cuda_nonlinear( machine , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else if ( which == std::string("nonlinear_quadratic") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_cuda_nonlinear_quadratic( machine , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else {
|
||||
cmd_error = true ;
|
||||
}
|
||||
|
||||
return cmd_error ;
|
||||
}
|
||||
#endif
|
||||
|
||||
void run( const std::string & argline , comm::Machine machine )
|
||||
{
|
||||
const unsigned numa_count = Kokkos::hwloc::get_available_numa_count();
|
||||
const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
|
||||
const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
|
||||
|
||||
std::istringstream input( argline );
|
||||
|
||||
bool cmd_error = false ;
|
||||
|
||||
std::string which ; input >> which ;
|
||||
|
||||
if ( which == std::string("query") ) {
|
||||
std::cout << "P" << comm::rank( machine )
|
||||
<< ": hwloc { NUMA[" << numa_count << "]"
|
||||
<< " CORE[" << cores_per_numa << "]"
|
||||
<< " PU[" << threads_per_core << "] }"
|
||||
<< std::endl ;
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
test_cuda_query( machine );
|
||||
#endif
|
||||
}
|
||||
else if ( which == std::string("partition") ) {
|
||||
if ( 0 == comm::rank( machine ) ) {
|
||||
test_box_partition( false /* print flag */ );
|
||||
}
|
||||
}
|
||||
else {
|
||||
if ( which == std::string("host") ) {
|
||||
size_t host_gang_count = 0 ;
|
||||
size_t host_gang_worker_count = 1 ;
|
||||
|
||||
input >> host_gang_count ;
|
||||
input >> host_gang_worker_count ;
|
||||
|
||||
cmd_error = run_host( input , machine , host_gang_count , host_gang_worker_count );
|
||||
}
|
||||
else if ( which == std::string("host-all") ) {
|
||||
size_t host_gang_count = numa_count ;
|
||||
size_t host_gang_worker_count = cores_per_numa * threads_per_core ;
|
||||
|
||||
cmd_error = run_host( input , machine , host_gang_count , host_gang_worker_count );
|
||||
}
|
||||
else if ( which == std::string("host-most") ) {
|
||||
size_t host_gang_count = numa_count ;
|
||||
size_t host_gang_worker_count = ( cores_per_numa - 1 ) * threads_per_core ;
|
||||
|
||||
cmd_error = run_host( input , machine , host_gang_count , host_gang_worker_count );
|
||||
}
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
else if ( which == std::string("cuda") ) {
|
||||
cmd_error = run_cuda( input , machine );
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
cmd_error = true ;
|
||||
}
|
||||
}
|
||||
|
||||
if ( cmd_error && 0 == comm::rank( machine ) ) {
|
||||
std::cout << "Expecting command line with" << std::endl
|
||||
<< " query" << std::endl
|
||||
<< " partition" << std::endl
|
||||
<< " host NumNumaNode NumThreadPerNode <test>" << std::endl
|
||||
<< " host-all <test>" << std::endl
|
||||
<< " host-most <test>" << std::endl
|
||||
<< " cuda <test>" << std::endl
|
||||
<< "where <test> is" << std::endl
|
||||
<< " fixture NumElemX NumElemY NumElemZ" << std::endl
|
||||
<< " implicit NumElemBegin NumElemEnd NumRun" << std::endl
|
||||
<< " explicit NumElemBegin NumElemEnd NumRun" << std::endl
|
||||
<< " nonlinear NumElemBegin NumElemEnd NumRun" << std::endl
|
||||
<< " nonlinear_quadratic NumElemBegin NumElemEnd NumRun" << std::endl ;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
int main( int argc , char ** argv )
|
||||
{
|
||||
comm::Machine machine = comm::Machine::init( & argc , & argv );
|
||||
|
||||
const unsigned comm_rank = comm::rank( machine );
|
||||
|
||||
const std::string argline = comm::command_line( machine , argc , argv );
|
||||
|
||||
try {
|
||||
run( argline , machine );
|
||||
}
|
||||
catch( const std::exception & x ) {
|
||||
std::cerr << "P" << comm_rank << " throw: " << x.what() << std::endl ;
|
||||
}
|
||||
catch( ... ) {
|
||||
std::cerr << "P" << comm_rank << " throw: unknown exception" << std::endl ;
|
||||
}
|
||||
|
||||
comm::Machine::finalize();
|
||||
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
@ -1,14 +0,0 @@
|
||||
|
||||
# Headers are searched in the binary directory first, then the source directory.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
  )

# Every .cpp file in this directory is compiled into the executable.
SET(SOURCES "")
FILE(GLOB SOURCES *.cpp)

TRIBITS_ADD_EXECUTABLE(
  query_device
  SOURCES ${SOURCES}
  COMM serial mpi
  )
|
||||
|
||||
@ -1,53 +0,0 @@
|
||||
# Root of the Kokkos tree; may be overridden from the command line or environment.
KOKKOS_PATH ?= ../..

# Directory that contains this Makefile; sources are looked up there rather
# than in the current working directory.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
SRC_DIR := $(dir $(MAKEFILE_PATH))

SRC = $(wildcard $(SRC_DIR)/*.cpp)
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)

#SRC = $(wildcard *.cpp)
#OBJ = $(SRC:%.cpp=%.o)

# First rule in the file, hence the default goal.
default: build
	echo "Start Build"

# These targets name actions, not files: declare them phony so that a stray
# file called "default", "build" or "clean" cannot suppress them.
.PHONY: default build clean

# use installed Makefile.kokkos
include $(KOKKOS_PATH)/Makefile.kokkos

# Pick the compiler and executable suffix from the requested Kokkos devices.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(NVCC_WRAPPER)
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "Cuda,OpenMP"
#KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "OpenMP"
#KOKKOS_ARCH = "SNB"
endif

# Not referenced by any rule in this file.
DEPFLAGS = -M

LIB =


build: $(EXE)

$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

clean:
	rm -f *.a *.o *.cuda *.host

# Compilation rules

%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
@ -1,100 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
#include <mpi.h>
|
||||
#endif
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
int main( int argc , char ** argv )
|
||||
{
|
||||
std::ostringstream msg ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
MPI_Init( & argc , & argv );
|
||||
|
||||
int mpi_rank = 0 ;
|
||||
|
||||
MPI_Comm_rank( MPI_COMM_WORLD , & mpi_rank );
|
||||
|
||||
msg << "MPI rank(" << mpi_rank << ") " ;
|
||||
|
||||
#endif
|
||||
|
||||
msg << "{" << std::endl ;
|
||||
|
||||
if ( Kokkos::hwloc::available() ) {
|
||||
msg << "hwloc( NUMA[" << Kokkos::hwloc::get_available_numa_count()
|
||||
<< "] x CORE[" << Kokkos::hwloc::get_available_cores_per_numa()
|
||||
<< "] x HT[" << Kokkos::hwloc::get_available_threads_per_core()
|
||||
<< "] )"
|
||||
<< std::endl ;
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
Kokkos::Cuda::print_configuration( msg );
|
||||
#endif
|
||||
|
||||
msg << "}" << std::endl ;
|
||||
|
||||
std::cout << msg.str();
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
MPI_Finalize();
|
||||
|
||||
#endif
|
||||
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
@ -1,15 +0,0 @@
|
||||
INCLUDE(TribitsAddExecutableAndTest)

# Headers are searched in the binary directory first, then the source directory.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
  )

# Every .cpp file in this directory is compiled into the executable.
SET(SOURCES "")
FILE(GLOB SOURCES *.cpp)

TRIBITS_ADD_EXECUTABLE(
  sort_array
  SOURCES ${SOURCES}
  COMM serial mpi
  )
|
||||
|
||||
@ -1,53 +0,0 @@
|
||||
# Root of the Kokkos tree; may be overridden from the command line or environment.
KOKKOS_PATH ?= ../..

# Directory that contains this Makefile; sources are looked up there rather
# than in the current working directory.
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
SRC_DIR := $(dir $(MAKEFILE_PATH))

SRC = $(wildcard $(SRC_DIR)/*.cpp)
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)

#SRC = $(wildcard *.cpp)
#OBJ = $(SRC:%.cpp=%.o)

# First rule in the file, hence the default goal.
default: build
	echo "Start Build"

# These targets name actions, not files: declare them phony so that a stray
# file called "default", "build" or "clean" cannot suppress them.
.PHONY: default build clean

# use installed Makefile.kokkos
include $(KOKKOS_PATH)/Makefile.kokkos

# Pick the compiler and executable suffix from the requested Kokkos devices.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(NVCC_WRAPPER)
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "Cuda,OpenMP"
#KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -I$(SRC_DIR) -O3
LINK = $(CXX)
LINKFLAGS =
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
#KOKKOS_DEVICES = "OpenMP"
#KOKKOS_ARCH = "SNB"
endif

# Not referenced by any rule in this file.
DEPFLAGS = -M

LIB =


build: $(EXE)

$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

clean:
	rm -f *.a *.o *.cuda *.host

# Compilation rules

%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
@ -1,95 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <sort_array.hpp>
|
||||
|
||||
|
||||
// Run the sort_array example on every Kokkos execution space that is both
// compiled in and initialized.
//
// Command line: an argument "length_array" followed by an integer sets the
// span length (default 100000); the total array is 100 spans long.
// Returns 0 on success, 1 when the requested span length is negative.
int main( int argc , char ** argv )
{
#if defined( KOKKOS_HAVE_CUDA ) || defined( KOKKOS_HAVE_PTHREAD ) || defined( KOKKOS_HAVE_OPENMP )
  Kokkos::initialize( argc , argv );

  int length_array = 100000 ;

  // The keyword is only meaningful when a value follows it, so stop one short
  // of argc: argv[argc] is a null pointer and must not be handed to atoi().
  for ( int i = 0 ; i + 1 < argc ; ++i ) {
    if ( 0 == strcmp( argv[i] , "length_array" ) ) {
      length_array = atoi( argv[i+1] );
    }
  }

  // A negative length would silently convert to an enormous size_t below.
  if ( length_array < 0 ) {
    std::cerr << "length_array must not be negative, got " << length_array << std::endl ;
    Kokkos::finalize();
    return 1 ;
  }

  // Compute the total in size_t: in int arithmetic the multiplication
  // overflows for span lengths above INT_MAX / 100.
  const size_t span_length = static_cast<size_t>( length_array );
  const size_t length_total_array = span_length * 100 ;

#if defined( KOKKOS_HAVE_CUDA )
  if ( Kokkos::Cuda::is_initialized() ) {
    std::cout << "Kokkos::Cuda" << std::endl ;
    Example::sort_array< Kokkos::Cuda >( span_length , length_total_array );
  }
#endif

#if defined( KOKKOS_HAVE_PTHREAD )
  if ( Kokkos::Threads::is_initialized() ) {
    std::cout << "Kokkos::Threads" << std::endl ;
    Example::sort_array< Kokkos::Threads >( span_length , length_total_array );
  }
#endif

#if defined( KOKKOS_HAVE_OPENMP )
  if ( Kokkos::OpenMP::is_initialized() ) {
    std::cout << "Kokkos::OpenMP" << std::endl ;
    Example::sort_array< Kokkos::OpenMP >( span_length , length_total_array );
  }
#endif

  Kokkos::finalize();
#endif

  return 0 ;
}
|
||||
|
||||
@ -1,190 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef EXAMPLE_SORT_ARRAY
|
||||
#define EXAMPLE_SORT_ARRAY
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Example {
|
||||
|
||||
template< class Device >
|
||||
struct SortView {
|
||||
|
||||
template< typename ValueType >
|
||||
SortView( const Kokkos::View<ValueType*,Device> v , int begin , int end )
|
||||
{
|
||||
std::sort( v.ptr_on_device() + begin , v.ptr_on_device() + end );
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#if defined(KOKKOS_HAVE_CUDA)
|
||||
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/sort.h>
|
||||
|
||||
namespace Example {
|
||||
|
||||
template<>
|
||||
struct SortView< Kokkos::Cuda > {
|
||||
template< typename ValueType >
|
||||
SortView( const Kokkos::View<ValueType*,Kokkos::Cuda> v , int begin , int end )
|
||||
{
|
||||
thrust::sort( thrust::device_ptr<ValueType>( v.ptr_on_device() + begin )
|
||||
, thrust::device_ptr<ValueType>( v.ptr_on_device() + end ) );
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Example {
|
||||
|
||||
template< class Device >
|
||||
void sort_array( const size_t array_length /* length of spans of array to sort */
|
||||
, const size_t total_length /* total length of array */
|
||||
, const int print = 1 )
|
||||
{
|
||||
typedef Device execution_space ;
|
||||
typedef Kokkos::View<int*,Device> device_array_type ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
|
||||
typedef typename
|
||||
Kokkos::Impl::if_c< Kokkos::Impl::is_same< Device , Kokkos::Cuda >::value
|
||||
, Kokkos::View<int*,Kokkos::Cuda::array_layout,Kokkos::CudaHostPinnedSpace>
|
||||
, typename device_array_type::HostMirror
|
||||
>::type host_array_type ;
|
||||
|
||||
#else
|
||||
|
||||
typedef typename device_array_type::HostMirror host_array_type ;
|
||||
|
||||
#endif
|
||||
|
||||
Kokkos::Impl::Timer timer;
|
||||
|
||||
const device_array_type work_array("work_array" , array_length );
|
||||
const host_array_type host_array("host_array" , total_length );
|
||||
|
||||
std::cout << "sort_array length( " << total_length << " )"
|
||||
<< " in chunks( " << array_length << " )"
|
||||
<< std::endl ;
|
||||
|
||||
double sec = timer.seconds();
|
||||
std::cout << "declaring Views took "
|
||||
<< sec << " seconds" << std::endl;
|
||||
timer.reset();
|
||||
|
||||
for ( size_t i = 0 ; i < total_length ; ++i ) {
|
||||
host_array(i) = ( lrand48() * total_length ) >> 31 ;
|
||||
}
|
||||
|
||||
sec = timer.seconds();
|
||||
std::cout << "initializing " << total_length << " elements on host took "
|
||||
<< sec << " seconds" << std::endl;
|
||||
timer.reset();
|
||||
|
||||
double sec_copy_in = 0 ;
|
||||
double sec_sort = 0 ;
|
||||
double sec_copy_out = 0 ;
|
||||
double sec_error = 0 ;
|
||||
size_t error_count = 0 ;
|
||||
|
||||
for ( size_t begin = 0 ; begin < total_length ; begin += array_length ) {
|
||||
|
||||
const size_t end = begin + array_length < total_length
|
||||
? begin + array_length : total_length ;
|
||||
|
||||
const std::pair<size_t,size_t> host_range(begin,end);
|
||||
|
||||
const host_array_type host_subarray = Kokkos::subview( host_array , host_range );
|
||||
|
||||
timer.reset();
|
||||
|
||||
Kokkos::deep_copy( work_array , host_subarray );
|
||||
|
||||
sec_copy_in += timer.seconds(); timer.reset();
|
||||
|
||||
SortView< execution_space >( work_array , 0 , end - begin );
|
||||
|
||||
sec_sort += timer.seconds(); timer.reset();
|
||||
|
||||
Kokkos::deep_copy( host_subarray , work_array );
|
||||
|
||||
sec_copy_out += timer.seconds(); timer.reset();
|
||||
|
||||
for ( size_t i = begin + 1 ; i < end ; ++i ) {
|
||||
if ( host_array(i) < host_array(i-1) ) ++error_count ;
|
||||
}
|
||||
|
||||
sec_error += timer.seconds(); timer.reset();
|
||||
}
|
||||
|
||||
std::cout << "copy to device " << sec_copy_in << " seconds" << std::endl
|
||||
<< "sort on device " << sec_sort << " seconds" << std::endl
|
||||
<< "copy from device " << sec_copy_out << " seconds" << std::endl
|
||||
<< "errors " << error_count << " took " << sec_error << " seconds" << std::endl
|
||||
;
|
||||
}
|
||||
|
||||
} // namespace Example
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef EXAMPLE_SORT_ARRAY */
|
||||
|
||||
@ -1,11 +0,0 @@
|
||||
|
||||
# Headers are searched in the binary directory first, then the source directory.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
  )

# This is a tutorial, not a test, so we don't ask CTest to run it.
TRIBITS_ADD_EXECUTABLE(
  tutorial_01_hello_world
  SOURCES hello_world.cpp
  COMM serial mpi
  )
|
||||
|
||||
@ -1,43 +0,0 @@
|
||||
# Root of the Kokkos tree, relative to this tutorial directory.
KOKKOS_PATH = ../../..
SRC = $(wildcard *.cpp)

# First rule in the file, hence the default goal.
default: build
	echo "Start Build"

# These targets name actions, not files: declare them phony so that a stray
# file called "default", "build" or "clean" cannot suppress them.
.PHONY: default build clean

# KOKKOS_DEVICES is tested before this file assigns it, so the Cuda branch is
# only taken when the variable arrives from the command line or environment.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# Not referenced by any rule in this file.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
@ -1,130 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
#include <typeinfo>
|
||||
|
||||
//
|
||||
// "Hello world" parallel_for example:
|
||||
// 1. Start up Kokkos
|
||||
// 2. Execute a parallel for loop in the default execution space,
|
||||
// using a functor to define the loop body
|
||||
// 3. Shut down Kokkos
|
||||
//
|
||||
// If Kokkos was built with C++11 enabled, try comparing this example
|
||||
// to 01_hello_world_lambda. The latter uses C++11 lambdas (anonymous
|
||||
// functions) to define the loop body of the parallel_for. That makes
|
||||
// the code much more concise and readable. On the other hand,
|
||||
// breaking out the loop body into an explicit functor makes it easier
|
||||
// to test the loop independently of the parallel pattern.
|
||||
//
|
||||
|
||||
// Functor that defines the parallel_for's loop body.
|
||||
//
|
||||
// A "functor" is just a class or struct with a public operator()
|
||||
// instance method.
|
||||
struct hello_world {
|
||||
// If a functor has an "execution_space" (or "execution_space", for
|
||||
// backwards compatibility) public typedef, parallel_* will only run
|
||||
// the functor in that execution space. That's a good way to mark a
|
||||
// functor as specific to an execution space. If the functor lacks
|
||||
// this typedef, parallel_for will run it in the default execution
|
||||
// space, unless you tell it otherwise (that's an advanced topic;
|
||||
// see "execution policies").
|
||||
|
||||
// The functor's operator() defines the loop body. It takes an
|
||||
// integer argument which is the parallel for loop index. Other
|
||||
// arguments are possible; see the "hierarchical parallelism" part
|
||||
// of the tutorial.
|
||||
//
|
||||
// The operator() method must be const, and must be marked with the
|
||||
// KOKKOS_INLINE_FUNCTION macro. If building with CUDA, this macro
|
||||
// will mark your method as suitable for running on the CUDA device
|
||||
// (as well as on the host). If not building with CUDA, the macro
|
||||
// is unnecessary but harmless.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int i) const {
|
||||
printf ("Hello from i = %i\n", i);
|
||||
}
|
||||
};
|
||||
|
||||
int main (int argc, char* argv[]) {
|
||||
// You must call initialize() before you may call Kokkos.
|
||||
//
|
||||
// With no arguments, this initializes the default execution space
|
||||
// (and potentially its host execution space) with default
|
||||
// parameters. You may also pass in argc and argv, analogously to
|
||||
// MPI_Init(). It reads and removes command-line arguments that
|
||||
// start with "--kokkos-".
|
||||
Kokkos::initialize (argc, argv);
|
||||
|
||||
// Print the name of Kokkos' default execution space. We're using
|
||||
// typeid here, so the name might get a bit mangled by the linker,
|
||||
// but you should still be able to figure out what it is.
|
||||
printf ("Hello World on Kokkos execution space %s\n",
|
||||
typeid (Kokkos::DefaultExecutionSpace).name ());
|
||||
|
||||
// Run the above functor on the default Kokkos execution space in
|
||||
// parallel, with a parallel for loop count of 15.
|
||||
//
|
||||
// The Kokkos::DefaultExecutionSpace typedef gives the default
|
||||
// execution space. Depending on how Kokkos was configured, this
|
||||
// could be OpenMP, Threads, Cuda, Serial, or even some other
|
||||
// execution space.
|
||||
//
|
||||
// The following line of code would look like this in OpenMP:
|
||||
//
|
||||
// #pragma omp parallel for
|
||||
// for (int i = 0; i < 15; ++i) {
|
||||
// printf ("Hello from i = %i\n", i);
|
||||
// }
|
||||
//
|
||||
// You may notice that the printed numbers do not print out in
|
||||
// order. Parallel for loops may execute in any order.
|
||||
Kokkos::parallel_for ("HelloWorld",15, hello_world ());
|
||||
|
||||
// You must call finalize() after you are done using Kokkos.
|
||||
Kokkos::finalize ();
|
||||
}
|
||||
|
||||
@ -1,13 +0,0 @@
|
||||
|
||||
# Headers are searched in the binary directory first, then the source directory.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
  )

# The executable is only added when Kokkos_ENABLE_CXX11 is set.
IF (Kokkos_ENABLE_CXX11)
  # This is a tutorial, not a test, so we don't ask CTest to run it.
  TRIBITS_ADD_EXECUTABLE(
    tutorial_01_hello_world_lambda
    SOURCES hello_world_lambda.cpp
    COMM serial mpi
    )
ENDIF ()
|
||||
|
||||
@ -1,44 +0,0 @@
|
||||
# Root of the Kokkos tree, relative to this tutorial directory.
KOKKOS_PATH = ../../..
SRC = $(wildcard *.cpp)

# First rule in the file, hence the default goal.
default: build
	echo "Start Build"

# These targets name actions, not files: declare them phony so that a stray
# file called "default", "build" or "clean" cannot suppress them.
.PHONY: default build clean

# KOKKOS_DEVICES is tested before this file assigns it, so the Cuda branch is
# only taken when the variable arrives from the command line or environment.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
KOKKOS_CUDA_OPTIONS = "enable_lambda"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

# Not referenced by any rule in this file.
DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
@ -1,109 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
#include <typeinfo>
|
||||
|
||||
//
|
||||
// "Hello world" parallel_for example:
|
||||
// 1. Start up Kokkos
|
||||
// 2. Execute a parallel for loop in the default execution space,
|
||||
// using a C++11 lambda to define the loop body
|
||||
// 3. Shut down Kokkos
|
||||
//
|
||||
// This example only builds if C++11 is enabled. Compare this example
|
||||
// to 01_hello_world, which uses functors (explicitly defined classes)
|
||||
// to define the loop body of the parallel_for. Both functors and
|
||||
// lambdas have their places.
|
||||
//
|
||||
|
||||
int main (int argc, char* argv[]) {
|
||||
// You must call initialize() before you may call Kokkos.
|
||||
//
|
||||
// With no arguments, this initializes the default execution space
|
||||
// (and potentially its host execution space) with default
|
||||
// parameters. You may also pass in argc and argv, analogously to
|
||||
// MPI_Init(). It reads and removes command-line arguments that
|
||||
// start with "--kokkos-".
|
||||
Kokkos::initialize (argc, argv);
|
||||
|
||||
// Print the name of Kokkos' default execution space. We're using
|
||||
// typeid here, so the name might get a bit mangled by the linker,
|
||||
// but you should still be able to figure out what it is.
|
||||
printf ("Hello World on Kokkos execution space %s\n",
|
||||
typeid (Kokkos::DefaultExecutionSpace).name ());
|
||||
|
||||
// Run lambda on the default Kokkos execution space in parallel,
|
||||
// with a parallel for loop count of 15. The lambda's argument is
|
||||
// an integer which is the parallel for's loop index. As you learn
|
||||
// about different kinds of parallelism, you will find out that
|
||||
// there are other valid argument types as well.
|
||||
//
|
||||
// For a single level of parallelism, we prefer that you use the
|
||||
// KOKKOS_LAMBDA macro. If CUDA is disabled, this just turns into
|
||||
// [=]. That captures variables from the surrounding scope by
|
||||
// value. Do NOT capture them by reference! If CUDA is enabled,
|
||||
// this macro may have a special definition that makes the lambda
|
||||
// work correctly with CUDA. Compare to the KOKKOS_INLINE_FUNCTION
|
||||
// macro, which has a special meaning if CUDA is enabled.
|
||||
//
|
||||
// The following parallel_for would look like this if we were using
|
||||
// OpenMP by itself, instead of Kokkos:
|
||||
//
|
||||
// #pragma omp parallel for
|
||||
// for (int i = 0; i < 15; ++i) {
|
||||
// printf ("Hello from i = %i\n", i);
|
||||
// }
|
||||
//
|
||||
// You may notice that the printed numbers do not print out in
|
||||
// order. Parallel for loops may execute in any order.
|
||||
Kokkos::parallel_for (15, KOKKOS_LAMBDA (const int i) {
|
||||
// printf works in a CUDA parallel kernel; std::ostream does not.
|
||||
printf ("Hello from i = %i\n", i);
|
||||
});
|
||||
|
||||
// You must call finalize() after you are done using Kokkos.
|
||||
Kokkos::finalize ();
|
||||
}
|
||||
|
||||
@ -1,10 +0,0 @@
|
||||
|
||||
# Make headers from this directory's build tree and source tree visible,
# in that order.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
)

# Tutorial executable only: it is built, but CTest is not asked to run it.
TRIBITS_ADD_EXECUTABLE(tutorial_02_simple_reduce
  SOURCES simple_reduce.cpp
  COMM serial mpi
)
|
||||
@ -1,43 +0,0 @@
|
||||
# Stand-alone GNU Make build for the tutorial source(s) in this directory.
# Every .cpp file found here is compiled to an object file and linked
# against Kokkos using the variables supplied by Makefile.kokkos.
KOKKOS_PATH = ../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files.  Declaring them phony keeps a stray
# file called "default", "build" or "clean" from masking the target.
.PHONY: default build clean

# Note: the echo runs only after the "build" prerequisite has been made.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so a Cuda build is
# chosen only when a value containing "Cuda" is already set when make starts.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step.  $@ is the executable being made, so the output name always
# matches the target of the rule.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is not defined in this file; it is expected to come from the
# included Makefile.kokkos.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
@ -1,101 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
//
|
||||
// First reduction (parallel_reduce) example:
|
||||
// 1. Start up Kokkos
|
||||
// 2. Execute a parallel_reduce loop in the default execution space,
|
||||
// using a functor to define the loop body
|
||||
// 3. Shut down Kokkos
|
||||
//
|
||||
// Compare this example to 02_simple_reduce_lambda, which uses a C++11
|
||||
// lambda to define the loop body of the parallel_reduce.
|
||||
//
|
||||
|
||||
// Reduction functor for computing the sum of squares.
|
||||
//
|
||||
// More advanced reduction examples will show how to control the
|
||||
// reduction's "join" operator. If the join operator is not provided,
|
||||
// it defaults to binary operator+ (adding numbers together).
|
||||
struct squaresum {
|
||||
// Specify the type of the reduction value with a "value_type"
|
||||
// typedef. In this case, the reduction value has type int.
|
||||
typedef int value_type;
|
||||
|
||||
// The reduction functor's operator() looks a little different than
|
||||
// the parallel_for functor's operator(). For the reduction, we
|
||||
// pass in both the loop index i, and the intermediate reduction
|
||||
// value lsum. The latter MUST be passed in by nonconst reference.
|
||||
// (If the reduction type is an array like int[], indicating an
|
||||
// array reduction result, then the second argument is just int[].)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator () (const int i, int& lsum) const {
|
||||
lsum += i*i; // compute the sum of squares
|
||||
}
|
||||
};
|
||||
|
||||
int main (int argc, char* argv[]) {
  Kokkos::initialize (argc, argv);
  const int n = 10;
  const int last = n - 1; // largest integer that gets squared

  // Parallel version: let Kokkos add up the squares of 0 .. n-1
  // using the squaresum functor.
  int sum = 0;
  Kokkos::parallel_reduce (n, squaresum (), sum);
  printf ("Sum of squares of integers from 0 to %i, "
          "computed in parallel, is %i\n", last, sum);

  // Sequential version of the same sum, used as the reference.
  int seqSum = 0;
  int k = 0;
  while (k < n) {
    seqSum += k*k;
    ++k;
  }
  printf ("Sum of squares of integers from 0 to %i, "
          "computed sequentially, is %i\n", last, seqSum);
  Kokkos::finalize ();

  // Exit status 0 when both results agree, -1 otherwise.
  if (sum != seqSum) {
    return -1;
  }
  return 0;
}
|
||||
|
||||
@ -1,12 +0,0 @@
|
||||
|
||||
# Make headers from this directory's build tree and source tree visible,
# in that order.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
)

# The executable is only defined when Kokkos_ENABLE_CXX11 is set.
IF (Kokkos_ENABLE_CXX11)
  # Tutorial executable only: it is built, but CTest is not asked to run it.
  TRIBITS_ADD_EXECUTABLE(tutorial_02_simple_reduce_lambda
    SOURCES simple_reduce_lambda.cpp
    COMM serial mpi
  )
ENDIF ()
|
||||
@ -1,44 +0,0 @@
|
||||
# Stand-alone GNU Make build for the tutorial source(s) in this directory.
# Every .cpp file found here is compiled to an object file and linked
# against Kokkos using the variables supplied by Makefile.kokkos.
KOKKOS_PATH = ../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files.  Declaring them phony keeps a stray
# file called "default", "build" or "clean" from masking the target.
.PHONY: default build clean

# Note: the echo runs only after the "build" prerequisite has been made.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so a Cuda build is
# chosen only when a value containing "Cuda" is already set when make starts.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
# NOTE(review): presumably read by Makefile.kokkos to turn on lambda support
# for Cuda builds -- confirm against Makefile.kokkos.
KOKKOS_CUDA_OPTIONS = "enable_lambda"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step.  $@ is the executable being made, so the output name always
# matches the target of the rule.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is not defined in this file; it is expected to come from the
# included Makefile.kokkos.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
@ -1,86 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
//
|
||||
// First reduction (parallel_reduce) example:
|
||||
// 1. Start up Kokkos
|
||||
// 2. Execute a parallel_reduce loop in the default execution space,
|
||||
// using a C++11 lambda to define the loop body
|
||||
// 3. Shut down Kokkos
|
||||
//
|
||||
// This example only builds if C++11 is enabled. Compare this example
|
||||
// to 02_simple_reduce, which uses a functor to define the loop body
|
||||
// of the parallel_reduce.
|
||||
//
|
||||
|
||||
int main (int argc, char* argv[]) {
  Kokkos::initialize (argc, argv);
  const int n = 10;
  const int last = n - 1; // largest integer that gets squared

  // Parallel version: let Kokkos add up the squares of 0 .. n-1.
  // Here the loop body is a lambda rather than a functor; it takes
  // the same arguments a functor's operator() would.
  //
  // KOKKOS_LAMBDA stands in for the capture-by-value clause [=] and
  // also takes care of any other syntax needed for CUDA.
  int sum = 0;
  Kokkos::parallel_reduce (n, KOKKOS_LAMBDA (const int i, int& lsum) {
    lsum += i*i;
  }, sum);
  printf ("Sum of squares of integers from 0 to %i, "
          "computed in parallel, is %i\n", last, sum);

  // Sequential version of the same sum, used as the reference.
  int seqSum = 0;
  int k = 0;
  while (k < n) {
    seqSum += k*k;
    ++k;
  }
  printf ("Sum of squares of integers from 0 to %i, "
          "computed sequentially, is %i\n", last, seqSum);
  Kokkos::finalize ();

  // Exit status 0 when both results agree, -1 otherwise.
  if (sum != seqSum) {
    return -1;
  }
  return 0;
}
|
||||
|
||||
@ -1,10 +0,0 @@
|
||||
|
||||
# Make headers from this directory's build tree and source tree visible,
# in that order.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
)

# Tutorial executable only: it is built, but CTest is not asked to run it.
TRIBITS_ADD_EXECUTABLE(tutorial_03_simple_view
  SOURCES simple_view.cpp
  COMM serial mpi
)
|
||||
@ -1,43 +0,0 @@
|
||||
# Stand-alone GNU Make build for the tutorial source(s) in this directory.
# Every .cpp file found here is compiled to an object file and linked
# against Kokkos using the variables supplied by Makefile.kokkos.
KOKKOS_PATH = ../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files.  Declaring them phony keeps a stray
# file called "default", "build" or "clean" from masking the target.
.PHONY: default build clean

# Note: the echo runs only after the "build" prerequisite has been made.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so a Cuda build is
# chosen only when a value containing "Cuda" is already set when make starts.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step.  $@ is the executable being made, so the output name always
# matches the target of the rule.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is not defined in this file; it is expected to come from the
# included Makefile.kokkos.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
@ -1,142 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
//
|
||||
// First Kokkos::View (multidimensional array) example:
|
||||
// 1. Start up Kokkos
|
||||
// 2. Allocate a Kokkos::View
|
||||
// 3. Execute a parallel_for and a parallel_reduce over that View's data
|
||||
// 4. Shut down Kokkos
|
||||
//
|
||||
// Compare this example to 03_simple_view_lambda, which uses C++11
|
||||
// lambdas to define the loop bodies of the parallel_for and
|
||||
// parallel_reduce.
|
||||
//
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <cstdio>
|
||||
|
||||
// A Kokkos::View is an array of zero or more dimensions. The number
|
||||
// of dimensions is specified at compile time, as part of the type of
|
||||
// the View. This array has two dimensions. The first one
|
||||
// (represented by the asterisk) is a run-time dimension, and the
|
||||
// second (represented by [3]) is a compile-time dimension. Thus,
|
||||
// this View type is an N x 3 array of type double, where N is
|
||||
// specified at run time in the View's constructor.
|
||||
//
|
||||
// The first dimension of the View is the dimension over which it is
|
||||
// efficient for Kokkos to parallelize.
|
||||
typedef Kokkos::View<double*[3]> view_type;
|
||||
|
||||
// parallel_for functor that fills the View given to its constructor.
|
||||
// The View must already have been allocated.
|
||||
struct InitView {
|
||||
view_type a;
|
||||
|
||||
// Views have "view semantics." This means that they behave like
|
||||
// pointers, not like std::vector. Their copy constructor and
|
||||
// operator= only do shallow copies. Thus, you can pass View
|
||||
// objects around by "value"; they won't do a deep copy unless you
|
||||
// explicitly ask for a deep copy.
|
||||
InitView (view_type a_) :
|
||||
a (a_)
|
||||
{}
|
||||
|
||||
// Fill the View with some data. The parallel_for loop will iterate
|
||||
// over the View's first dimension N.
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator () (const int i) const {
|
||||
// Acesss the View just like a Fortran array. The layout depends
|
||||
// on the View's memory space, so don't rely on the View's
|
||||
// physical memory layout unless you know what you're doing.
|
||||
a(i,0) = 1.0*i;
|
||||
a(i,1) = 1.0*i*i;
|
||||
a(i,2) = 1.0*i*i*i;
|
||||
}
|
||||
};
|
||||
|
||||
// Reduction functor that reads the View given to its constructor.
|
||||
struct ReduceFunctor {
|
||||
view_type a;
|
||||
|
||||
// Constructor takes View by "value"; this does a shallow copy.
|
||||
ReduceFunctor (view_type a_) : a (a_) {}
|
||||
|
||||
// If you write a functor to do a reduction, you must specify the
|
||||
// type of the reduction result via a public 'value_type' typedef.
|
||||
typedef double value_type;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (int i, double &lsum) const {
|
||||
lsum += a(i,0)*a(i,1)/(a(i,2)+0.1);
|
||||
}
|
||||
};
|
||||
|
||||
int main (int argc, char* argv[]) {
  Kokkos::initialize (argc, argv);

  // Everything that touches a View lives in this inner scope, so the
  // View is destroyed -- and its allocation released -- before
  // Kokkos::finalize() runs below.  Letting a View outlive finalize()
  // means its memory would be freed after Kokkos has shut down.
  {
    const int N = 10;

    // Allocate the View.  The first dimension is a run-time parameter
    // N.  We set N = 10 here.  The second dimension is a compile-time
    // parameter, 3.  We don't specify it here because we already set it
    // by declaring the type of the View.
    //
    // Views get initialized to zero by default.  This happens in
    // parallel, using the View's memory space's default execution
    // space.  Parallel initialization ensures first-touch allocation.
    // There is a way to shut off default initialization.
    //
    // You may NOT allocate a View inside of a parallel_{for, reduce,
    // scan}.  Treat View allocation as a "thread collective."
    //
    // The string "A" is just the label; it only matters for debugging.
    // Different Views may have the same label.
    view_type a ("A", N);

    // Fill the View in parallel, then reduce over its rows.
    Kokkos::parallel_for (N, InitView (a));
    double sum = 0;
    Kokkos::parallel_reduce (N, ReduceFunctor (a), sum);
    printf ("Result: %f\n", sum);
  }

  Kokkos::finalize ();
}
|
||||
|
||||
@ -1,12 +0,0 @@
|
||||
|
||||
# Make headers from this directory's build tree and source tree visible,
# in that order.
INCLUDE_DIRECTORIES(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}
)

# The executable is only defined when Kokkos_ENABLE_CXX11 is set.
IF (Kokkos_ENABLE_CXX11)
  # Tutorial executable only: it is built, but CTest is not asked to run it.
  TRIBITS_ADD_EXECUTABLE(tutorial_03_simple_view_lambda
    SOURCES simple_view_lambda.cpp
    COMM serial mpi
  )
ENDIF ()
|
||||
@ -1,44 +0,0 @@
|
||||
# Stand-alone GNU Make build for the tutorial source(s) in this directory.
# Every .cpp file found here is compiled to an object file and linked
# against Kokkos using the variables supplied by Makefile.kokkos.
KOKKOS_PATH = ../../..
SRC = $(wildcard *.cpp)

# These targets name actions, not files.  Declaring them phony keeps a stray
# file called "default", "build" or "clean" from masking the target.
.PHONY: default build clean

# Note: the echo runs only after the "build" prerequisite has been made.
default: build
	echo "Start Build"

# KOKKOS_DEVICES is tested before this file assigns it, so a Cuda build is
# chosen only when a value containing "Cuda" is already set when make starts.
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = nvcc_wrapper
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.cuda)
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
# NOTE(review): presumably read by Makefile.kokkos to turn on lambda support
# for Cuda builds -- confirm against Makefile.kokkos.
KOKKOS_CUDA_OPTIONS = "enable_lambda"
else
CXX = g++
CXXFLAGS = -O3
LINK = ${CXX}
LINKFLAGS =
EXE = $(SRC:.cpp=.host)
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif

DEPFLAGS = -M

OBJ = $(SRC:.cpp=.o)
LIB =

include $(KOKKOS_PATH)/Makefile.kokkos

build: $(EXE)

# Link step.  $@ is the executable being made, so the output name always
# matches the target of the rule.
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $@

# kokkos-clean is not defined in this file; it is expected to come from the
# included Makefile.kokkos.
clean: kokkos-clean
	rm -f *.o *.cuda *.host

# Compilation rules

%.o:%.cpp $(KOKKOS_CPP_DEPENDS)
	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user