Updating kokkos lib
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14918 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -1,610 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_BOXMESHFIXTURE_HPP
|
||||
#define KOKKOS_BOXMESHFIXTURE_HPP
|
||||
|
||||
#include <cmath>
|
||||
#include <stdexcept>
|
||||
#include <sstream>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <BoxMeshPartition.hpp>
|
||||
#include <FEMesh.hpp>
|
||||
#include <HexElement.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
struct FixtureElementHex8 {
|
||||
|
||||
static const unsigned element_node_count = 8 ;
|
||||
|
||||
HybridFEM::HexElement_TensorData< element_node_count > elem_data ;
|
||||
BoxBoundsLinear box_bounds ;
|
||||
|
||||
FixtureElementHex8() : elem_data(), box_bounds() {}
|
||||
|
||||
static void create_node_boxes_from_vertex_boxes(
|
||||
const BoxType & vertex_box_global ,
|
||||
const std::vector< BoxType > & vertex_box_parts ,
|
||||
BoxType & node_box_global ,
|
||||
std::vector< BoxType > & node_box_parts )
|
||||
{
|
||||
node_box_global = vertex_box_global ;
|
||||
node_box_parts = vertex_box_parts ;
|
||||
}
|
||||
|
||||
void elem_to_node( const unsigned node_local , unsigned coord[] ) const
|
||||
{
|
||||
coord[0] += elem_data.eval_map[ node_local ][0] ;
|
||||
coord[1] += elem_data.eval_map[ node_local ][1] ;
|
||||
coord[2] += elem_data.eval_map[ node_local ][2] ;
|
||||
}
|
||||
};
|
||||
|
||||
struct FixtureElementHex27 {
|
||||
static const unsigned element_node_count = 27 ;
|
||||
|
||||
HybridFEM::HexElement_TensorData< element_node_count > elem_data ;
|
||||
BoxBoundsQuadratic box_bounds ;
|
||||
|
||||
FixtureElementHex27() : elem_data(), box_bounds() {}
|
||||
|
||||
static void create_node_boxes_from_vertex_boxes(
|
||||
const BoxType & vertex_box_global ,
|
||||
const std::vector< BoxType > & vertex_box_parts ,
|
||||
BoxType & node_box_global ,
|
||||
std::vector< BoxType > & node_box_parts )
|
||||
{
|
||||
node_box_global = vertex_box_global ;
|
||||
node_box_parts = vertex_box_parts ;
|
||||
|
||||
node_box_global[0][1] = 2 * node_box_global[0][1] - 1 ;
|
||||
node_box_global[1][1] = 2 * node_box_global[1][1] - 1 ;
|
||||
node_box_global[2][1] = 2 * node_box_global[2][1] - 1 ;
|
||||
|
||||
for ( unsigned i = 0 ; i < vertex_box_parts.size() ; ++i ) {
|
||||
node_box_parts[i][0][0] = 2 * node_box_parts[i][0][0] ;
|
||||
node_box_parts[i][1][0] = 2 * node_box_parts[i][1][0] ;
|
||||
node_box_parts[i][2][0] = 2 * node_box_parts[i][2][0] ;
|
||||
|
||||
node_box_parts[i][0][1] =
|
||||
std::min( node_box_global[0][1] , 2 * node_box_parts[i][0][1] );
|
||||
node_box_parts[i][1][1] =
|
||||
std::min( node_box_global[1][1] , 2 * node_box_parts[i][1][1] );
|
||||
node_box_parts[i][2][1] =
|
||||
std::min( node_box_global[2][1] , 2 * node_box_parts[i][2][1] );
|
||||
}
|
||||
}
|
||||
|
||||
void elem_to_node( const unsigned node_local , unsigned coord[] ) const
|
||||
{
|
||||
coord[0] = 2 * coord[0] + elem_data.eval_map[ node_local ][0] ;
|
||||
coord[1] = 2 * coord[1] + elem_data.eval_map[ node_local ][1] ;
|
||||
coord[2] = 2 * coord[2] + elem_data.eval_map[ node_local ][2] ;
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar , class Device , class ElementSpec >
|
||||
struct BoxMeshFixture {
|
||||
|
||||
typedef Scalar coordinate_scalar_type ;
|
||||
typedef Device execution_space ;
|
||||
|
||||
static const unsigned element_node_count = ElementSpec::element_node_count ;
|
||||
|
||||
typedef HybridFEM::FEMesh< coordinate_scalar_type ,
|
||||
element_node_count ,
|
||||
execution_space > FEMeshType ;
|
||||
|
||||
typedef typename FEMeshType::node_coords_type node_coords_type ;
|
||||
typedef typename FEMeshType::elem_node_ids_type elem_node_ids_type ;
|
||||
typedef typename FEMeshType::node_elem_ids_type node_elem_ids_type ;
|
||||
|
||||
|
||||
static void verify(
|
||||
const typename FEMeshType::node_coords_type::HostMirror & node_coords ,
|
||||
const typename FEMeshType::elem_node_ids_type::HostMirror & elem_node_ids ,
|
||||
const typename FEMeshType::node_elem_ids_type::HostMirror & node_elem_ids )
|
||||
{
|
||||
typedef typename FEMeshType::size_type size_type ;
|
||||
//typedef typename node_coords_type::value_type coords_type ; // unused
|
||||
|
||||
const size_type node_count_total = node_coords.dimension_0();
|
||||
const size_type elem_count_total = elem_node_ids.dimension_0();
|
||||
|
||||
const ElementSpec element ;
|
||||
|
||||
for ( size_type node_index = 0 ;
|
||||
node_index < node_count_total ; ++node_index ) {
|
||||
|
||||
for ( size_type
|
||||
j = node_elem_ids.row_map[ node_index ] ;
|
||||
j < node_elem_ids.row_map[ node_index + 1 ] ; ++j ) {
|
||||
|
||||
const size_type elem_index = node_elem_ids.entries(j,0);
|
||||
const size_type node_local = node_elem_ids.entries(j,1);
|
||||
const size_type en_id = elem_node_ids(elem_index,node_local);
|
||||
|
||||
if ( node_index != en_id ) {
|
||||
std::ostringstream msg ;
|
||||
msg << "BoxMeshFixture node_elem_ids error"
|
||||
<< " : node_index(" << node_index
|
||||
<< ") entry(" << j
|
||||
<< ") elem_index(" << elem_index
|
||||
<< ") node_local(" << node_local
|
||||
<< ") elem_node_id(" << en_id
|
||||
<< ")" ;
|
||||
throw std::runtime_error( msg.str() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for ( size_type elem_index = 0 ;
|
||||
elem_index < elem_count_total; ++elem_index ) {
|
||||
|
||||
coordinate_scalar_type elem_node_coord[ element_node_count ][3] ;
|
||||
|
||||
for ( size_type nn = 0 ; nn < element_node_count ; ++nn ) {
|
||||
const size_type node_index = elem_node_ids( elem_index , nn );
|
||||
|
||||
for ( size_type nc = 0 ; nc < 3 ; ++nc ) {
|
||||
elem_node_coord[nn][nc] = node_coords( node_index , nc );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for ( size_type nn = 0 ; nn < element_node_count ; ++nn ) {
|
||||
|
||||
const unsigned ix = element.elem_data.eval_map[nn][0] ;
|
||||
const unsigned iy = element.elem_data.eval_map[nn][1] ;
|
||||
const unsigned iz = element.elem_data.eval_map[nn][2] ;
|
||||
|
||||
if ( elem_node_coord[nn][0] != elem_node_coord[0][0] + ix ||
|
||||
elem_node_coord[nn][1] != elem_node_coord[0][1] + iy ||
|
||||
elem_node_coord[nn][2] != elem_node_coord[0][2] + iz ) {
|
||||
|
||||
std::ostringstream msg ;
|
||||
msg << "BoxMeshFixture elem_node_coord mapping failure { "
|
||||
<< elem_node_coord[nn][0] << " "
|
||||
<< elem_node_coord[nn][1] << " "
|
||||
<< elem_node_coord[nn][2] << " } != { "
|
||||
<< elem_node_coord[ 0][0] + ix << " "
|
||||
<< elem_node_coord[ 0][1] + iy << " "
|
||||
<< elem_node_coord[ 0][2] + iz
|
||||
<< " }" ;
|
||||
throw std::runtime_error( msg.str() );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
// Initialize element-node connectivity:
|
||||
// Order elements that only depend on owned nodes first.
|
||||
// These elements could be computed while waiting for
|
||||
// received node data.
|
||||
|
||||
static void layout_elements_interior_exterior(
|
||||
const BoxType vertex_box_local_used ,
|
||||
const BoxType vertex_box_local_owned ,
|
||||
const BoxType node_box_local_used ,
|
||||
const std::vector<size_t> & node_used_id_map ,
|
||||
const ElementSpec element_fixture ,
|
||||
const size_t elem_count_interior ,
|
||||
const typename elem_node_ids_type::HostMirror elem_node_ids )
|
||||
{
|
||||
size_t elem_index_interior = 0 ;
|
||||
size_t elem_index_boundary = elem_count_interior ;
|
||||
|
||||
for ( size_t iz = vertex_box_local_used[2][0] ;
|
||||
iz < vertex_box_local_used[2][1] - 1 ; ++iz ) {
|
||||
for ( size_t iy = vertex_box_local_used[1][0] ;
|
||||
iy < vertex_box_local_used[1][1] - 1 ; ++iy ) {
|
||||
for ( size_t ix = vertex_box_local_used[0][0] ;
|
||||
ix < vertex_box_local_used[0][1] - 1 ; ++ix ) {
|
||||
|
||||
size_t elem_index ;
|
||||
|
||||
// If lower and upper vertices are owned then element is interior
|
||||
if ( contain( vertex_box_local_owned, ix, iy, iz ) &&
|
||||
contain( vertex_box_local_owned, ix+1, iy+1, iz+1 ) ) {
|
||||
elem_index = elem_index_interior++ ;
|
||||
}
|
||||
else {
|
||||
elem_index = elem_index_boundary++ ;
|
||||
}
|
||||
|
||||
for ( size_t nn = 0 ; nn < element_node_count ; ++nn ) {
|
||||
unsigned coord[3] = { static_cast<unsigned>(ix) , static_cast<unsigned>(iy) , static_cast<unsigned>(iz) };
|
||||
|
||||
element_fixture.elem_to_node( nn , coord );
|
||||
|
||||
const size_t node_local_id =
|
||||
box_map_id( node_box_local_used ,
|
||||
node_used_id_map ,
|
||||
coord[0] , coord[1] , coord[2] );
|
||||
|
||||
elem_node_ids( elem_index , nn ) = node_local_id ;
|
||||
}
|
||||
}}}
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
// Nested partitioning of elements by number of thread 'gangs'
|
||||
|
||||
static void layout_elements_partitioned(
|
||||
const BoxType vertex_box_local_used ,
|
||||
const BoxType /*vertex_box_local_owned*/ ,
|
||||
const BoxType node_box_local_used ,
|
||||
const std::vector<size_t> & node_used_id_map ,
|
||||
const ElementSpec element_fixture ,
|
||||
const size_t thread_gang_count ,
|
||||
const typename elem_node_ids_type::HostMirror elem_node_ids )
|
||||
{
|
||||
std::vector< BoxType > element_box_gangs( thread_gang_count );
|
||||
|
||||
BoxType element_box_local_used = vertex_box_local_used ;
|
||||
|
||||
element_box_local_used[0][1] -= 1 ;
|
||||
element_box_local_used[1][1] -= 1 ;
|
||||
element_box_local_used[2][1] -= 1 ;
|
||||
|
||||
box_partition_rcb( element_box_local_used , element_box_gangs );
|
||||
|
||||
size_t elem_index = 0 ;
|
||||
|
||||
for ( size_t ig = 0 ; ig < thread_gang_count ; ++ig ) {
|
||||
|
||||
const BoxType box = element_box_gangs[ig] ;
|
||||
|
||||
for ( size_t iz = box[2][0] ; iz < box[2][1] ; ++iz ) {
|
||||
for ( size_t iy = box[1][0] ; iy < box[1][1] ; ++iy ) {
|
||||
for ( size_t ix = box[0][0] ; ix < box[0][1] ; ++ix , ++elem_index ) {
|
||||
|
||||
for ( size_t nn = 0 ; nn < element_node_count ; ++nn ) {
|
||||
unsigned coord[3] = { static_cast<unsigned>(ix) , static_cast<unsigned>(iy) , static_cast<unsigned>(iz) };
|
||||
|
||||
element_fixture.elem_to_node( nn , coord );
|
||||
|
||||
const size_t node_local_id =
|
||||
box_map_id( node_box_local_used ,
|
||||
node_used_id_map ,
|
||||
coord[0] , coord[1] , coord[2] );
|
||||
|
||||
elem_node_ids( elem_index , nn ) = node_local_id ;
|
||||
}
|
||||
}}}
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
static FEMeshType create( const size_t proc_count ,
|
||||
const size_t proc_local ,
|
||||
const size_t gang_count ,
|
||||
const size_t elems_x ,
|
||||
const size_t elems_y ,
|
||||
const size_t elems_z ,
|
||||
const double x_coord_curve = 1 ,
|
||||
const double y_coord_curve = 1 ,
|
||||
const double z_coord_curve = 1 )
|
||||
{
|
||||
const size_t vertices_x = elems_x + 1 ;
|
||||
const size_t vertices_y = elems_y + 1 ;
|
||||
const size_t vertices_z = elems_z + 1 ;
|
||||
|
||||
const BoxBoundsLinear vertex_box_bounds ;
|
||||
const ElementSpec element ;
|
||||
|
||||
// Partition based upon vertices:
|
||||
|
||||
BoxType vertex_box_global ;
|
||||
std::vector< BoxType > vertex_box_parts( proc_count );
|
||||
|
||||
vertex_box_global[0][0] = 0 ; vertex_box_global[0][1] = vertices_x ;
|
||||
vertex_box_global[1][0] = 0 ; vertex_box_global[1][1] = vertices_y ;
|
||||
vertex_box_global[2][0] = 0 ; vertex_box_global[2][1] = vertices_z ;
|
||||
|
||||
box_partition_rcb( vertex_box_global , vertex_box_parts );
|
||||
|
||||
const BoxType vertex_box_local_owned = vertex_box_parts[ proc_local ];
|
||||
|
||||
// Determine interior and used vertices:
|
||||
|
||||
BoxType vertex_box_local_interior ;
|
||||
BoxType vertex_box_local_used ;
|
||||
|
||||
vertex_box_bounds.apply( vertex_box_global ,
|
||||
vertex_box_local_owned ,
|
||||
vertex_box_local_interior ,
|
||||
vertex_box_local_used );
|
||||
|
||||
// Element counts:
|
||||
|
||||
const long local_elems_x =
|
||||
( vertex_box_local_used[0][1] - vertex_box_local_used[0][0] ) - 1 ;
|
||||
const long local_elems_y =
|
||||
( vertex_box_local_used[1][1] - vertex_box_local_used[1][0] ) - 1 ;
|
||||
const long local_elems_z =
|
||||
( vertex_box_local_used[2][1] - vertex_box_local_used[2][0] ) - 1 ;
|
||||
|
||||
const size_t elem_count_total = std::max( long(0) , local_elems_x ) *
|
||||
std::max( long(0) , local_elems_y ) *
|
||||
std::max( long(0) , local_elems_z );
|
||||
|
||||
const long interior_elems_x =
|
||||
( vertex_box_local_owned[0][1] - vertex_box_local_owned[0][0] ) - 1 ;
|
||||
const long interior_elems_y =
|
||||
( vertex_box_local_owned[1][1] - vertex_box_local_owned[1][0] ) - 1 ;
|
||||
const long interior_elems_z =
|
||||
( vertex_box_local_owned[2][1] - vertex_box_local_owned[2][0] ) - 1 ;
|
||||
|
||||
const size_t elem_count_interior = std::max( long(0) , interior_elems_x ) *
|
||||
std::max( long(0) , interior_elems_y ) *
|
||||
std::max( long(0) , interior_elems_z );
|
||||
|
||||
// Expand vertex boxes to node boxes:
|
||||
|
||||
BoxType node_box_global ;
|
||||
BoxType node_box_local_used ;
|
||||
std::vector< BoxType > node_box_parts ;
|
||||
|
||||
element.create_node_boxes_from_vertex_boxes(
|
||||
vertex_box_global , vertex_box_parts ,
|
||||
node_box_global , node_box_parts );
|
||||
|
||||
// Node communication maps:
|
||||
|
||||
size_t node_count_interior = 0 ;
|
||||
size_t node_count_owned = 0 ;
|
||||
size_t node_count_total = 0 ;
|
||||
std::vector<size_t> node_used_id_map ;
|
||||
std::vector<size_t> node_part_counts ;
|
||||
std::vector< std::vector<size_t> > node_send_map ;
|
||||
|
||||
box_partition_maps( node_box_global ,
|
||||
node_box_parts ,
|
||||
element.box_bounds ,
|
||||
proc_local ,
|
||||
node_box_local_used ,
|
||||
node_used_id_map ,
|
||||
node_count_interior ,
|
||||
node_count_owned ,
|
||||
node_count_total ,
|
||||
node_part_counts ,
|
||||
node_send_map );
|
||||
|
||||
size_t node_count_send = 0 ;
|
||||
for ( size_t i = 0 ; i < node_send_map.size() ; ++i ) {
|
||||
node_count_send += node_send_map[i].size();
|
||||
}
|
||||
|
||||
size_t recv_msg_count = 0 ;
|
||||
size_t send_msg_count = 0 ;
|
||||
size_t send_count = 0 ;
|
||||
|
||||
for ( size_t i = 1 ; i < proc_count ; ++i ) {
|
||||
if ( node_part_counts[i] ) ++recv_msg_count ;
|
||||
if ( node_send_map[i].size() ) {
|
||||
++send_msg_count ;
|
||||
send_count += node_send_map[i].size();
|
||||
}
|
||||
}
|
||||
|
||||
// Finite element mesh:
|
||||
|
||||
FEMeshType mesh ;
|
||||
|
||||
if ( node_count_total ) {
|
||||
mesh.node_coords = node_coords_type( "node_coords", node_count_total );
|
||||
}
|
||||
|
||||
if ( elem_count_total ) {
|
||||
mesh.elem_node_ids =
|
||||
elem_node_ids_type( "elem_node_ids", elem_count_total );
|
||||
}
|
||||
|
||||
mesh.parallel_data_map.assign( node_count_interior ,
|
||||
node_count_owned ,
|
||||
node_count_total ,
|
||||
recv_msg_count ,
|
||||
send_msg_count ,
|
||||
send_count );
|
||||
|
||||
typename node_coords_type::HostMirror node_coords =
|
||||
Kokkos::create_mirror( mesh.node_coords );
|
||||
|
||||
typename elem_node_ids_type::HostMirror elem_node_ids =
|
||||
Kokkos::create_mirror( mesh.elem_node_ids );
|
||||
|
||||
//------------------------------------
|
||||
// set node coordinates to grid location for subsequent verification
|
||||
|
||||
for ( size_t iz = node_box_local_used[2][0] ;
|
||||
iz < node_box_local_used[2][1] ; ++iz ) {
|
||||
|
||||
for ( size_t iy = node_box_local_used[1][0] ;
|
||||
iy < node_box_local_used[1][1] ; ++iy ) {
|
||||
|
||||
for ( size_t ix = node_box_local_used[0][0] ;
|
||||
ix < node_box_local_used[0][1] ; ++ix ) {
|
||||
|
||||
const size_t node_local_id =
|
||||
box_map_id( node_box_local_used , node_used_id_map , ix , iy , iz );
|
||||
|
||||
node_coords( node_local_id , 0 ) = ix ;
|
||||
node_coords( node_local_id , 1 ) = iy ;
|
||||
node_coords( node_local_id , 2 ) = iz ;
|
||||
}}}
|
||||
|
||||
//------------------------------------
|
||||
// Initialize element-node connectivity:
|
||||
|
||||
if ( 1 < gang_count ) {
|
||||
layout_elements_partitioned( vertex_box_local_used ,
|
||||
vertex_box_local_owned ,
|
||||
node_box_local_used ,
|
||||
node_used_id_map ,
|
||||
element ,
|
||||
gang_count ,
|
||||
elem_node_ids );
|
||||
}
|
||||
else {
|
||||
layout_elements_interior_exterior( vertex_box_local_used ,
|
||||
vertex_box_local_owned ,
|
||||
node_box_local_used ,
|
||||
node_used_id_map ,
|
||||
element ,
|
||||
elem_count_interior ,
|
||||
elem_node_ids );
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
// Populate node->element connectivity:
|
||||
|
||||
std::vector<size_t> node_elem_work( node_count_total , (size_t) 0 );
|
||||
|
||||
for ( size_t i = 0 ; i < elem_count_total ; ++i ) {
|
||||
for ( size_t n = 0 ; n < element_node_count ; ++n ) {
|
||||
++node_elem_work[ elem_node_ids(i,n) ];
|
||||
}
|
||||
}
|
||||
|
||||
mesh.node_elem_ids =
|
||||
Kokkos::create_staticcrsgraph< node_elem_ids_type >( "node_elem_ids" , node_elem_work );
|
||||
|
||||
typename node_elem_ids_type::HostMirror
|
||||
node_elem_ids = Kokkos::create_mirror( mesh.node_elem_ids );
|
||||
|
||||
for ( size_t i = 0 ; i < node_count_total ; ++i ) {
|
||||
node_elem_work[i] = node_elem_ids.row_map[i];
|
||||
}
|
||||
|
||||
// Looping in element order insures the list of elements
|
||||
// is sorted by element index.
|
||||
|
||||
for ( size_t i = 0 ; i < elem_count_total ; ++i ) {
|
||||
for ( size_t n = 0 ; n < element_node_count ; ++n ) {
|
||||
const unsigned nid = elem_node_ids(i, n);
|
||||
const unsigned j = node_elem_work[nid] ; ++node_elem_work[nid] ;
|
||||
|
||||
node_elem_ids.entries( j , 0 ) = i ;
|
||||
node_elem_ids.entries( j , 1 ) = n ;
|
||||
}
|
||||
}
|
||||
//------------------------------------
|
||||
// Verify setup with node coordinates matching grid indices.
|
||||
verify( node_coords , elem_node_ids , node_elem_ids );
|
||||
|
||||
//------------------------------------
|
||||
// Scale node coordinates to problem extent with
|
||||
// nonlinear mapping.
|
||||
{
|
||||
const double problem_extent[3] =
|
||||
{ static_cast<double>( vertex_box_global[0][1] - 1 ) ,
|
||||
static_cast<double>( vertex_box_global[1][1] - 1 ) ,
|
||||
static_cast<double>( vertex_box_global[2][1] - 1 ) };
|
||||
|
||||
const double grid_extent[3] =
|
||||
{ static_cast<double>( node_box_global[0][1] - 1 ) ,
|
||||
static_cast<double>( node_box_global[1][1] - 1 ) ,
|
||||
static_cast<double>( node_box_global[2][1] - 1 ) };
|
||||
|
||||
for ( size_t i = 0 ; i < node_count_total ; ++i ) {
|
||||
const double x_unit = node_coords(i,0) / grid_extent[0] ;
|
||||
const double y_unit = node_coords(i,1) / grid_extent[1] ;
|
||||
const double z_unit = node_coords(i,2) / grid_extent[2] ;
|
||||
|
||||
node_coords(i,0) = coordinate_scalar_type( problem_extent[0] * std::pow( x_unit , x_coord_curve ) );
|
||||
node_coords(i,1) = coordinate_scalar_type( problem_extent[1] * std::pow( y_unit , y_coord_curve ) );
|
||||
node_coords(i,2) = coordinate_scalar_type( problem_extent[2] * std::pow( z_unit , z_coord_curve ) );
|
||||
}
|
||||
}
|
||||
|
||||
Kokkos::deep_copy( mesh.node_coords , node_coords );
|
||||
Kokkos::deep_copy( mesh.elem_node_ids , elem_node_ids );
|
||||
Kokkos::deep_copy( mesh.node_elem_ids.entries , node_elem_ids.entries );
|
||||
|
||||
//------------------------------------
|
||||
// Communication lists:
|
||||
{
|
||||
recv_msg_count = 0 ;
|
||||
send_msg_count = 0 ;
|
||||
send_count = 0 ;
|
||||
|
||||
for ( size_t i = 1 ; i < proc_count ; ++i ) {
|
||||
|
||||
// Order sending starting with the local processor rank
|
||||
// to try to smooth out the amount of messages simultaneously
|
||||
// send to a particular processor.
|
||||
|
||||
const int proc = ( proc_local + i ) % proc_count ;
|
||||
if ( node_part_counts[i] ) {
|
||||
mesh.parallel_data_map.host_recv(recv_msg_count,0) = proc ;
|
||||
mesh.parallel_data_map.host_recv(recv_msg_count,1) = node_part_counts[i] ;
|
||||
++recv_msg_count ;
|
||||
}
|
||||
if ( node_send_map[i].size() ) {
|
||||
mesh.parallel_data_map.host_send(send_msg_count,0) = proc ;
|
||||
mesh.parallel_data_map.host_send(send_msg_count,1) = node_send_map[i].size() ;
|
||||
for ( size_t j = 0 ; j < node_send_map[i].size() ; ++j , ++send_count ) {
|
||||
mesh.parallel_data_map.host_send_item(send_count) = node_send_map[i][j] - node_count_interior ;
|
||||
}
|
||||
++send_msg_count ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return mesh ;
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_BOXMESHFIXTURE_HPP */
|
||||
|
||||
|
||||
@ -1,381 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <limits>
|
||||
#include <BoxMeshPartition.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
|
||||
void box_partition( size_t ip , size_t up ,
|
||||
const BoxType & box ,
|
||||
BoxType * const p_box )
|
||||
{
|
||||
const size_t np = up - ip ;
|
||||
|
||||
if ( 1 == np ) {
|
||||
p_box[ip] = box ;
|
||||
}
|
||||
else {
|
||||
// Choose axis with largest count:
|
||||
|
||||
const size_t n0 = box[0][1] - box[0][0] ;
|
||||
const size_t n1 = box[1][1] - box[1][0] ;
|
||||
const size_t n2 = box[2][1] - box[2][0] ;
|
||||
|
||||
const size_t axis = n2 > n1 ? ( n2 > n0 ? 2 : ( n1 > n0 ? 1 : 0 ) ) :
|
||||
( n1 > n0 ? 1 : 0 );
|
||||
|
||||
const size_t n = box[ axis ][1] - box[ axis ][0] ;
|
||||
|
||||
if ( 0 == np % 3 ) {
|
||||
const size_t np_part = np / 3 ; // exact
|
||||
|
||||
const size_t nbox_low = (size_t)(( (double) n ) * ( 1.0 / 3.0 ));
|
||||
const size_t nbox_mid = (size_t)(( (double) n ) * ( 2.0 / 3.0 ));
|
||||
|
||||
BoxType dbox_low = box ; // P = [ip,ip+np/3)
|
||||
BoxType dbox_mid = box ; // P = [ip+np/3,ip+2*np/3)
|
||||
BoxType dbox_upp = box ; // P = [ip+2*np/3,ip+np)
|
||||
|
||||
dbox_low[ axis ][1] = box[ axis ][0] + nbox_low ;
|
||||
dbox_mid[ axis ][1] = box[ axis ][0] + nbox_mid ;
|
||||
|
||||
dbox_mid[ axis ][0] = dbox_low[ axis ][1];
|
||||
dbox_upp[ axis ][0] = dbox_mid[ axis ][1];
|
||||
|
||||
box_partition( ip, ip + np_part, dbox_low , p_box );
|
||||
box_partition( ip+ np_part, ip + 2*np_part, dbox_mid , p_box );
|
||||
box_partition( ip+2*np_part, up, dbox_upp , p_box );
|
||||
}
|
||||
else {
|
||||
const size_t np_low = np / 2 ; /* Rounded down */
|
||||
const size_t nbox_low = (size_t)
|
||||
(((double)n) * ( ((double) np_low ) / ((double) np ) ));
|
||||
|
||||
BoxType dbox_low = box ;
|
||||
BoxType dbox_upp = box ;
|
||||
|
||||
dbox_low[ axis ][1] = dbox_low[ axis ][0] + nbox_low ;
|
||||
dbox_upp[ axis ][0] = dbox_low[ axis ][1];
|
||||
|
||||
box_partition( ip, ip + np_low, dbox_low , p_box );
|
||||
box_partition( ip + np_low, up, dbox_upp , p_box );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t box_map_offset( const BoxType & local_use ,
|
||||
const size_t global_i ,
|
||||
const size_t global_j ,
|
||||
const size_t global_k )
|
||||
|
||||
{
|
||||
const size_t max = std::numeric_limits<size_t>::max();
|
||||
|
||||
const size_t n[3] =
|
||||
{ local_use[0][1] - local_use[0][0] ,
|
||||
local_use[1][1] - local_use[1][0] ,
|
||||
local_use[2][1] - local_use[2][0] };
|
||||
|
||||
const size_t use[3] = {
|
||||
( global_i >= local_use[0][0] ? global_i - local_use[0][0] : max ) ,
|
||||
( global_j >= local_use[1][0] ? global_j - local_use[1][0] : max ) ,
|
||||
( global_k >= local_use[2][0] ? global_k - local_use[2][0] : max ) };
|
||||
|
||||
const size_t offset =
|
||||
( use[0] < n[0] && use[1] < n[1] && use[2] < n[2] ) ?
|
||||
( use[0] + n[0] * ( use[1] + n[1] * use[2] ) ) : max ;
|
||||
|
||||
if ( offset == max ) {
|
||||
std::ostringstream msg ;
|
||||
msg << "box_map_offset ERROR: "
|
||||
<< " use " << local_use
|
||||
<< " ( " << global_i
|
||||
<< " , " << global_j
|
||||
<< " , " << global_k
|
||||
<< " )" ;
|
||||
throw std::runtime_error( msg.str() );
|
||||
}
|
||||
|
||||
return offset ;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void BoxBoundsLinear::apply( const BoxType & box_global ,
|
||||
const BoxType & box_part ,
|
||||
BoxType & box_interior ,
|
||||
BoxType & box_use ) const
|
||||
{
|
||||
const unsigned ghost = 1 ;
|
||||
|
||||
if ( 0 == count( box_part ) ) {
|
||||
box_interior = box_part ;
|
||||
box_use = box_part ;
|
||||
}
|
||||
else {
|
||||
for ( size_t i = 0 ; i < 3 ; ++i ) {
|
||||
|
||||
box_interior[i][0] =
|
||||
( box_part[i][0] == box_global[i][0] ) ? box_part[i][0] : (
|
||||
( box_part[i][0] + ghost < box_part[i][1] ) ? box_part[i][0] + ghost :
|
||||
box_part[i][1] );
|
||||
|
||||
box_interior[i][1] =
|
||||
( box_part[i][1] == box_global[i][1] ) ? box_part[i][1] : (
|
||||
( box_part[i][0] + ghost < box_part[i][1] ) ? box_part[i][1] - ghost :
|
||||
box_part[i][0] );
|
||||
|
||||
box_use[i][0] =
|
||||
( box_part[i][0] > ghost + box_global[i][0] ) ? box_part[i][0] - ghost :
|
||||
box_global[i][0] ;
|
||||
box_use[i][1] =
|
||||
( box_part[i][1] + ghost < box_global[i][1] ) ? box_part[i][1] + ghost :
|
||||
box_global[i][1] ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BoxBoundsQuadratic::apply( const BoxType & box_global ,
|
||||
const BoxType & box_part ,
|
||||
BoxType & box_interior ,
|
||||
BoxType & box_use ) const
|
||||
{
|
||||
if ( 0 == count( box_part ) ) {
|
||||
box_interior = box_part ;
|
||||
box_use = box_part ;
|
||||
}
|
||||
else {
|
||||
for ( size_t i = 0 ; i < 3 ; ++i ) {
|
||||
const bool odd = ( box_part[i][0] - box_global[i][0] ) & 01 ;
|
||||
|
||||
const unsigned ghost = odd ? 1 : 2 ;
|
||||
|
||||
box_interior[i][0] =
|
||||
( box_part[i][0] == box_global[i][0] ) ? box_part[i][0] : (
|
||||
( box_part[i][0] + ghost < box_part[i][1] ) ? box_part[i][0] + ghost :
|
||||
box_part[i][1] );
|
||||
|
||||
box_interior[i][1] =
|
||||
( box_part[i][1] == box_global[i][1] ) ? box_part[i][1] : (
|
||||
( box_part[i][0] + ghost < box_part[i][1] ) ? box_part[i][1] - ghost :
|
||||
box_part[i][0] );
|
||||
|
||||
box_use[i][0] =
|
||||
( box_part[i][0] > ghost + box_global[i][0] ) ? box_part[i][0] - ghost :
|
||||
box_global[i][0] ;
|
||||
box_use[i][1] =
|
||||
( box_part[i][1] + ghost < box_global[i][1] ) ? box_part[i][1] + ghost :
|
||||
box_global[i][1] ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void box_partition_rcb( const BoxType & root_box ,
|
||||
std::vector<BoxType> & part_boxes )
|
||||
{
|
||||
const BoxBoundsLinear use_boxes ;
|
||||
|
||||
const size_t part_count = part_boxes.size();
|
||||
|
||||
box_partition( 0 , part_count , root_box , & part_boxes[0] );
|
||||
|
||||
// Verify partitioning
|
||||
|
||||
size_t total_cell = 0 ;
|
||||
|
||||
for ( size_t i = 0 ; i < part_count ; ++i ) {
|
||||
|
||||
total_cell += count( part_boxes[i] );
|
||||
|
||||
BoxType box_interior , box_use ;
|
||||
|
||||
use_boxes.apply( root_box , part_boxes[i] , box_interior , box_use );
|
||||
|
||||
if ( count( box_use ) < count( part_boxes[i] ) ||
|
||||
count( part_boxes[i] ) < count( box_interior ) ||
|
||||
part_boxes[i] != intersect( part_boxes[i] , box_use ) ||
|
||||
box_interior != intersect( part_boxes[i] , box_interior )) {
|
||||
|
||||
std::ostringstream msg ;
|
||||
|
||||
msg << "box_partition_rcb ERROR : "
|
||||
<< "part_boxes[" << i << "] = "
|
||||
<< part_boxes[i]
|
||||
<< " use " << box_use
|
||||
<< " interior " << box_interior
|
||||
<< std::endl
|
||||
<< " part ^ use " << intersect( part_boxes[i] , box_use )
|
||||
<< " part ^ interior " << intersect( part_boxes[i] , box_interior );
|
||||
|
||||
throw std::runtime_error( msg.str() );
|
||||
}
|
||||
|
||||
for ( size_t j = i + 1 ; j < part_count ; ++j ) {
|
||||
const BoxType tmp = intersect( part_boxes[i] , part_boxes[j] );
|
||||
|
||||
if ( count( tmp ) ) {
|
||||
throw std::runtime_error( std::string("box partition intersection") );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( total_cell != count( root_box ) ) {
|
||||
throw std::runtime_error( std::string("box partition count") );
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
size_t box_map_id( const BoxType & local_use ,
|
||||
const std::vector<size_t> & local_use_id_map ,
|
||||
const size_t global_i ,
|
||||
const size_t global_j ,
|
||||
const size_t global_k )
|
||||
|
||||
{
|
||||
const size_t offset =
|
||||
box_map_offset( local_use , global_i , global_j , global_k );
|
||||
return local_use_id_map[ offset ];
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void box_partition_maps( const BoxType & root_box ,
|
||||
const std::vector<BoxType> & part_boxes ,
|
||||
const BoxBounds & use_boxes ,
|
||||
const size_t my_part ,
|
||||
BoxType & my_use_box ,
|
||||
std::vector<size_t> & my_use_id_map ,
|
||||
size_t & my_count_interior ,
|
||||
size_t & my_count_owned ,
|
||||
size_t & my_count_uses ,
|
||||
std::vector<size_t> & my_part_counts ,
|
||||
std::vector<std::vector<size_t> > & my_send_map )
|
||||
{
|
||||
const size_t np = part_boxes.size();
|
||||
|
||||
if ( np <= my_part ) {
|
||||
std::ostringstream msg ;
|
||||
msg << "box_partition_maps ERROR : "
|
||||
<< " np(" << np << ") <= my_part(" << my_part << ")" ;
|
||||
throw std::runtime_error( msg.str() );
|
||||
}
|
||||
|
||||
const BoxType my_owned_box = part_boxes[my_part];
|
||||
BoxType my_interior_box ;
|
||||
|
||||
|
||||
use_boxes.apply( root_box, my_owned_box, my_interior_box, my_use_box );
|
||||
|
||||
my_count_interior = count( my_interior_box );
|
||||
my_count_owned = count( my_owned_box );
|
||||
my_count_uses = count( my_use_box );
|
||||
|
||||
my_use_id_map.assign( my_count_uses , std::numeric_limits<size_t>::max() );
|
||||
|
||||
// Order ids as { owned-interior , owned-parallel , received_{(p+i)%np} }
|
||||
|
||||
size_t offset_interior = 0 ;
|
||||
size_t offset_parallel = my_count_interior ;
|
||||
|
||||
for ( size_t iz = my_owned_box[2][0] ; iz < my_owned_box[2][1] ; ++iz ) {
|
||||
for ( size_t iy = my_owned_box[1][0] ; iy < my_owned_box[1][1] ; ++iy ) {
|
||||
for ( size_t ix = my_owned_box[0][0] ; ix < my_owned_box[0][1] ; ++ix ) {
|
||||
const size_t offset = box_map_offset( my_use_box , ix , iy , iz );
|
||||
if ( contain( my_interior_box , ix , iy , iz ) ) {
|
||||
my_use_id_map[ offset ] = offset_interior++ ;
|
||||
}
|
||||
else {
|
||||
my_use_id_map[ offset ] = offset_parallel++ ;
|
||||
}
|
||||
}}}
|
||||
|
||||
|
||||
my_part_counts.assign( np , (size_t) 0 );
|
||||
my_send_map.assign( np , std::vector<size_t>() );
|
||||
|
||||
my_part_counts[0] = my_count_owned ;
|
||||
|
||||
for ( size_t i = 1 ; i < np ; ++i ) {
|
||||
|
||||
const size_t ip = ( my_part + i ) % np ;
|
||||
|
||||
const BoxType p_owned_box = part_boxes[ip];
|
||||
BoxType p_use_box , p_interior_box ;
|
||||
use_boxes.apply( root_box, p_owned_box, p_interior_box, p_use_box );
|
||||
|
||||
const BoxType recv_box = intersect( my_use_box , p_owned_box );
|
||||
const BoxType send_box = intersect( my_owned_box , p_use_box );
|
||||
|
||||
if ( 0 != ( my_part_counts[i] = count( recv_box ) ) ) {
|
||||
for ( size_t iz = recv_box[2][0] ; iz < recv_box[2][1] ; ++iz ) {
|
||||
for ( size_t iy = recv_box[1][0] ; iy < recv_box[1][1] ; ++iy ) {
|
||||
for ( size_t ix = recv_box[0][0] ; ix < recv_box[0][1] ; ++ix ) {
|
||||
const size_t offset = box_map_offset( my_use_box , ix , iy , iz );
|
||||
my_use_id_map[ offset ] = offset_parallel++ ;
|
||||
}}}
|
||||
}
|
||||
|
||||
if ( 0 != count( send_box ) ) {
|
||||
for ( size_t iz = send_box[2][0] ; iz < send_box[2][1] ; ++iz ) {
|
||||
for ( size_t iy = send_box[1][0] ; iy < send_box[1][1] ; ++iy ) {
|
||||
for ( size_t ix = send_box[0][0] ; ix < send_box[0][1] ; ++ix ) {
|
||||
const size_t offset = box_map_offset( my_use_box , ix , iy , iz );
|
||||
|
||||
my_send_map[ i ].push_back( my_use_id_map[ offset ] );
|
||||
}}}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,210 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef BOXMESHPARTITION_HPP
|
||||
#define BOXMESHPARTITION_HPP
|
||||
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
struct BoxType {
|
||||
size_t data[3][2] ;
|
||||
|
||||
typedef size_t range_type[2] ;
|
||||
|
||||
inline
|
||||
const range_type & operator[]( size_t i ) const { return data[i]; }
|
||||
|
||||
inline
|
||||
range_type & operator[]( size_t i ) { return data[i]; }
|
||||
|
||||
inline
|
||||
bool operator == ( const BoxType & rhs ) const
|
||||
{
|
||||
return data[0][0] == rhs.data[0][0] && data[0][1] == rhs.data[0][1] &&
|
||||
data[1][0] == rhs.data[1][0] && data[1][1] == rhs.data[2][1] &&
|
||||
data[2][0] == rhs.data[2][0] && data[2][1] == rhs.data[2][1] ;
|
||||
}
|
||||
|
||||
inline
|
||||
bool operator != ( const BoxType & rhs ) const
|
||||
{
|
||||
return data[0][0] != rhs.data[0][0] || data[0][1] != rhs.data[0][1] ||
|
||||
data[1][0] != rhs.data[1][0] || data[1][1] != rhs.data[1][1] ||
|
||||
data[2][0] != rhs.data[2][0] || data[2][1] != rhs.data[2][1] ;
|
||||
}
|
||||
};
|
||||
|
||||
inline
|
||||
size_t count( const BoxType & b )
|
||||
{
|
||||
size_t n = 1 ;
|
||||
for ( size_t i = 0 ; i < 3 ; ++i ) {
|
||||
n *= b[i][1] > b[i][0] ? b[i][1] - b[i][0] : 0 ;
|
||||
}
|
||||
return n ;
|
||||
}
|
||||
|
||||
inline
|
||||
bool contain( const BoxType & b , size_t i , size_t j , size_t k )
|
||||
{
|
||||
return b[0][0] <= i && i < b[0][1] &&
|
||||
b[1][0] <= j && j < b[1][1] &&
|
||||
b[2][0] <= k && k < b[2][1] ;
|
||||
}
|
||||
|
||||
inline
|
||||
BoxType intersect( const BoxType & x , const BoxType & y )
|
||||
{
|
||||
BoxType z ;
|
||||
for ( size_t i = 0 ; i < 3 ; ++i ) {
|
||||
z[i][0] = std::max( x[i][0] , y[i][0] );
|
||||
z[i][1] = std::min( x[i][1] , y[i][1] );
|
||||
}
|
||||
|
||||
return z ;
|
||||
}
|
||||
|
||||
inline
|
||||
std::ostream & operator << ( std::ostream & s , const BoxType & box )
|
||||
{
|
||||
s << "{ "
|
||||
<< box[0][0] << " " << box[0][1] << " , "
|
||||
<< box[1][0] << " " << box[1][1] << " , "
|
||||
<< box[2][0] << " " << box[2][1] << " }" ;
|
||||
return s ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
class BoxBounds {
|
||||
public:
|
||||
/** \brief Default bounds to one layer of ghosting */
|
||||
virtual
|
||||
void apply( const BoxType & box_global ,
|
||||
const BoxType & box_part ,
|
||||
BoxType & box_interior ,
|
||||
BoxType & box_use ) const = 0 ;
|
||||
|
||||
virtual ~BoxBounds() {}
|
||||
BoxBounds() {}
|
||||
};
|
||||
|
||||
class BoxBoundsLinear : public BoxBounds
|
||||
{
|
||||
public:
|
||||
/** \brief Default bounds to one layer of ghosting */
|
||||
virtual
|
||||
void apply( const BoxType & box_global ,
|
||||
const BoxType & box_part ,
|
||||
BoxType & box_interior ,
|
||||
BoxType & box_use ) const ;
|
||||
|
||||
virtual ~BoxBoundsLinear() {}
|
||||
BoxBoundsLinear() {}
|
||||
};
|
||||
|
||||
class BoxBoundsQuadratic : public BoxBounds {
|
||||
public:
|
||||
/** \brief Quadratic mesh: even ordinates have two layers,
|
||||
* odd ordinates have one layer.
|
||||
*/
|
||||
virtual
|
||||
void apply( const BoxType & box_global ,
|
||||
const BoxType & box_part ,
|
||||
BoxType & box_interior ,
|
||||
BoxType & box_use ) const ;
|
||||
|
||||
virtual ~BoxBoundsQuadratic() {}
|
||||
BoxBoundsQuadratic() {}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/* Partition box into part_boxes.size() sub-boxes */
|
||||
|
||||
void box_partition_rcb( const BoxType & root_box ,
|
||||
std::vector<BoxType> & part_boxes );
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/* Determine local id layout and communication maps for partitioned boxes.
|
||||
*
|
||||
* Local ids are layed out as follows:
|
||||
* { [ owned-interior ids not sent ] ,
|
||||
* [ owned-boundary ids to be sent to other processes ] ,
|
||||
* [ received ids from processor ( my_part + 1 ) % part_count ]
|
||||
* [ received ids from processor ( my_part + 2 ) % part_count ]
|
||||
* [ received ids from processor ( my_part + 3 ) % part_count ]
|
||||
* ... };
|
||||
*
|
||||
* This layout allows
|
||||
* (1) received data to be copied into a contiguous block of memory
|
||||
* (2) send data to be extracted from a contiguous block of memory.
|
||||
*/
|
||||
void box_partition_maps(
|
||||
const BoxType & root_box , // [in] Global box
|
||||
const std::vector<BoxType> & part_boxes , // [in] Partitioned boxes
|
||||
const BoxBounds & use_boxes , // [in] Ghost boundaries
|
||||
const size_t my_part , // [in] My local part
|
||||
BoxType & my_use_box , // [out] My used box with ghost
|
||||
std::vector<size_t> & my_use_id_map , // [out] Local ordering map
|
||||
size_t & my_count_interior , // [out] How many interior
|
||||
size_t & my_count_owned , // [out] How many owned
|
||||
size_t & my_count_uses , // [out] How may used
|
||||
std::vector<size_t> & my_part_counts , // [out] Partitioning of my_use_id_map
|
||||
std::vector<std::vector<size_t> > & my_send_map ); // [out] Send id map
|
||||
|
||||
/* Mapping of cartesian coordinate to local id */
|
||||
size_t box_map_id( const BoxType & my_use_box ,
|
||||
const std::vector<size_t> & my_use_id_map ,
|
||||
const size_t global_i ,
|
||||
const size_t global_j ,
|
||||
const size_t global_k );
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef BOXMESHPARTITION_HPP */
|
||||
|
||||
@ -1,16 +0,0 @@
|
||||
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
SET(SOURCES "")
|
||||
|
||||
FILE(GLOB SOURCES *.cpp)
|
||||
|
||||
SET(LIBRARIES kokkoscore)
|
||||
|
||||
TRIBITS_ADD_EXECUTABLE(
|
||||
multi_fem
|
||||
SOURCES ${SOURCES}
|
||||
COMM serial mpi
|
||||
)
|
||||
|
||||
@ -1,452 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef EXPLICIT_DRIVER_HPP
|
||||
#define EXPLICIT_DRIVER_HPP
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
#include <ExplicitFunctors.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Explicit {
|
||||
|
||||
struct PerformanceData {
|
||||
double mesh_time ;
|
||||
double init_time ;
|
||||
double internal_force_time ;
|
||||
double central_diff ;
|
||||
double comm_time ;
|
||||
size_t number_of_steps ;
|
||||
|
||||
PerformanceData()
|
||||
: mesh_time(0)
|
||||
, init_time(0)
|
||||
, internal_force_time(0)
|
||||
, central_diff(0)
|
||||
, comm_time(0)
|
||||
, number_of_steps(0)
|
||||
{}
|
||||
|
||||
void best( const PerformanceData & rhs )
|
||||
{
|
||||
if ( rhs.mesh_time < mesh_time ) mesh_time = rhs.mesh_time ;
|
||||
if ( rhs.init_time < init_time ) init_time = rhs.init_time ;
|
||||
if ( rhs.internal_force_time < internal_force_time ) internal_force_time = rhs.internal_force_time ;
|
||||
if ( rhs.central_diff < central_diff ) central_diff = rhs.central_diff ;
|
||||
if ( rhs.comm_time < comm_time ) comm_time = rhs.comm_time ;
|
||||
}
|
||||
};
|
||||
|
||||
template< typename Scalar , class FixtureType >
|
||||
PerformanceData run( const typename FixtureType::FEMeshType & mesh ,
|
||||
const int global_max_x ,
|
||||
const int global_max_y ,
|
||||
const int global_max_z ,
|
||||
const int steps ,
|
||||
const int print_sample )
|
||||
{
|
||||
typedef Scalar scalar_type ;
|
||||
typedef FixtureType fixture_type ;
|
||||
typedef typename fixture_type::execution_space execution_space ;
|
||||
//typedef typename fixture_type::FEMeshType mesh_type ; // unused
|
||||
|
||||
enum { ElementNodeCount = fixture_type::element_node_count };
|
||||
|
||||
const int NumStates = 2;
|
||||
|
||||
const int total_num_steps = steps ;
|
||||
|
||||
const Scalar user_dt = 5.0e-6;
|
||||
//const Scalar end_time = 0.0050;
|
||||
|
||||
// element block parameters
|
||||
const Scalar lin_bulk_visc = 0.0;
|
||||
const Scalar quad_bulk_visc = 0.0;
|
||||
|
||||
// const Scalar lin_bulk_visc = 0.06;
|
||||
// const Scalar quad_bulk_visc = 1.2;
|
||||
// const Scalar hg_stiffness = 0.0;
|
||||
// const Scalar hg_viscosity = 0.0;
|
||||
// const Scalar hg_stiffness = 0.03;
|
||||
// const Scalar hg_viscosity = 0.001;
|
||||
|
||||
// material properties
|
||||
const Scalar youngs_modulus=1.0e6;
|
||||
const Scalar poissons_ratio=0.0;
|
||||
const Scalar density = 8.0e-4;
|
||||
|
||||
const comm::Machine machine = mesh.parallel_data_map.machine ;
|
||||
|
||||
PerformanceData perf_data ;
|
||||
|
||||
Kokkos::Impl::Timer wall_clock ;
|
||||
|
||||
//------------------------------------
|
||||
// Generate fields
|
||||
|
||||
typedef Fields< scalar_type , execution_space > fields_type ;
|
||||
|
||||
fields_type mesh_fields( mesh ,
|
||||
lin_bulk_visc ,
|
||||
quad_bulk_visc ,
|
||||
youngs_modulus ,
|
||||
poissons_ratio ,
|
||||
density );
|
||||
|
||||
typename fields_type::node_coords_type::HostMirror
|
||||
model_coords_h = Kokkos::create_mirror( mesh_fields.model_coords );
|
||||
|
||||
typename fields_type::geom_state_array_type::HostMirror
|
||||
displacement_h = Kokkos::create_mirror( mesh_fields.displacement );
|
||||
|
||||
typename fields_type::geom_state_array_type::HostMirror
|
||||
velocity_h = Kokkos::create_mirror( mesh_fields.velocity );
|
||||
|
||||
Kokkos::deep_copy( model_coords_h , mesh_fields.model_coords );
|
||||
|
||||
//------------------------------------
|
||||
// Initialization
|
||||
|
||||
initialize_element<Scalar,execution_space>::apply( mesh_fields );
|
||||
initialize_node< Scalar,execution_space>::apply( mesh_fields );
|
||||
|
||||
const Scalar x_bc = global_max_x ;
|
||||
|
||||
// Initial condition on velocity to initiate a pulse along the X axis
|
||||
{
|
||||
const unsigned X = 0;
|
||||
for (int inode = 0; inode< mesh_fields.num_nodes; ++inode) {
|
||||
if ( model_coords_h(inode,X) == 0) {
|
||||
velocity_h(inode,X,0) = 1.0e3;
|
||||
velocity_h(inode,X,1) = 1.0e3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Kokkos::deep_copy( mesh_fields.velocity , velocity_h );
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
// We will call a sequence of functions. These functions have been
|
||||
// grouped into several functors to balance the number of global memory
|
||||
// accesses versus requiring too many registers or too much L1 cache.
|
||||
// Global memory accees have read/write cost and memory subsystem contention cost.
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
perf_data.init_time = comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
// Parameters required for the internal force computations.
|
||||
|
||||
int current_state = 0;
|
||||
int previous_state = 0;
|
||||
int next_state = 0;
|
||||
|
||||
perf_data.number_of_steps = total_num_steps ;
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
typedef typename
|
||||
fields_type::geom_state_array_type::value_type comm_value_type ;
|
||||
|
||||
const unsigned comm_value_count = 6 ;
|
||||
|
||||
Kokkos::AsyncExchange< comm_value_type , execution_space ,
|
||||
Kokkos::ParallelDataMap >
|
||||
comm_exchange( mesh.parallel_data_map , comm_value_count );
|
||||
|
||||
#endif
|
||||
|
||||
for (int step = 0; step < total_num_steps; ++step) {
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
{
|
||||
// Communicate "send" nodes' displacement and velocity next_state
|
||||
// to the ghosted nodes.
|
||||
// buffer packages: { { dx , dy , dz , vx , vy , vz }_node }
|
||||
|
||||
pack_state< Scalar , execution_space >
|
||||
::apply( comm_exchange.buffer() ,
|
||||
mesh.parallel_data_map.count_interior ,
|
||||
mesh.parallel_data_map.count_send ,
|
||||
mesh_fields , next_state );
|
||||
|
||||
comm_exchange.setup();
|
||||
|
||||
comm_exchange.send_receive();
|
||||
|
||||
unpack_state< Scalar , execution_space >
|
||||
::apply( mesh_fields , next_state ,
|
||||
comm_exchange.buffer() ,
|
||||
mesh.parallel_data_map.count_owned ,
|
||||
mesh.parallel_data_map.count_receive );
|
||||
|
||||
execution_space::fence();
|
||||
}
|
||||
#endif
|
||||
|
||||
perf_data.comm_time += comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// rotate the states
|
||||
|
||||
previous_state = current_state;
|
||||
current_state = next_state;
|
||||
++next_state;
|
||||
next_state %= NumStates;
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
// First kernel 'grad_hgop' combines two functions:
|
||||
// gradient, velocity gradient
|
||||
grad< Scalar , execution_space >::apply( mesh_fields ,
|
||||
current_state ,
|
||||
previous_state );
|
||||
|
||||
// Combine tensor decomposition and rotation functions.
|
||||
decomp_rotate< Scalar , execution_space >::apply( mesh_fields ,
|
||||
current_state ,
|
||||
previous_state );
|
||||
|
||||
internal_force< Scalar , execution_space >::apply( mesh_fields ,
|
||||
user_dt ,
|
||||
current_state );
|
||||
|
||||
execution_space::fence();
|
||||
|
||||
perf_data.internal_force_time +=
|
||||
comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
// Assembly of elements' contributions to nodal force into
|
||||
// a nodal force vector. Update the accelerations, velocities,
|
||||
// displacements.
|
||||
// The same pattern can be used for matrix-free residual computations.
|
||||
nodal_step< Scalar , execution_space >::apply( mesh_fields ,
|
||||
x_bc ,
|
||||
current_state,
|
||||
next_state );
|
||||
execution_space::fence();
|
||||
|
||||
perf_data.central_diff +=
|
||||
comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
if ( print_sample && 0 == step % 100 ) {
|
||||
Kokkos::deep_copy( displacement_h , mesh_fields.displacement );
|
||||
Kokkos::deep_copy( velocity_h , mesh_fields.velocity );
|
||||
|
||||
if ( 1 == print_sample ) {
|
||||
|
||||
std::cout << "step " << step
|
||||
<< " : displacement(*,0,0) =" ;
|
||||
for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
|
||||
if ( model_coords_h(i,1) == 0 && model_coords_h(i,2) == 0 ) {
|
||||
std::cout << " " << displacement_h(i,0,next_state);
|
||||
}
|
||||
}
|
||||
std::cout << std::endl ;
|
||||
|
||||
const float tol = 1.0e-6 ;
|
||||
const int yb = global_max_y ;
|
||||
const int zb = global_max_z ;
|
||||
std::cout << "step " << step
|
||||
<< " : displacement(*," << yb << "," << zb << ") =" ;
|
||||
for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
|
||||
if ( fabs( model_coords_h(i,1) - yb ) < tol &&
|
||||
fabs( model_coords_h(i,2) - zb ) < tol ) {
|
||||
std::cout << " " << displacement_h(i,0,next_state);
|
||||
}
|
||||
}
|
||||
std::cout << std::endl ;
|
||||
}
|
||||
else if ( 2 == print_sample ) {
|
||||
|
||||
const float tol = 1.0e-6 ;
|
||||
const int xb = global_max_x / 2 ;
|
||||
const int yb = global_max_y / 2 ;
|
||||
const int zb = global_max_z / 2 ;
|
||||
|
||||
for ( int i = 0 ; i < mesh_fields.num_nodes_owned ; ++i ) {
|
||||
if ( fabs( model_coords_h(i,0) - xb ) < tol &&
|
||||
fabs( model_coords_h(i,1) - yb ) < tol &&
|
||||
fabs( model_coords_h(i,2) - zb ) < tol ) {
|
||||
std::cout << "step " << step
|
||||
<< " : displacement("
|
||||
<< xb << "," << yb << "," << zb << ") = {"
|
||||
<< std::setprecision(6)
|
||||
<< " " << displacement_h(i,0,next_state)
|
||||
<< std::setprecision(2)
|
||||
<< " " << displacement_h(i,1,next_state)
|
||||
<< std::setprecision(2)
|
||||
<< " " << displacement_h(i,2,next_state)
|
||||
<< " }" << std::endl ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return perf_data ;
|
||||
}
|
||||
|
||||
|
||||
template <typename Scalar, typename Device>
|
||||
static void driver( const char * const label ,
|
||||
comm::Machine machine ,
|
||||
const int gang_count ,
|
||||
const int elem_count_beg ,
|
||||
const int elem_count_end ,
|
||||
const int runs )
|
||||
{
|
||||
typedef Scalar scalar_type ;
|
||||
typedef Device execution_space ;
|
||||
typedef double coordinate_scalar_type ;
|
||||
typedef FixtureElementHex8 fixture_element_type ;
|
||||
|
||||
typedef BoxMeshFixture< coordinate_scalar_type ,
|
||||
execution_space ,
|
||||
fixture_element_type > fixture_type ;
|
||||
|
||||
typedef typename fixture_type::FEMeshType mesh_type ;
|
||||
|
||||
const size_t proc_count = comm::size( machine );
|
||||
const size_t proc_rank = comm::rank( machine );
|
||||
|
||||
const int space = 15 ;
|
||||
const int steps = 1000 ;
|
||||
const int print_sample = 0 ;
|
||||
|
||||
if ( comm::rank( machine ) == 0 ) {
|
||||
|
||||
std::cout << std::endl ;
|
||||
std::cout << "\"MiniExplicitDynamics with Kokkos " << label
|
||||
<< " time_steps(" << steps << ")"
|
||||
<< "\"" << std::endl;
|
||||
std::cout << std::left << std::setw(space) << "\"Element\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"Node\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"Initialize\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"ElemForce\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"NodeUpdate\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"NodeComm\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"Time/Elem\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"Time/Node\"";
|
||||
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << std::left << std::setw(space) << "\"count\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"count\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"microsec\" , ";
|
||||
std::cout << std::left << std::setw(space) << "\"microsec\"";
|
||||
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
for(int i = elem_count_beg ; i < elem_count_end ; i *= 2 )
|
||||
{
|
||||
const int iz = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) );
|
||||
const int iy = iz + 1 ;
|
||||
const int ix = 2 * iy ;
|
||||
const int nelem = ix * iy * iz ;
|
||||
const int nnode = ( ix + 1 ) * ( iy + 1 ) * ( iz + 1 );
|
||||
|
||||
mesh_type mesh =
|
||||
fixture_type::create( proc_count , proc_rank , gang_count ,
|
||||
ix , iy , iz );
|
||||
|
||||
mesh.parallel_data_map.machine = machine ;
|
||||
|
||||
PerformanceData perf , best ;
|
||||
|
||||
for(int j = 0; j < runs; j++){
|
||||
|
||||
perf = run<scalar_type,fixture_type>(mesh,ix,iy,iz,steps,print_sample);
|
||||
|
||||
if( j == 0 ) {
|
||||
best = perf ;
|
||||
}
|
||||
else {
|
||||
best.best( perf );
|
||||
}
|
||||
}
|
||||
|
||||
if ( comm::rank( machine ) == 0 ) {
|
||||
double time_per_element =
|
||||
( best.internal_force_time ) / ( nelem * perf.number_of_steps );
|
||||
double time_per_node =
|
||||
( best.comm_time + best.central_diff ) / ( nnode * perf.number_of_steps );
|
||||
|
||||
std::cout << std::setw(space-3) << nelem << " , "
|
||||
<< std::setw(space-3) << nnode << " , "
|
||||
<< std::setw(space-3) << best.number_of_steps << " , "
|
||||
<< std::setw(space-3) << best.init_time * 1000000 << " , "
|
||||
<< std::setw(space-3)
|
||||
<< ( best.internal_force_time * 1000000 ) / best.number_of_steps << " , "
|
||||
<< std::setw(space-3)
|
||||
<< ( best.central_diff * 1000000 ) / best.number_of_steps << " , "
|
||||
<< std::setw(space-3)
|
||||
<< ( best.comm_time * 1000000 ) / best.number_of_steps << " , "
|
||||
<< std::setw(space-3) << time_per_element * 1000000 << " , "
|
||||
<< std::setw(space-3) << time_per_node * 1000000
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace Explicit
|
||||
|
||||
#endif /* #ifndef EXPLICIT_DRIVER_HPP */
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,86 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_FEMESH_HPP
|
||||
#define KOKKOS_FEMESH_HPP
|
||||
|
||||
#include <utility>
|
||||
#include <limits>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_StaticCrsGraph.hpp>
|
||||
|
||||
#include <ParallelComm.hpp>
|
||||
#include <ParallelDataMap.hpp>
|
||||
|
||||
namespace HybridFEM {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief Finite element mesh fixture for hybrid parallel performance tests.
|
||||
*/
|
||||
template< typename CoordScalarType , unsigned ElemNodeCount , class Device >
|
||||
struct FEMesh {
|
||||
|
||||
typedef typename Device::size_type size_type ;
|
||||
|
||||
static const size_type element_node_count = ElemNodeCount ;
|
||||
|
||||
typedef Kokkos::View< CoordScalarType*[3] , Device > node_coords_type ;
|
||||
typedef Kokkos::View< size_type*[ElemNodeCount], Device > elem_node_ids_type ;
|
||||
typedef Kokkos::StaticCrsGraph< size_type[2] , Device > node_elem_ids_type ;
|
||||
|
||||
node_coords_type node_coords ;
|
||||
elem_node_ids_type elem_node_ids ;
|
||||
node_elem_ids_type node_elem_ids ;
|
||||
Kokkos::ParallelDataMap parallel_data_map ;
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
#endif /* #ifndef KOKKOS_FEMESH_HPP */
|
||||
|
||||
@ -1,268 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef ELEMENTHEX_HPP
|
||||
#define ELEMENTHEX_HPP
|
||||
|
||||
namespace HybridFEM {
|
||||
|
||||
template< unsigned NodeCount >
|
||||
class HexElement_TensorData ;
|
||||
|
||||
template< unsigned NodeCount , class Device >
|
||||
class HexElement_TensorEval ;
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief Evaluate Hex element on interval [-1,1]^3 */
|
||||
template<>
|
||||
class HexElement_TensorData< 8 > {
|
||||
public:
|
||||
|
||||
static const unsigned element_node_count = 8 ;
|
||||
static const unsigned spatial_dimension = 3 ;
|
||||
static const unsigned integration_count_1d = 2 ;
|
||||
static const unsigned function_count_1d = 2 ;
|
||||
|
||||
float values_1d [ function_count_1d ][ integration_count_1d ];
|
||||
float derivs_1d [ function_count_1d ][ integration_count_1d ];
|
||||
float weights_1d[ integration_count_1d ];
|
||||
|
||||
unsigned char eval_map[ element_node_count ][4] ;
|
||||
|
||||
static float eval_value_1d( const unsigned jf , const float x )
|
||||
{
|
||||
return 0 == jf ? 0.5 * ( 1.0 - x ) : (
|
||||
1 == jf ? 0.5 * ( 1.0 + x ) : 0 );
|
||||
}
|
||||
|
||||
static float eval_deriv_1d( const unsigned jf , const float )
|
||||
{
|
||||
return 0 == jf ? -0.5 : (
|
||||
1 == jf ? 0.5 : 0 );
|
||||
}
|
||||
|
||||
HexElement_TensorData()
|
||||
{
|
||||
const unsigned char tmp_map[ element_node_count ][ spatial_dimension ] =
|
||||
{ { 0 , 0 , 0 },
|
||||
{ 1 , 0 , 0 },
|
||||
{ 1 , 1 , 0 },
|
||||
{ 0 , 1 , 0 },
|
||||
{ 0 , 0 , 1 },
|
||||
{ 1 , 0 , 1 },
|
||||
{ 1 , 1 , 1 },
|
||||
{ 0 , 1 , 1 } };
|
||||
|
||||
weights_1d[0] = 1 ;
|
||||
weights_1d[1] = 1 ;
|
||||
|
||||
const float points_1d[ integration_count_1d ] =
|
||||
{ -0.577350269 , 0.577350269 };
|
||||
|
||||
for ( unsigned i = 0 ; i < element_node_count ; ++i ) {
|
||||
eval_map[i][0] = tmp_map[i][0];
|
||||
eval_map[i][1] = tmp_map[i][1];
|
||||
eval_map[i][2] = tmp_map[i][2];
|
||||
}
|
||||
|
||||
for ( unsigned xp = 0 ; xp < integration_count_1d ; ++xp ) {
|
||||
for ( unsigned xf = 0 ; xf < function_count_1d ; ++xf ) {
|
||||
values_1d[xp][xf] = eval_value_1d( xf , points_1d[xp] );
|
||||
derivs_1d[xp][xf] = eval_deriv_1d( xf , points_1d[xp] );
|
||||
}}
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<>
|
||||
class HexElement_TensorData< 27 > {
|
||||
public:
|
||||
|
||||
static const unsigned element_node_count = 27 ;
|
||||
static const unsigned spatial_dimension = 3 ;
|
||||
static const unsigned integration_count_1d = 3 ;
|
||||
static const unsigned function_count_1d = 3 ;
|
||||
|
||||
float values_1d [ function_count_1d ][ integration_count_1d ];
|
||||
float derivs_1d [ function_count_1d ][ integration_count_1d ];
|
||||
float weights_1d[ integration_count_1d ];
|
||||
|
||||
unsigned char eval_map[ element_node_count ][4] ;
|
||||
|
||||
// sizeof(EvaluateElementHex) = 111 bytes =
|
||||
// sizeof(float) * 9 +
|
||||
// sizeof(float) * 9 +
|
||||
// sizeof(float) * 3 +
|
||||
// sizeof(char) * 27
|
||||
|
||||
static float eval_value_1d( const unsigned jf , const float p )
|
||||
{
|
||||
return 0 == jf ? 0.5 * p * ( p - 1 ) : (
|
||||
1 == jf ? 1.0 - p * p : (
|
||||
2 == jf ? 0.5 * p * ( p + 1 ) : 0 ));
|
||||
}
|
||||
|
||||
static float eval_deriv_1d( const unsigned jf , const float p )
|
||||
{
|
||||
return 0 == jf ? p - 0.5 : (
|
||||
1 == jf ? -2.0 * p : (
|
||||
2 == jf ? p + 0.5 : 0 ));
|
||||
}
|
||||
|
||||
HexElement_TensorData()
|
||||
{
|
||||
const unsigned char tmp_map[ element_node_count ][ spatial_dimension ] =
|
||||
{ { 0 , 0 , 0 },
|
||||
{ 2 , 0 , 0 },
|
||||
{ 2 , 2 , 0 },
|
||||
{ 0 , 2 , 0 },
|
||||
{ 0 , 0 , 2 },
|
||||
{ 2 , 0 , 2 },
|
||||
{ 2 , 2 , 2 },
|
||||
{ 0 , 2 , 2 },
|
||||
{ 1 , 0 , 0 },
|
||||
{ 2 , 1 , 0 },
|
||||
{ 1 , 2 , 0 },
|
||||
{ 0 , 1 , 0 },
|
||||
{ 0 , 0 , 1 },
|
||||
{ 2 , 0 , 1 },
|
||||
{ 2 , 2 , 1 },
|
||||
{ 0 , 2 , 1 },
|
||||
{ 1 , 0 , 2 },
|
||||
{ 2 , 1 , 2 },
|
||||
{ 1 , 2 , 2 },
|
||||
{ 0 , 1 , 2 },
|
||||
{ 1 , 1 , 1 },
|
||||
{ 1 , 1 , 0 },
|
||||
{ 1 , 1 , 2 },
|
||||
{ 0 , 1 , 1 },
|
||||
{ 2 , 1 , 1 },
|
||||
{ 1 , 0 , 1 },
|
||||
{ 1 , 2 , 1 } };
|
||||
|
||||
// Interval [-1,1]
|
||||
|
||||
weights_1d[0] = 0.555555556 ;
|
||||
weights_1d[1] = 0.888888889 ;
|
||||
weights_1d[2] = 0.555555556 ;
|
||||
|
||||
const float points_1d[3] = { -0.774596669 ,
|
||||
0.000000000 ,
|
||||
0.774596669 };
|
||||
|
||||
for ( unsigned i = 0 ; i < element_node_count ; ++i ) {
|
||||
eval_map[i][0] = tmp_map[i][0];
|
||||
eval_map[i][1] = tmp_map[i][1];
|
||||
eval_map[i][2] = tmp_map[i][2];
|
||||
}
|
||||
|
||||
for ( unsigned xp = 0 ; xp < integration_count_1d ; ++xp ) {
|
||||
for ( unsigned xf = 0 ; xf < function_count_1d ; ++xf ) {
|
||||
values_1d[xp][xf] = eval_value_1d( xf , points_1d[xp] );
|
||||
derivs_1d[xp][xf] = eval_deriv_1d( xf , points_1d[xp] );
|
||||
}}
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< unsigned NodeCount >
|
||||
class HexElement_Data {
|
||||
public:
|
||||
static const unsigned spatial_dimension = 3 ;
|
||||
static const unsigned element_node_count = NodeCount ;
|
||||
static const unsigned integration_count = NodeCount ;
|
||||
static const unsigned function_count = NodeCount ;
|
||||
|
||||
float weights[ integration_count ] ;
|
||||
float values[ integration_count ][ function_count ];
|
||||
float gradients[ integration_count ][ spatial_dimension ][ function_count ];
|
||||
|
||||
HexElement_Data()
|
||||
{
|
||||
HexElement_TensorData< NodeCount > tensor_data ;
|
||||
|
||||
for ( unsigned ip = 0 ; ip < integration_count ; ++ip ) {
|
||||
|
||||
const unsigned ipx = tensor_data.eval_map[ip][0] ;
|
||||
const unsigned ipy = tensor_data.eval_map[ip][1] ;
|
||||
const unsigned ipz = tensor_data.eval_map[ip][2] ;
|
||||
|
||||
weights[ip] = tensor_data.weights_1d[ ipx ] *
|
||||
tensor_data.weights_1d[ ipy ] *
|
||||
tensor_data.weights_1d[ ipz ] ;
|
||||
|
||||
for ( unsigned jf = 0 ; jf < function_count ; ++jf ) {
|
||||
|
||||
const unsigned jfx = tensor_data.eval_map[jf][0] ;
|
||||
const unsigned jfy = tensor_data.eval_map[jf][1] ;
|
||||
const unsigned jfz = tensor_data.eval_map[jf][2] ;
|
||||
|
||||
values[ip][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
|
||||
tensor_data.values_1d[ ipy ][ jfy ] *
|
||||
tensor_data.values_1d[ ipz ][ jfz ] ;
|
||||
|
||||
gradients[ip][0][jf] = tensor_data.derivs_1d[ ipx ][ jfx ] *
|
||||
tensor_data.values_1d[ ipy ][ jfy ] *
|
||||
tensor_data.values_1d[ ipz ][ jfz ] ;
|
||||
|
||||
gradients[ip][1][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
|
||||
tensor_data.derivs_1d[ ipy ][ jfy ] *
|
||||
tensor_data.values_1d[ ipz ][ jfz ] ;
|
||||
|
||||
gradients[ip][2][jf] = tensor_data.values_1d[ ipx ][ jfx ] *
|
||||
tensor_data.values_1d[ ipy ][ jfy ] *
|
||||
tensor_data.derivs_1d[ ipz ][ jfz ] ;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
#endif /* #ifndef ELEMENTHEX_HPP */
|
||||
|
||||
|
||||
@ -1,443 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_HEXEXPLICITFUNCTIONS_HPP
|
||||
#define KOKKOS_HEXEXPLICITFUNCTIONS_HPP
|
||||
|
||||
#include <math.h>
|
||||
|
||||
namespace Explicit {
|
||||
|
||||
struct Hex8Functions
|
||||
{
|
||||
static const unsigned SpatialDim = 3 ;
|
||||
static const unsigned ElemNodeCount = 8 ;
|
||||
|
||||
// Indices for full 3x3 tensor:
|
||||
|
||||
static const unsigned K_F_XX = 0 ;
|
||||
static const unsigned K_F_YY = 1 ;
|
||||
static const unsigned K_F_ZZ = 2 ;
|
||||
static const unsigned K_F_XY = 3 ;
|
||||
static const unsigned K_F_YZ = 4 ;
|
||||
static const unsigned K_F_ZX = 5 ;
|
||||
static const unsigned K_F_YX = 6 ;
|
||||
static const unsigned K_F_ZY = 7 ;
|
||||
static const unsigned K_F_XZ = 8 ;
|
||||
static const unsigned K_F_SIZE = 9 ;
|
||||
|
||||
// Indexes into a 3 by 3 symmetric tensor stored as a length 6 vector
|
||||
|
||||
static const unsigned K_S_XX = 0 ;
|
||||
static const unsigned K_S_YY = 1 ;
|
||||
static const unsigned K_S_ZZ = 2 ;
|
||||
static const unsigned K_S_XY = 3 ;
|
||||
static const unsigned K_S_YZ = 4 ;
|
||||
static const unsigned K_S_ZX = 5 ;
|
||||
static const unsigned K_S_YX = 3 ;
|
||||
static const unsigned K_S_ZY = 4 ;
|
||||
static const unsigned K_S_XZ = 5 ;
|
||||
static const unsigned K_S_SIZE = 6 ;
|
||||
|
||||
// Indexes into a 3 by 3 skew symmetric tensor stored as a length 3 vector
|
||||
|
||||
static const unsigned K_V_XY = 0 ;
|
||||
static const unsigned K_V_YZ = 1 ;
|
||||
static const unsigned K_V_ZX = 2 ;
|
||||
static const unsigned K_V_SIZE = 3 ;
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarA , typename ScalarB >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
double dot8( const ScalarA * const a , const ScalarB * const b )
|
||||
{ return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3] +
|
||||
a[4] * b[4] + a[5] * b[5] + a[6] * b[6] + a[7] * b[7] ; }
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
template< class ScalarPrecise ,
|
||||
class ScalarCompact >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void grad( const ScalarPrecise x[] ,
|
||||
const ScalarPrecise z[] ,
|
||||
ScalarCompact grad_y[] )
|
||||
{
|
||||
const ScalarCompact R42=(x[3] - x[1]);
|
||||
const ScalarCompact R52=(x[4] - x[1]);
|
||||
const ScalarCompact R54=(x[4] - x[3]);
|
||||
|
||||
const ScalarCompact R63=(x[5] - x[2]);
|
||||
const ScalarCompact R83=(x[7] - x[2]);
|
||||
const ScalarCompact R86=(x[7] - x[5]);
|
||||
|
||||
const ScalarCompact R31=(x[2] - x[0]);
|
||||
const ScalarCompact R61=(x[5] - x[0]);
|
||||
const ScalarCompact R74=(x[6] - x[3]);
|
||||
|
||||
const ScalarCompact R72=(x[6] - x[1]);
|
||||
const ScalarCompact R75=(x[6] - x[4]);
|
||||
const ScalarCompact R81=(x[7] - x[0]);
|
||||
|
||||
const ScalarCompact t1=(R63 + R54);
|
||||
const ScalarCompact t2=(R61 + R74);
|
||||
const ScalarCompact t3=(R72 + R81);
|
||||
|
||||
const ScalarCompact t4 =(R86 + R42);
|
||||
const ScalarCompact t5 =(R83 + R52);
|
||||
const ScalarCompact t6 =(R75 + R31);
|
||||
|
||||
// Calculate Y gradient from X and Z data
|
||||
|
||||
grad_y[0] = (z[1] * t1) - (z[2] * R42) - (z[3] * t5) + (z[4] * t4) + (z[5] * R52) - (z[7] * R54);
|
||||
grad_y[1] = (z[2] * t2) + (z[3] * R31) - (z[0] * t1) - (z[5] * t6) + (z[6] * R63) - (z[4] * R61);
|
||||
grad_y[2] = (z[3] * t3) + (z[0] * R42) - (z[1] * t2) - (z[6] * t4) + (z[7] * R74) - (z[5] * R72);
|
||||
grad_y[3] = (z[0] * t5) - (z[1] * R31) - (z[2] * t3) + (z[7] * t6) + (z[4] * R81) - (z[6] * R83);
|
||||
grad_y[4] = (z[5] * t3) + (z[6] * R86) - (z[7] * t2) - (z[0] * t4) - (z[3] * R81) + (z[1] * R61);
|
||||
grad_y[5] = (z[6] * t5) - (z[4] * t3) - (z[7] * R75) + (z[1] * t6) - (z[0] * R52) + (z[2] * R72);
|
||||
grad_y[6] = (z[7] * t1) - (z[5] * t5) - (z[4] * R86) + (z[2] * t4) - (z[1] * R63) + (z[3] * R83);
|
||||
grad_y[7] = (z[4] * t2) - (z[6] * t1) + (z[5] * R75) - (z[3] * t6) - (z[2] * R74) + (z[0] * R54);
|
||||
}
|
||||
|
||||
template< class ScalarPrecise ,
|
||||
class ScalarCompact >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
void grad( const ScalarPrecise x[] ,
|
||||
const ScalarPrecise y[] ,
|
||||
const ScalarPrecise z[] ,
|
||||
ScalarCompact grad_x[] ,
|
||||
ScalarCompact grad_y[] ,
|
||||
ScalarCompact grad_z[] )
|
||||
{
|
||||
grad( x , z , grad_y );
|
||||
grad( z , y , grad_x );
|
||||
grad( y , x , grad_z );
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
template< class ScalarPrecise ,
|
||||
class ScalarCompact >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void polar_decomp( const float dt ,
|
||||
const ScalarCompact v_gr[] ,
|
||||
ScalarPrecise stretch[] /* INOUT */ ,
|
||||
ScalarCompact str_ten[] /* OUT */ ,
|
||||
ScalarCompact rot[] /* OUT */ )
|
||||
{
|
||||
const float dt_half = 0.5 * dt;
|
||||
|
||||
ScalarCompact vort[ K_V_SIZE ]; // Vorticity
|
||||
|
||||
// Symmetric part
|
||||
str_ten[K_S_XX] = v_gr[K_F_XX];
|
||||
str_ten[K_S_YY] = v_gr[K_F_YY];
|
||||
str_ten[K_S_ZZ] = v_gr[K_F_ZZ];
|
||||
str_ten[K_S_XY] = 0.5 * ( v_gr[K_F_XY] + v_gr[K_F_YX] );
|
||||
str_ten[K_S_YZ] = 0.5 * ( v_gr[K_F_YZ] + v_gr[K_F_ZY] );
|
||||
str_ten[K_S_ZX] = 0.5 * ( v_gr[K_F_ZX] + v_gr[K_F_XZ] );
|
||||
|
||||
// Skew Symmetric part
|
||||
vort[K_V_XY] = 0.5 * ( v_gr[K_F_XY] - v_gr[K_F_YX] );
|
||||
vort[K_V_YZ] = 0.5 * ( v_gr[K_F_YZ] - v_gr[K_F_ZY] );
|
||||
vort[K_V_ZX] = 0.5 * ( v_gr[K_F_ZX] - v_gr[K_F_XZ] );
|
||||
|
||||
// calculate the rates of rotation via gauss elimination.
|
||||
|
||||
ScalarCompact z1 = str_ten[K_S_XY] * stretch[K_S_ZX] -
|
||||
str_ten[K_S_ZX] * stretch[K_S_XY] +
|
||||
str_ten[K_S_YY] * stretch[K_S_YZ] -
|
||||
str_ten[K_S_YZ] * stretch[K_S_YY] +
|
||||
str_ten[K_S_YZ] * stretch[K_S_ZZ] -
|
||||
str_ten[K_S_ZZ] * stretch[K_S_YZ];
|
||||
|
||||
ScalarCompact z2 = str_ten[K_S_ZX] * stretch[K_S_XX] -
|
||||
str_ten[K_S_XX] * stretch[K_S_ZX] +
|
||||
str_ten[K_S_YZ] * stretch[K_S_XY] -
|
||||
str_ten[K_S_XY] * stretch[K_S_YZ] +
|
||||
str_ten[K_S_ZZ] * stretch[K_S_ZX] -
|
||||
str_ten[K_S_ZX] * stretch[K_S_ZZ];
|
||||
|
||||
ScalarCompact z3 = str_ten[K_S_XX] * stretch[K_S_XY] -
|
||||
str_ten[K_S_XY] * stretch[K_S_XX] +
|
||||
str_ten[K_S_XY] * stretch[K_S_YY] -
|
||||
str_ten[K_S_YY] * stretch[K_S_XY] +
|
||||
str_ten[K_S_ZX] * stretch[K_S_YZ] -
|
||||
str_ten[K_S_YZ] * stretch[K_S_ZX];
|
||||
|
||||
{
|
||||
// forward elimination
|
||||
|
||||
const ScalarCompact a1inv = 1.0 / (stretch[K_S_YY] + stretch[K_S_ZZ]);
|
||||
const ScalarCompact a4BYa1 = -1 * stretch[K_S_XY] * a1inv;
|
||||
const ScalarCompact a2inv = 1.0 / (stretch[K_S_ZZ] + stretch[K_S_XX] + stretch[K_S_XY] * a4BYa1);
|
||||
|
||||
const ScalarCompact a5 = -stretch[K_S_YZ] + stretch[K_S_ZX] * a4BYa1;
|
||||
|
||||
z2 -= z1 * a4BYa1;
|
||||
const ScalarCompact a6BYa1 = -1 * stretch[K_S_ZX] * a1inv;
|
||||
const ScalarCompact a5BYa2 = a5 * a2inv;
|
||||
z3 -= z1 * a6BYa1 - z2 * a5BYa2;
|
||||
|
||||
// backward substitution -
|
||||
|
||||
z3 /= (stretch[K_S_XX] + stretch[K_S_YY] + stretch[K_S_ZX] * a6BYa1 + a5 * a5BYa2);
|
||||
z2 = (z2 - a5 * z3) * a2inv;
|
||||
z1 = (z1*a1inv - a6BYa1 * z3 -a4BYa1 * z2);
|
||||
}
|
||||
|
||||
// calculate rotation rates - recall that spin_rate is an asymmetric tensor,
|
||||
// so compute spin rate vector as dual of spin rate tensor,
|
||||
// i.e w_i = e_ijk * spin_rate_jk
|
||||
|
||||
z1 += vort[K_V_YZ];
|
||||
z2 += vort[K_V_ZX];
|
||||
z3 += vort[K_V_XY];
|
||||
|
||||
{
|
||||
// update rotation tensor:
|
||||
// 1) premultiply old rotation tensor to get right-hand side.
|
||||
|
||||
ScalarCompact r_XX = rot[K_F_XX] + dt_half*( z3 * rot[K_F_YX] - z2 * rot[K_F_ZX] );
|
||||
ScalarCompact r_YX = rot[K_F_YX] + dt_half*( z1 * rot[K_F_ZX] - z3 * rot[K_F_XX] );
|
||||
ScalarCompact r_ZX = rot[K_F_ZX] + dt_half*( z2 * rot[K_F_XX] - z1 * rot[K_F_YX] );
|
||||
ScalarCompact r_XY = rot[K_F_XY] + dt_half*( z3 * rot[K_F_YY] - z2 * rot[K_F_ZY] );
|
||||
ScalarCompact r_YY = rot[K_F_YY] + dt_half*( z1 * rot[K_F_ZY] - z3 * rot[K_F_XY] );
|
||||
ScalarCompact r_ZY = rot[K_F_ZY] + dt_half*( z2 * rot[K_F_XY] - z1 * rot[K_F_YY] );
|
||||
ScalarCompact r_XZ = rot[K_F_XZ] + dt_half*( z3 * rot[K_F_YZ] - z2 * rot[K_F_ZZ] );
|
||||
ScalarCompact r_YZ = rot[K_F_YZ] + dt_half*( z1 * rot[K_F_ZZ] - z3 * rot[K_F_XZ] );
|
||||
ScalarCompact r_ZZ = rot[K_F_ZZ] + dt_half*( z2 * rot[K_F_XZ] - z1 * rot[K_F_YZ] );
|
||||
|
||||
|
||||
// 2) solve for new rotation tensor via gauss elimination.
|
||||
// forward elimination -
|
||||
|
||||
const ScalarCompact a12 = - dt_half * z3;
|
||||
const ScalarCompact a13 = dt_half * z2;
|
||||
ScalarCompact b32 = - dt_half * z1;
|
||||
const ScalarCompact a22inv = 1.0 / (1.0 + a12 * a12);
|
||||
|
||||
const ScalarCompact a13a12 = a13*a12;
|
||||
const ScalarCompact a23 = b32 + a13a12;
|
||||
|
||||
r_YX += r_XX * a12;
|
||||
r_YY += r_XY * a12;
|
||||
r_YZ += r_XZ * a12;
|
||||
|
||||
b32 = (b32 - a13a12) * a22inv;
|
||||
|
||||
r_ZX += r_XX * a13 + r_YX * b32;
|
||||
r_ZY += r_XY * a13 + r_YY * b32;
|
||||
r_ZZ += r_XZ * a13 + r_YZ * b32;
|
||||
|
||||
// backward substitution -
|
||||
|
||||
const ScalarCompact a33inv = 1.0 / (1.0 + a13 * a13 + a23 * b32);
|
||||
|
||||
rot[K_F_ZX] = r_ZX * a33inv;
|
||||
rot[K_F_ZY] = r_ZY * a33inv;
|
||||
rot[K_F_ZZ] = r_ZZ * a33inv;
|
||||
rot[K_F_YX] = ( r_YX - rot[K_F_ZX] * a23 ) * a22inv;
|
||||
rot[K_F_YY] = ( r_YY - rot[K_F_ZY] * a23 ) * a22inv;
|
||||
rot[K_F_YZ] = ( r_YZ - rot[K_F_ZZ] * a23 ) * a22inv;
|
||||
rot[K_F_XX] = r_XX - rot[K_F_ZX] * a13 - rot[K_F_YX] * a12;
|
||||
rot[K_F_XY] = r_XY - rot[K_F_ZY] * a13 - rot[K_F_YY] * a12;
|
||||
rot[K_F_XZ] = r_XZ - rot[K_F_ZZ] * a13 - rot[K_F_YZ] * a12;
|
||||
}
|
||||
|
||||
// update stretch tensor in the new configuration -
|
||||
|
||||
const ScalarCompact a1 = str_ten[K_S_XY] + vort[K_V_XY];
|
||||
const ScalarCompact a2 = str_ten[K_S_YZ] + vort[K_V_YZ];
|
||||
const ScalarCompact a3 = str_ten[K_S_ZX] + vort[K_V_ZX];
|
||||
const ScalarCompact b1 = str_ten[K_S_ZX] - vort[K_V_ZX];
|
||||
const ScalarCompact b2 = str_ten[K_S_XY] - vort[K_V_XY];
|
||||
const ScalarCompact b3 = str_ten[K_S_YZ] - vort[K_V_YZ];
|
||||
|
||||
const ScalarCompact s_XX = stretch[K_S_XX];
|
||||
const ScalarCompact s_YY = stretch[K_S_YY];
|
||||
const ScalarCompact s_ZZ = stretch[K_S_ZZ];
|
||||
const ScalarCompact s_XY = stretch[K_S_XY];
|
||||
const ScalarCompact s_YZ = stretch[K_S_YZ];
|
||||
const ScalarCompact s_ZX = stretch[K_S_ZX];
|
||||
|
||||
stretch[K_S_XX] += dt * (str_ten[K_S_XX] * s_XX + ( a1 + z3 ) * s_XY + ( b1 - z2 ) * s_ZX);
|
||||
stretch[K_S_YY] += dt * (str_ten[K_S_YY] * s_YY + ( a2 + z1 ) * s_YZ + ( b2 - z3 ) * s_XY);
|
||||
stretch[K_S_ZZ] += dt * (str_ten[K_S_ZZ] * s_ZZ + ( a3 + z2 ) * s_ZX + ( b3 - z1 ) * s_YZ);
|
||||
stretch[K_S_XY] += dt * (str_ten[K_S_XX] * s_XY + ( a1 ) * s_YY + ( b1 ) * s_YZ - z3 * s_XX + z1 * s_ZX);
|
||||
stretch[K_S_YZ] += dt * (str_ten[K_S_YY] * s_YZ + ( a2 ) * s_ZZ + ( b2 ) * s_ZX - z1 * s_YY + z2 * s_XY);
|
||||
stretch[K_S_ZX] += dt * (str_ten[K_S_ZZ] * s_ZX + ( a3 ) * s_XX + ( b3 ) * s_XY - z2 * s_ZZ + z3 * s_YZ);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarCompact >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
void rotate_tensor( const ScalarCompact str_ten[] ,
|
||||
const ScalarCompact rot[] ,
|
||||
ScalarCompact rot_str[] )
|
||||
{
|
||||
ScalarCompact t[9];
|
||||
|
||||
t[0] = str_ten[K_S_XX]*rot[K_F_XX] + str_ten[K_S_XY]*rot[K_F_YX] + str_ten[K_S_XZ]*rot[K_F_ZX];
|
||||
t[1] = str_ten[K_S_YX]*rot[K_F_XX] + str_ten[K_S_YY]*rot[K_F_YX] + str_ten[K_S_YZ]*rot[K_F_ZX];
|
||||
t[2] = str_ten[K_S_ZX]*rot[K_F_XX] + str_ten[K_S_ZY]*rot[K_F_YX] + str_ten[K_S_ZZ]*rot[K_F_ZX];
|
||||
|
||||
t[3] = str_ten[K_S_XX]*rot[K_F_XY] + str_ten[K_S_XY]*rot[K_F_YY] + str_ten[K_S_XZ]*rot[K_F_ZY];
|
||||
t[4] = str_ten[K_S_YX]*rot[K_F_XY] + str_ten[K_S_YY]*rot[K_F_YY] + str_ten[K_S_YZ]*rot[K_F_ZY];
|
||||
t[5] = str_ten[K_S_ZX]*rot[K_F_XY] + str_ten[K_S_ZY]*rot[K_F_YY] + str_ten[K_S_ZZ]*rot[K_F_ZY];
|
||||
|
||||
t[6] = str_ten[K_S_XX]*rot[K_F_XZ] + str_ten[K_S_XY]*rot[K_F_YZ] + str_ten[K_S_XZ]*rot[K_F_ZZ];
|
||||
t[7] = str_ten[K_S_YX]*rot[K_F_XZ] + str_ten[K_S_YY]*rot[K_F_YZ] + str_ten[K_S_YZ]*rot[K_F_ZZ];
|
||||
t[8] = str_ten[K_S_ZX]*rot[K_F_XZ] + str_ten[K_S_ZY]*rot[K_F_YZ] + str_ten[K_S_ZZ]*rot[K_F_ZZ];
|
||||
|
||||
|
||||
rot_str[ K_S_XX ] = rot[K_F_XX] * t[0] + rot[K_F_YX] * t[1] + rot[K_F_ZX] * t[2];
|
||||
rot_str[ K_S_YY ] = rot[K_F_XY] * t[3] + rot[K_F_YY] * t[4] + rot[K_F_ZY] * t[5];
|
||||
rot_str[ K_S_ZZ ] = rot[K_F_XZ] * t[6] + rot[K_F_YZ] * t[7] + rot[K_F_ZZ] * t[8];
|
||||
|
||||
rot_str[ K_S_XY ] = rot[K_F_XX] * t[3] + rot[K_F_YX] * t[4] + rot[K_F_ZX] * t[5];
|
||||
rot_str[ K_S_YZ ] = rot[K_F_XY] * t[6] + rot[K_F_YY] * t[7] + rot[K_F_ZY] * t[8];
|
||||
rot_str[ K_S_ZX ] = rot[K_F_XZ] * t[0] + rot[K_F_YZ] * t[1] + rot[K_F_ZZ] * t[2];
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
template< class ScalarPrecise ,
|
||||
class ScalarCompact >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
void rotate_tensor_backward( const ScalarPrecise stress[] ,
|
||||
const ScalarCompact rot[] ,
|
||||
ScalarCompact rot_stress[] )
|
||||
{
|
||||
ScalarCompact t[9] ;
|
||||
|
||||
t[0] = stress[K_S_XX]*rot[K_F_XX]+ stress[K_S_XY]*rot[K_F_XY]+ stress[K_S_XZ]*rot[K_F_XZ];
|
||||
t[1] = stress[K_S_YX]*rot[K_F_XX]+ stress[K_S_YY]*rot[K_F_XY]+ stress[K_S_YZ]*rot[K_F_XZ];
|
||||
t[2] = stress[K_S_ZX]*rot[K_F_XX]+ stress[K_S_ZY]*rot[K_F_XY]+ stress[K_S_ZZ]*rot[K_F_XZ];
|
||||
t[3] = stress[K_S_XX]*rot[K_F_YX]+ stress[K_S_XY]*rot[K_F_YY]+ stress[K_S_XZ]*rot[K_F_YZ];
|
||||
t[4] = stress[K_S_YX]*rot[K_F_YX]+ stress[K_S_YY]*rot[K_F_YY]+ stress[K_S_YZ]*rot[K_F_YZ];
|
||||
t[5] = stress[K_S_ZX]*rot[K_F_YX]+ stress[K_S_ZY]*rot[K_F_YY]+ stress[K_S_ZZ]*rot[K_F_YZ];
|
||||
t[6] = stress[K_S_XX]*rot[K_F_ZX]+ stress[K_S_XY]*rot[K_F_ZY]+ stress[K_S_XZ]*rot[K_F_ZZ];
|
||||
t[7] = stress[K_S_YX]*rot[K_F_ZX]+ stress[K_S_YY]*rot[K_F_ZY]+ stress[K_S_YZ]*rot[K_F_ZZ];
|
||||
t[8] = stress[K_S_ZX]*rot[K_F_ZX]+ stress[K_S_ZY]*rot[K_F_ZY]+ stress[K_S_ZZ]*rot[K_F_ZZ];
|
||||
|
||||
rot_stress[ K_S_XX ] = rot[K_F_XX]*t[0] + rot[K_F_XY]*t[1] + rot[K_F_XZ]*t[2];
|
||||
rot_stress[ K_S_YY ] = rot[K_F_YX]*t[3] + rot[K_F_YY]*t[4] + rot[K_F_YZ]*t[5];
|
||||
rot_stress[ K_S_ZZ ] = rot[K_F_ZX]*t[6] + rot[K_F_ZY]*t[7] + rot[K_F_ZZ]*t[8];
|
||||
|
||||
rot_stress[ K_S_XY ] = rot[K_F_XX]*t[3] + rot[K_F_XY]*t[4] + rot[K_F_XZ]*t[5];
|
||||
rot_stress[ K_S_YZ ] = rot[K_F_YX]*t[6] + rot[K_F_YY]*t[7] + rot[K_F_YZ]*t[8];
|
||||
rot_stress[ K_S_ZX ] = rot[K_F_ZX]*t[0] + rot[K_F_ZY]*t[1] + rot[K_F_ZZ]*t[2];
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
template< class ScalarPrecise ,
|
||||
class ScalarCompact >
|
||||
KOKKOS_INLINE_FUNCTION static
|
||||
void update_stress( const float dt ,
|
||||
const float two_mu ,
|
||||
const float bulk_modulus ,
|
||||
const ScalarCompact rot_str[] ,
|
||||
ScalarPrecise stress[] )
|
||||
{
|
||||
const ScalarCompact e = rot_str[ K_S_XX ] + rot_str[ K_S_YY ] + rot_str[ K_S_ZZ ] ;
|
||||
const ScalarCompact eb = e * bulk_modulus ;
|
||||
const ScalarCompact e3 = e / 3.0 ;
|
||||
|
||||
stress[K_S_XX] += dt * ( two_mu * ( rot_str[K_S_XX] - e3 ) + eb );
|
||||
stress[K_S_YY] += dt * ( two_mu * ( rot_str[K_S_YY] - e3 ) + eb );
|
||||
stress[K_S_ZZ] += dt * ( two_mu * ( rot_str[K_S_ZZ] - e3 ) + eb );
|
||||
|
||||
stress[K_S_XY] += dt * two_mu * rot_str[K_S_XY];
|
||||
stress[K_S_YZ] += dt * two_mu * rot_str[K_S_YZ];
|
||||
stress[K_S_ZX] += dt * two_mu * rot_str[K_S_ZX];
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
template< class ScalarPrecise ,
|
||||
class ScalarCompact >
|
||||
static KOKKOS_INLINE_FUNCTION
|
||||
void comp_force( const ScalarPrecise vx[] ,
|
||||
const ScalarPrecise vy[] ,
|
||||
const ScalarPrecise vz[] ,
|
||||
const ScalarCompact grad_x[] ,
|
||||
const ScalarCompact grad_y[] ,
|
||||
const ScalarCompact grad_z[] ,
|
||||
const ScalarCompact total_stress12th[] ,
|
||||
ScalarCompact force[][ SpatialDim ] ,
|
||||
ScalarCompact & energy )
|
||||
{
|
||||
ScalarPrecise internal_energy = 0 ;
|
||||
|
||||
for ( unsigned inode = 0; inode < ElemNodeCount ; ++inode ) {
|
||||
|
||||
force[inode][0] = total_stress12th[K_S_XX] * grad_x[inode] +
|
||||
total_stress12th[K_S_XY] * grad_y[inode] +
|
||||
total_stress12th[K_S_XZ] * grad_z[inode] ;
|
||||
|
||||
force[inode][1] = total_stress12th[K_S_YX] * grad_x[inode] +
|
||||
total_stress12th[K_S_YY] * grad_y[inode] +
|
||||
total_stress12th[K_S_YZ] * grad_z[inode] ;
|
||||
|
||||
force[inode][2] = total_stress12th[K_S_ZX] * grad_x[inode] +
|
||||
total_stress12th[K_S_ZY] * grad_y[inode] +
|
||||
total_stress12th[K_S_ZZ] * grad_z[inode] ;
|
||||
|
||||
internal_energy += force[inode][0] * vx[inode] +
|
||||
force[inode][1] * vy[inode] +
|
||||
force[inode][2] * vz[inode] ;
|
||||
}
|
||||
|
||||
energy = internal_energy ;
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
};
|
||||
|
||||
} // namespace Explicit
|
||||
|
||||
#endif /* #ifndef KOKKOS_HEXEXPLICITFUNCTIONS_HPP */
|
||||
|
||||
@ -1,341 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef HYBRIDFEM_IMPLICIT_HPP
|
||||
#define HYBRIDFEM_IMPLICIT_HPP
|
||||
|
||||
#include <utility>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <SparseLinearSystem.hpp>
|
||||
#include <SparseLinearSystemFill.hpp>
|
||||
#include <ImplicitFunctors.hpp>
|
||||
#include <FEMesh.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace HybridFEM {
|
||||
namespace Implicit {
|
||||
|
||||
struct PerformanceData {
|
||||
double mesh_time ;
|
||||
double graph_time ;
|
||||
double elem_time ;
|
||||
double matrix_gather_fill_time ;
|
||||
double matrix_boundary_condition_time ;
|
||||
double cg_iteration_time ;
|
||||
|
||||
PerformanceData()
|
||||
: mesh_time(0)
|
||||
, graph_time(0)
|
||||
, elem_time(0)
|
||||
, matrix_gather_fill_time(0)
|
||||
, matrix_boundary_condition_time(0)
|
||||
, cg_iteration_time(0)
|
||||
{}
|
||||
|
||||
void best( const PerformanceData & rhs )
|
||||
{
|
||||
mesh_time = std::min( mesh_time , rhs.mesh_time );
|
||||
graph_time = std::min( graph_time , rhs.graph_time );
|
||||
elem_time = std::min( elem_time , rhs.elem_time );
|
||||
matrix_gather_fill_time = std::min( matrix_gather_fill_time , rhs.matrix_gather_fill_time );
|
||||
matrix_boundary_condition_time = std::min( matrix_boundary_condition_time , rhs.matrix_boundary_condition_time );
|
||||
cg_iteration_time = std::min( cg_iteration_time , rhs.cg_iteration_time );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar , class FixtureType >
|
||||
PerformanceData run( const typename FixtureType::FEMeshType & mesh ,
|
||||
const int , // global_max_x ,
|
||||
const int , // global_max_y ,
|
||||
const int global_max_z ,
|
||||
const bool print_sample )
|
||||
{
|
||||
typedef Scalar scalar_type ;
|
||||
typedef FixtureType fixture_type ;
|
||||
typedef typename fixture_type::execution_space execution_space;
|
||||
//typedef typename execution_space::size_type size_type ; // unused
|
||||
|
||||
typedef typename fixture_type::FEMeshType mesh_type ;
|
||||
typedef typename fixture_type::coordinate_scalar_type coordinate_scalar_type ;
|
||||
|
||||
enum { ElementNodeCount = fixture_type::element_node_count };
|
||||
|
||||
const comm::Machine machine = mesh.parallel_data_map.machine ;
|
||||
|
||||
const size_t element_count = mesh.elem_node_ids.dimension_0();
|
||||
|
||||
const size_t iteration_limit = 200 ;
|
||||
const double residual_tolerance = 1e-14 ;
|
||||
|
||||
size_t iteration_count = 0 ;
|
||||
double residual_norm = 0 ;
|
||||
|
||||
PerformanceData perf_data ;
|
||||
|
||||
//------------------------------------
|
||||
// Sparse linear system types:
|
||||
|
||||
typedef Kokkos::View< scalar_type* , execution_space > vector_type ;
|
||||
typedef Kokkos::CrsMatrix< scalar_type , execution_space > matrix_type ;
|
||||
typedef typename matrix_type::graph_type matrix_graph_type ;
|
||||
typedef typename matrix_type::coefficients_type matrix_coefficients_type ;
|
||||
|
||||
typedef GraphFactory< matrix_graph_type , mesh_type > graph_factory ;
|
||||
|
||||
//------------------------------------
|
||||
// Problem setup types:
|
||||
|
||||
typedef ElementComputation< scalar_type , scalar_type , execution_space > ElementFunctor ;
|
||||
typedef DirichletBoundary< scalar_type , scalar_type , execution_space > BoundaryFunctor ;
|
||||
|
||||
typedef typename ElementFunctor::elem_matrices_type elem_matrices_type ;
|
||||
typedef typename ElementFunctor::elem_vectors_type elem_vectors_type ;
|
||||
|
||||
typedef GatherFill< matrix_type ,
|
||||
mesh_type ,
|
||||
elem_matrices_type ,
|
||||
elem_vectors_type > GatherFillFunctor ;
|
||||
|
||||
//------------------------------------
|
||||
|
||||
const scalar_type elem_coeff_K = 2 ;
|
||||
const scalar_type elem_load_Q = 1 ;
|
||||
|
||||
matrix_type linsys_matrix ;
|
||||
vector_type linsys_rhs ;
|
||||
vector_type linsys_solution ;
|
||||
|
||||
typename graph_factory::element_map_type element_map ;
|
||||
|
||||
Kokkos::Impl::Timer wall_clock ;
|
||||
|
||||
//------------------------------------
|
||||
// Generate sparse matrix graph and element->graph map.
|
||||
|
||||
graph_factory::create( mesh , linsys_matrix.graph , element_map );
|
||||
|
||||
execution_space::fence();
|
||||
perf_data.graph_time = comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
//------------------------------------
|
||||
// Allocate linear system coefficients and rhs:
|
||||
|
||||
const size_t local_owned_length =
|
||||
linsys_matrix.graph.row_map.dimension_0() - 1 ;
|
||||
|
||||
linsys_matrix.coefficients =
|
||||
matrix_coefficients_type( "coeff" , linsys_matrix.graph.entries.dimension_0() );
|
||||
|
||||
linsys_rhs = vector_type( "rhs" , local_owned_length );
|
||||
linsys_solution = vector_type( "solution" , local_owned_length );
|
||||
|
||||
//------------------------------------
|
||||
// Fill linear system
|
||||
{
|
||||
elem_matrices_type elem_matrices ;
|
||||
elem_vectors_type elem_vectors ;
|
||||
|
||||
if ( element_count ) {
|
||||
elem_matrices = elem_matrices_type( std::string("elem_matrices"), element_count );
|
||||
elem_vectors = elem_vectors_type ( std::string("elem_vectors"), element_count );
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
// Compute element matrices and vectors:
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
ElementFunctor::apply( mesh ,
|
||||
elem_matrices , elem_vectors ,
|
||||
elem_coeff_K , elem_load_Q );
|
||||
|
||||
execution_space::fence();
|
||||
perf_data.elem_time = comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
//------------------------------------
|
||||
// Fill linear system coefficients:
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
GatherFillFunctor::apply( linsys_matrix , linsys_rhs ,
|
||||
mesh , element_map , elem_matrices , elem_vectors );
|
||||
|
||||
execution_space::fence();
|
||||
perf_data.matrix_gather_fill_time = comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
// Apply boundary conditions:
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
BoundaryFunctor::apply( linsys_matrix , linsys_rhs , mesh ,
|
||||
0 , global_max_z , 0 , global_max_z );
|
||||
|
||||
execution_space::fence();
|
||||
perf_data.matrix_boundary_condition_time = comm::max( machine , wall_clock.seconds() );
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
// Solve linear sytem
|
||||
|
||||
cgsolve( mesh.parallel_data_map ,
|
||||
linsys_matrix , linsys_rhs , linsys_solution ,
|
||||
iteration_count , residual_norm ,
|
||||
perf_data.cg_iteration_time ,
|
||||
iteration_limit , residual_tolerance );
|
||||
|
||||
//------------------------------------
|
||||
|
||||
if ( print_sample ) {
|
||||
|
||||
typename mesh_type::node_coords_type::HostMirror coords_h =
|
||||
Kokkos::create_mirror( mesh.node_coords );
|
||||
|
||||
typename vector_type::HostMirror X_h =
|
||||
Kokkos::create_mirror( linsys_solution );
|
||||
|
||||
Kokkos::deep_copy( coords_h , mesh.node_coords );
|
||||
Kokkos::deep_copy( X_h , linsys_solution );
|
||||
|
||||
for ( size_t i = 0 ; i < mesh.parallel_data_map.count_owned ; ++i ) {
|
||||
const coordinate_scalar_type x = coords_h(i,0);
|
||||
const coordinate_scalar_type y = coords_h(i,1);
|
||||
const coordinate_scalar_type z = coords_h(i,2);
|
||||
|
||||
if ( x <= 0 && y <= 0 ) {
|
||||
std::cout << " node( " << x << " " << y << " " << z << " ) = "
|
||||
<< X_h(i) << std::endl ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return perf_data ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar , class Device >
|
||||
void driver( const char * const label ,
|
||||
comm::Machine machine ,
|
||||
const int gang_count ,
|
||||
const int elem_count_beg ,
|
||||
const int elem_count_end ,
|
||||
const int runs )
|
||||
{
|
||||
typedef Scalar scalar_type ;
|
||||
typedef Device execution_space ;
|
||||
typedef double coordinate_scalar_type ;
|
||||
typedef FixtureElementHex8 fixture_element_type ;
|
||||
|
||||
typedef BoxMeshFixture< coordinate_scalar_type ,
|
||||
execution_space ,
|
||||
fixture_element_type > fixture_type ;
|
||||
|
||||
typedef typename fixture_type::FEMeshType mesh_type ;
|
||||
|
||||
const size_t proc_count = comm::size( machine );
|
||||
const size_t proc_rank = comm::rank( machine );
|
||||
|
||||
if ( elem_count_beg == 0 || elem_count_end == 0 || runs == 0 ) return ;
|
||||
|
||||
if ( comm::rank( machine ) == 0 ) {
|
||||
std::cout << std::endl ;
|
||||
std::cout << "\"Kokkos::HybridFE::Implicit " << label << "\"" << std::endl;
|
||||
std::cout << "\"Size\" , \"Graphing\" , \"Element\" , \"Fill\" , \"Boundary\" , \"CG-Iter\"" << std::endl
|
||||
<< "\"elems\" , \"millisec\" , \"millisec\" , \"millisec\" , \"millisec\" , \"millisec\"" << std::endl ;
|
||||
}
|
||||
|
||||
for(int i = elem_count_beg ; i < elem_count_end ; i *= 2 )
|
||||
{
|
||||
const int ix = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) );
|
||||
const int iy = ix + 1 ;
|
||||
const int iz = 2 * iy ;
|
||||
const int n = ix * iy * iz ;
|
||||
|
||||
mesh_type mesh =
|
||||
fixture_type::create( proc_count , proc_rank , gang_count ,
|
||||
ix , iy , iz );
|
||||
|
||||
mesh.parallel_data_map.machine = machine ;
|
||||
|
||||
PerformanceData perf_data , perf_best ;
|
||||
|
||||
for(int j = 0; j < runs; j++){
|
||||
|
||||
perf_data = run<scalar_type,fixture_type>(mesh,ix,iy,iz, false );
|
||||
|
||||
if( j == 0 ) {
|
||||
perf_best = perf_data ;
|
||||
}
|
||||
else {
|
||||
perf_best.best( perf_data );
|
||||
}
|
||||
}
|
||||
|
||||
if ( comm::rank( machine ) == 0 ) {
|
||||
|
||||
std::cout << std::setw(8) << n << " , "
|
||||
<< std::setw(10) << perf_best.graph_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.elem_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.matrix_gather_fill_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.matrix_boundary_condition_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.cg_iteration_time * 1000
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace Implicit */
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
|
||||
#endif /* #ifndef HYBRIDFEM_IMPLICIT_HPP */
|
||||
|
||||
@ -1,585 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
|
||||
namespace HybridFEM {
|
||||
namespace Implicit {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar , unsigned Dim , unsigned N >
|
||||
struct TensorIntegration ;
|
||||
|
||||
template<typename Scalar >
|
||||
struct TensorIntegration<Scalar,1,1> {
|
||||
Scalar pts[1] ;
|
||||
Scalar wts[1] ;
|
||||
|
||||
TensorIntegration() { pts[0] = 0 ; wts[0] = 2 ; }
|
||||
};
|
||||
|
||||
template<typename Scalar >
|
||||
struct TensorIntegration<Scalar,1,2>
|
||||
{
|
||||
Scalar pts[2] ;
|
||||
Scalar wts[2] ;
|
||||
|
||||
TensorIntegration()
|
||||
{
|
||||
const Scalar x2 = 0.577350269 ;
|
||||
pts[0] = -x2; wts[0] = 1.0;
|
||||
pts[1] = x2; wts[1] = 1.0;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Scalar >
|
||||
struct TensorIntegration<Scalar,1,3>
|
||||
{
|
||||
Scalar pts[3] ;
|
||||
Scalar wts[3] ;
|
||||
|
||||
TensorIntegration()
|
||||
{
|
||||
const Scalar x3 = 0.774596669 ;
|
||||
const Scalar w1 = 0.555555556 ;
|
||||
const Scalar w2 = 0.888888889 ;
|
||||
pts[0] = -x3 ; wts[0] = w1 ;
|
||||
pts[1] = 0 ; wts[1] = w2 ;
|
||||
pts[2] = x3 ; wts[2] = w1 ;
|
||||
}
|
||||
};
|
||||
|
||||
template< typename Scalar , unsigned Order >
|
||||
struct TensorIntegration<Scalar,3,Order>
|
||||
{
|
||||
static const unsigned N = Order * Order * Order ;
|
||||
|
||||
Scalar pts[N][3] ;
|
||||
Scalar wts[N];
|
||||
|
||||
TensorIntegration()
|
||||
{
|
||||
TensorIntegration<Scalar,1,Order> oneD ;
|
||||
|
||||
unsigned n = 0 ;
|
||||
for ( unsigned k = 0 ; k < Order ; ++k ) {
|
||||
for ( unsigned j = 0 ; j < Order ; ++j ) {
|
||||
for ( unsigned i = 0 ; i < Order ; ++i , ++n ) {
|
||||
pts[n][0] = oneD.pts[i] ;
|
||||
pts[n][1] = oneD.pts[j] ;
|
||||
pts[n][2] = oneD.pts[k] ;
|
||||
wts[n] = oneD.wts[i] * oneD.wts[j] * oneD.wts[k] ;
|
||||
}}}
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar >
|
||||
struct ShapeFunctionEvaluation {
|
||||
|
||||
static const unsigned FunctionCount = 8 ;
|
||||
static const unsigned SpatialDimension = 3 ;
|
||||
static const unsigned IntegrationOrder = 2 ;
|
||||
|
||||
typedef TensorIntegration< Scalar , SpatialDimension , IntegrationOrder >
|
||||
TensorIntegrationType ;
|
||||
|
||||
static const unsigned PointCount = TensorIntegrationType::N ;
|
||||
|
||||
Scalar value [ PointCount ][ FunctionCount ] ;
|
||||
Scalar gradient[ PointCount ][ FunctionCount * SpatialDimension ];
|
||||
Scalar weight [ PointCount ];
|
||||
|
||||
ShapeFunctionEvaluation()
|
||||
{
|
||||
const TensorIntegration< Scalar , SpatialDimension , IntegrationOrder >
|
||||
integration ;
|
||||
|
||||
const Scalar ONE8TH = 0.125 ;
|
||||
|
||||
for ( unsigned i = 0 ; i < PointCount ; ++i ) {
|
||||
|
||||
const Scalar u = 1.0 - integration.pts[i][0];
|
||||
const Scalar v = 1.0 - integration.pts[i][1];
|
||||
const Scalar w = 1.0 - integration.pts[i][2];
|
||||
|
||||
const Scalar up1 = 1.0 + integration.pts[i][0];
|
||||
const Scalar vp1 = 1.0 + integration.pts[i][1];
|
||||
const Scalar wp1 = 1.0 + integration.pts[i][2];
|
||||
|
||||
weight[i] = integration.wts[i] ;
|
||||
|
||||
// Vaues:
|
||||
value[i][0] = ONE8TH * u * v * w ;
|
||||
value[i][1] = ONE8TH * up1 * v * w ;
|
||||
value[i][2] = ONE8TH * up1 * vp1 * w ;
|
||||
value[i][3] = ONE8TH * u * vp1 * w ;
|
||||
|
||||
value[i][4] = ONE8TH * u * v * wp1 ;
|
||||
value[i][5] = ONE8TH * up1 * v * wp1 ;
|
||||
value[i][6] = ONE8TH * up1 * vp1 * wp1 ;
|
||||
value[i][7] = ONE8TH * u * vp1 * wp1 ;
|
||||
|
||||
//fn 0 = u * v * w
|
||||
gradient[i][ 0] = ONE8TH * -1 * v * w ;
|
||||
gradient[i][ 1] = ONE8TH * u * -1 * w ;
|
||||
gradient[i][ 2] = ONE8TH * u * v * -1 ;
|
||||
|
||||
//fn 1 = up1 * v * w
|
||||
gradient[i][ 3] = ONE8TH * 1 * v * w ;
|
||||
gradient[i][ 4] = ONE8TH * up1 * -1 * w ;
|
||||
gradient[i][ 5] = ONE8TH * up1 * v * -1 ;
|
||||
|
||||
//fn 2 = up1 * vp1 * w
|
||||
gradient[i][ 6] = ONE8TH * 1 * vp1 * w ;
|
||||
gradient[i][ 7] = ONE8TH * up1 * 1 * w ;
|
||||
gradient[i][ 8] = ONE8TH * up1 * vp1 * -1 ;
|
||||
|
||||
//fn 3 = u * vp1 * w
|
||||
gradient[i][ 9] = ONE8TH * -1 * vp1 * w ;
|
||||
gradient[i][10] = ONE8TH * u * 1 * w ;
|
||||
gradient[i][11] = ONE8TH * u * vp1 * -1 ;
|
||||
|
||||
//fn 4 = u * v * wp1
|
||||
gradient[i][12] = ONE8TH * -1 * v * wp1 ;
|
||||
gradient[i][13] = ONE8TH * u * -1 * wp1 ;
|
||||
gradient[i][14] = ONE8TH * u * v * 1 ;
|
||||
|
||||
//fn 5 = up1 * v * wp1
|
||||
gradient[i][15] = ONE8TH * 1 * v * wp1 ;
|
||||
gradient[i][16] = ONE8TH * up1 * -1 * wp1 ;
|
||||
gradient[i][17] = ONE8TH * up1 * v * 1 ;
|
||||
|
||||
//fn 6 = up1 * vp1 * wp1
|
||||
gradient[i][18] = ONE8TH * 1 * vp1 * wp1 ;
|
||||
gradient[i][19] = ONE8TH * up1 * 1 * wp1 ;
|
||||
gradient[i][20] = ONE8TH * up1 * vp1 * 1 ;
|
||||
|
||||
//fn 7 = u * vp1 * wp1
|
||||
gradient[i][21] = ONE8TH * -1 * vp1 * wp1 ;
|
||||
gradient[i][22] = ONE8TH * u * 1 * wp1 ;
|
||||
gradient[i][23] = ONE8TH * u * vp1 * 1 ;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarType , typename ScalarCoordType , class DeviceType >
|
||||
struct ElementComputation
|
||||
{
|
||||
typedef DeviceType execution_space;
|
||||
typedef ScalarType scalar_type ;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
|
||||
static const size_type ElementNodeCount = 8 ;
|
||||
|
||||
typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;
|
||||
typedef Kokkos::View< scalar_type[][ElementNodeCount][ElementNodeCount] , execution_space > elem_matrices_type ;
|
||||
typedef Kokkos::View< scalar_type[][ElementNodeCount] , execution_space > elem_vectors_type ;
|
||||
|
||||
typedef ShapeFunctionEvaluation< scalar_type > shape_function_data ;
|
||||
|
||||
static const unsigned SpatialDim = shape_function_data::SpatialDimension ;
|
||||
static const unsigned FunctionCount = shape_function_data::FunctionCount ;
|
||||
|
||||
private:
|
||||
|
||||
const shape_function_data shape_eval ;
|
||||
typename mesh_type::elem_node_ids_type elem_node_ids ;
|
||||
typename mesh_type::node_coords_type node_coords ;
|
||||
elem_matrices_type element_matrices ;
|
||||
elem_vectors_type element_vectors ;
|
||||
scalar_type coeff_K ;
|
||||
scalar_type coeff_Q ;
|
||||
|
||||
ElementComputation( const mesh_type & arg_mesh ,
|
||||
const elem_matrices_type & arg_element_matrices ,
|
||||
const elem_vectors_type & arg_element_vectors ,
|
||||
const scalar_type arg_coeff_K ,
|
||||
const scalar_type arg_coeff_Q )
|
||||
: shape_eval()
|
||||
, elem_node_ids( arg_mesh.elem_node_ids )
|
||||
, node_coords( arg_mesh.node_coords )
|
||||
, element_matrices( arg_element_matrices )
|
||||
, element_vectors( arg_element_vectors )
|
||||
, coeff_K( arg_coeff_K )
|
||||
, coeff_Q( arg_coeff_Q )
|
||||
{}
|
||||
|
||||
public:
|
||||
|
||||
static void apply( const mesh_type & mesh ,
|
||||
const elem_matrices_type & elem_matrices ,
|
||||
const elem_vectors_type & elem_vectors ,
|
||||
const scalar_type elem_coeff_K ,
|
||||
const scalar_type elem_coeff_Q )
|
||||
{
|
||||
ElementComputation comp( mesh , elem_matrices , elem_vectors , elem_coeff_K , elem_coeff_Q );
|
||||
const size_t elem_count = mesh.elem_node_ids.dimension_0();
|
||||
|
||||
parallel_for( elem_count , comp );
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
static const unsigned FLOPS_jacobian =
|
||||
FunctionCount * SpatialDim * SpatialDim * 2 ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void jacobian( const ScalarCoordType * x,
|
||||
const ScalarCoordType * y,
|
||||
const ScalarCoordType * z,
|
||||
const scalar_type * grad_vals,
|
||||
scalar_type * J) const
|
||||
{
|
||||
int i_grad = 0 ;
|
||||
|
||||
for( unsigned i = 0; i < ElementNodeCount ; ++i , i_grad += SpatialDim ) {
|
||||
const scalar_type g0 = grad_vals[ i_grad ];
|
||||
const scalar_type g1 = grad_vals[ i_grad + 1 ];
|
||||
const scalar_type g2 = grad_vals[ i_grad + 2 ];
|
||||
const scalar_type x0 = x[i] ;
|
||||
const scalar_type x1 = y[i] ;
|
||||
const scalar_type x2 = z[i] ;
|
||||
|
||||
J[0] += g0 * x0 ;
|
||||
J[1] += g0 * x1 ;
|
||||
J[2] += g0 * x2 ;
|
||||
|
||||
J[3] += g1 * x0 ;
|
||||
J[4] += g1 * x1 ;
|
||||
J[5] += g1 * x2 ;
|
||||
|
||||
J[6] += g2 * x0 ;
|
||||
J[7] += g2 * x1 ;
|
||||
J[8] += g2 * x2 ;
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
static const unsigned FLOPS_inverse_and_det = 46 ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
scalar_type inverse_and_determinant3x3( scalar_type * const J ) const
|
||||
{
|
||||
const scalar_type J00 = J[0];
|
||||
const scalar_type J01 = J[1];
|
||||
const scalar_type J02 = J[2];
|
||||
|
||||
const scalar_type J10 = J[3];
|
||||
const scalar_type J11 = J[4];
|
||||
const scalar_type J12 = J[5];
|
||||
|
||||
const scalar_type J20 = J[6];
|
||||
const scalar_type J21 = J[7];
|
||||
const scalar_type J22 = J[8];
|
||||
|
||||
const scalar_type term0 = J22*J11 - J21*J12;
|
||||
const scalar_type term1 = J22*J01 - J21*J02;
|
||||
const scalar_type term2 = J12*J01 - J11*J02;
|
||||
|
||||
const scalar_type detJ = J00*term0 - J10*term1 + J20*term2;
|
||||
const scalar_type inv_detJ = 1.0/detJ;
|
||||
|
||||
J[0] = term0*inv_detJ;
|
||||
J[1] = -term1*inv_detJ;
|
||||
J[2] = term2*inv_detJ;
|
||||
|
||||
J[3] = -(J22*J10 - J20*J12)*inv_detJ;
|
||||
J[4] = (J22*J00 - J20*J02)*inv_detJ;
|
||||
J[5] = -(J12*J00 - J10*J02)*inv_detJ;
|
||||
|
||||
J[6] = (J21*J10 - J20*J11)*inv_detJ;
|
||||
J[7] = -(J21*J00 - J20*J01)*inv_detJ;
|
||||
J[8] = (J11*J00 - J10*J01)*inv_detJ;
|
||||
|
||||
return detJ ;
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void matTransMat3x3_X_3xn( const scalar_type * A, int n,
|
||||
const scalar_type * B,
|
||||
scalar_type * C ) const
|
||||
{
|
||||
//A is 3x3, B is 3xn. So C is also 3xn.
|
||||
//A,B,C are all assumed to be ordered such that columns are contiguous.
|
||||
|
||||
scalar_type * Cj = C;
|
||||
const scalar_type * Bj = B;
|
||||
|
||||
for(int j=0; j<n; ++j) {
|
||||
Cj[0] = A[0]*Bj[0] + A[1]*Bj[1] + A[2]*Bj[2];
|
||||
Cj[1] = A[3]*Bj[0] + A[4]*Bj[1] + A[5]*Bj[2];
|
||||
Cj[2] = A[6]*Bj[0] + A[7]*Bj[1] + A[8]*Bj[2];
|
||||
Bj += 3;
|
||||
Cj += 3;
|
||||
}
|
||||
|
||||
}
|
||||
//------------------------------------
|
||||
|
||||
static const unsigned FLOPS_contributeDiffusionMatrix = FunctionCount * ( 3 * 5 + FunctionCount * 7 ) ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void contributeDiffusionMatrix(
|
||||
const scalar_type weight ,
|
||||
const scalar_type grad_vals[] ,
|
||||
const scalar_type invJ[] ,
|
||||
scalar_type elem_mat[][8] ) const
|
||||
{
|
||||
scalar_type dpsidx[8], dpsidy[8], dpsidz[8];
|
||||
|
||||
int i_grad = 0 ;
|
||||
for( unsigned i = 0; i < FunctionCount ; ++i , i_grad += 3 ) {
|
||||
const scalar_type g0 = grad_vals[i_grad+0];
|
||||
const scalar_type g1 = grad_vals[i_grad+1];
|
||||
const scalar_type g2 = grad_vals[i_grad+2];
|
||||
|
||||
dpsidx[i] = g0 * invJ[0] + g1 * invJ[1] + g2 * invJ[2];
|
||||
dpsidy[i] = g0 * invJ[3] + g1 * invJ[4] + g2 * invJ[5];
|
||||
dpsidz[i] = g0 * invJ[6] + g1 * invJ[7] + g2 * invJ[8];
|
||||
}
|
||||
|
||||
for( unsigned m = 0; m < FunctionCount; m++) {
|
||||
for( unsigned n = 0; n < FunctionCount; n++) {
|
||||
|
||||
elem_mat[m][n] += weight *
|
||||
((dpsidx[m] * dpsidx[n]) +
|
||||
(dpsidy[m] * dpsidy[n]) +
|
||||
(dpsidz[m] * dpsidz[n]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
static const unsigned FLOPS_contributeSourceVector = FunctionCount * 2 ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void contributeSourceVector( const scalar_type term ,
|
||||
const scalar_type psi[] ,
|
||||
scalar_type elem_vec[] ) const
|
||||
{
|
||||
for( unsigned i=0; i< FunctionCount ; ++i) {
|
||||
elem_vec[i] += psi[i] * term ;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static const unsigned FLOPS_operator =
|
||||
shape_function_data::PointCount * ( 3
|
||||
+ FLOPS_jacobian
|
||||
+ FLOPS_inverse_and_det
|
||||
+ FLOPS_contributeDiffusionMatrix
|
||||
+ FLOPS_contributeSourceVector ) ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( int ielem )const {
|
||||
|
||||
scalar_type elem_vec[8] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 };
|
||||
scalar_type elem_mat[8][8] =
|
||||
{ { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
|
||||
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
|
||||
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
|
||||
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
|
||||
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
|
||||
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
|
||||
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ,
|
||||
{ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } };
|
||||
|
||||
ScalarCoordType x[8], y[8], z[8];
|
||||
|
||||
for ( int i = 0 ; i < 8 ; ++i ) {
|
||||
const int node_index = elem_node_ids( ielem , i );
|
||||
x[i] = node_coords( node_index , 0 );
|
||||
y[i] = node_coords( node_index , 1 );
|
||||
z[i] = node_coords( node_index , 2 );
|
||||
}
|
||||
|
||||
// This loop could be parallelized; however,
|
||||
// it would require additional per-thread temporaries
|
||||
// of 'elem_vec' and 'elem_mat' which would
|
||||
// consume more local memory and have to be reduced.
|
||||
|
||||
for ( unsigned i = 0 ; i < shape_function_data::PointCount ; ++i ) {
|
||||
|
||||
scalar_type J[SpatialDim*SpatialDim] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
|
||||
jacobian( x, y, z, shape_eval.gradient[i] , J );
|
||||
|
||||
// Overwrite J with its inverse to save scratch memory space.
|
||||
const scalar_type detJ_w = shape_eval.weight[i] * inverse_and_determinant3x3(J);
|
||||
const scalar_type k_detJ_w = coeff_K * detJ_w ;
|
||||
const scalar_type Q_detJ_w = coeff_Q * detJ_w ;
|
||||
|
||||
contributeDiffusionMatrix( k_detJ_w , shape_eval.gradient[i] , J , elem_mat );
|
||||
|
||||
contributeSourceVector( Q_detJ_w , shape_eval.value[i] , elem_vec );
|
||||
}
|
||||
|
||||
for( size_type i=0; i< ElementNodeCount ; ++i) {
|
||||
element_vectors(ielem, i) = elem_vec[i] ;
|
||||
}
|
||||
|
||||
for( size_type i = 0; i < ElementNodeCount ; i++){
|
||||
for( size_type j = 0; j < ElementNodeCount ; j++){
|
||||
element_matrices(ielem, i, j) = elem_mat[i][j] ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}; /* ElementComputation */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarType , typename ScalarCoordType , class DeviceType >
|
||||
struct DirichletBoundary
|
||||
{
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
|
||||
static const size_type ElementNodeCount = 8 ;
|
||||
|
||||
typedef Kokkos::CrsMatrix< ScalarType , execution_space > matrix_type ;
|
||||
typedef Kokkos::View< ScalarType[] , execution_space > vector_type ;
|
||||
|
||||
typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;
|
||||
|
||||
typename mesh_type::node_coords_type node_coords ;
|
||||
matrix_type matrix ;
|
||||
vector_type rhs ;
|
||||
ScalarCoordType bc_lower_z ;
|
||||
ScalarCoordType bc_upper_z ;
|
||||
ScalarType bc_lower_value ;
|
||||
ScalarType bc_upper_value ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( size_type inode ) const
|
||||
{
|
||||
// Apply a dirichlet boundary condition to 'irow'
|
||||
// to maintain the symmetry of the original
|
||||
// global stiffness matrix, zero out the columns
|
||||
// that correspond to boundary conditions, and
|
||||
// adjust the load vector accordingly
|
||||
|
||||
const size_type iBeg = matrix.graph.row_map[inode];
|
||||
const size_type iEnd = matrix.graph.row_map[inode+1];
|
||||
|
||||
const ScalarCoordType z = node_coords(inode,2);
|
||||
const bool bc_lower = z <= bc_lower_z ;
|
||||
const bool bc_upper = bc_upper_z <= z ;
|
||||
|
||||
if ( bc_lower || bc_upper ) {
|
||||
const ScalarType bc_value = bc_lower ? bc_lower_value
|
||||
: bc_upper_value ;
|
||||
|
||||
rhs(inode) = bc_value ; // set the rhs vector
|
||||
|
||||
// zero each value on the row, and leave a one
|
||||
// on the diagonal
|
||||
|
||||
for( size_type i = iBeg ; i < iEnd ; i++) {
|
||||
matrix.coefficients(i) =
|
||||
(int) inode == matrix.graph.entries(i) ? 1 : 0 ;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Find any columns that are boundary conditions.
|
||||
// Clear them and adjust the load vector
|
||||
|
||||
for( size_type i = iBeg ; i < iEnd ; i++ ) {
|
||||
const size_type cnode = matrix.graph.entries(i) ;
|
||||
|
||||
const ScalarCoordType zc = node_coords(cnode,2);
|
||||
const bool c_bc_lower = zc <= bc_lower_z ;
|
||||
const bool c_bc_upper = bc_upper_z <= zc ;
|
||||
|
||||
if ( c_bc_lower || c_bc_upper ) {
|
||||
|
||||
const ScalarType c_bc_value = c_bc_lower ? bc_lower_value
|
||||
: bc_upper_value ;
|
||||
|
||||
rhs( inode ) -= c_bc_value * matrix.coefficients(i);
|
||||
|
||||
matrix.coefficients(i) = 0 ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void apply( const matrix_type & linsys_matrix ,
|
||||
const vector_type & linsys_rhs ,
|
||||
const mesh_type & mesh ,
|
||||
const ScalarCoordType bc_lower_z ,
|
||||
const ScalarCoordType bc_upper_z ,
|
||||
const ScalarType bc_lower_value ,
|
||||
const ScalarType bc_upper_value )
|
||||
{
|
||||
const size_t row_count = linsys_matrix.graph.row_map.dimension_0() - 1 ;
|
||||
DirichletBoundary op ;
|
||||
op.node_coords = mesh.node_coords ;
|
||||
op.matrix = linsys_matrix ;
|
||||
op.rhs = linsys_rhs ;
|
||||
op.bc_lower_z = bc_lower_z ;
|
||||
op.bc_upper_z = bc_upper_z ;
|
||||
op.bc_lower_value = bc_lower_value ;
|
||||
op.bc_upper_value = bc_upper_value ;
|
||||
parallel_for( row_count , op );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace Implicit */
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
@ -1,567 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef USESCASES_LINALG_BLAS_HPP
|
||||
#define USESCASES_LINALG_BLAS_HPP
|
||||
|
||||
#include <cmath>
|
||||
#include <utility>
|
||||
#include <ParallelComm.hpp>
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< class Scalar , class Layout , class DeviceType > struct Dot ;
|
||||
|
||||
template< class Scalar , class Layout , class DeviceType > struct Dot1 ;
|
||||
|
||||
template< typename ScalarA ,
|
||||
typename ScalarY ,
|
||||
class Layout , class Device >
|
||||
struct Scale ;
|
||||
|
||||
template< typename ScalarA ,
|
||||
typename ScalarY ,
|
||||
class Layout , class Device >
|
||||
struct Fill ;
|
||||
|
||||
template< typename ScalarA ,
|
||||
typename ScalarX ,
|
||||
typename ScalarY ,
|
||||
class Layout , class Device >
|
||||
struct AXPY ;
|
||||
|
||||
template< typename ScalarX ,
|
||||
typename ScalarB ,
|
||||
typename ScalarY ,
|
||||
class Layout , class Device >
|
||||
struct XPBY ;
|
||||
|
||||
template< typename ScalarA ,
|
||||
typename ScalarX ,
|
||||
typename ScalarB ,
|
||||
typename ScalarY ,
|
||||
typename ScalarW ,
|
||||
class Layout , class Device >
|
||||
struct WAXPBY ;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
template< typename ScalarX /* Allow mix of const and non-const */ ,
|
||||
typename ScalarY /* Allow mix of const and non-const */ ,
|
||||
class L , class D ,
|
||||
class MX /* Allow any management type */ ,
|
||||
class MY /* Allow any management type */ >
|
||||
inline
|
||||
double dot( const size_t n ,
|
||||
const View< ScalarX * , L , D , MX > & x ,
|
||||
const View< ScalarY * , L , D , MY > & y ,
|
||||
comm::Machine machine )
|
||||
{
|
||||
double global_result = 0 ;
|
||||
double local_result = 0 ;
|
||||
|
||||
Impl::Dot< ScalarX , L , D >( n , x , y , local_result );
|
||||
|
||||
MPI_Allreduce( & local_result , & global_result , 1 ,
|
||||
MPI_DOUBLE , MPI_SUM , machine.mpi_comm );
|
||||
|
||||
return global_result ;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template< typename ScalarX /* Allow mix of const and non-const */ ,
|
||||
typename ScalarY /* Allow mix of const and non-const */ ,
|
||||
class L , class D ,
|
||||
class MX /* Allow any management type */ ,
|
||||
class MY /* Allow any management type */ >
|
||||
inline
|
||||
double dot( const size_t n ,
|
||||
const View< ScalarX * , L , D , MX > & x ,
|
||||
const View< ScalarY * , L , D , MY > & y ,
|
||||
comm::Machine )
|
||||
{
|
||||
double global_result = 0 ;
|
||||
|
||||
Impl::Dot< ScalarX , L , D >( n , x , y , global_result );
|
||||
|
||||
return global_result ;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
template< typename ScalarX /* Allow mix of const and non-const */ ,
|
||||
class L , class D ,
|
||||
class MX /* Allow any management type */ >
|
||||
inline
|
||||
double dot( const size_t n ,
|
||||
const View< ScalarX * , L , D , MX > & x ,
|
||||
comm::Machine machine )
|
||||
{
|
||||
double global_result = 0 ;
|
||||
double local_result = 0 ;
|
||||
|
||||
Impl::Dot1< ScalarX , L , D >( n , x , local_result );
|
||||
|
||||
MPI_Allreduce( & local_result , & global_result , 1 ,
|
||||
MPI_DOUBLE , MPI_SUM , machine.mpi_comm );
|
||||
|
||||
return global_result ;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
template< typename ScalarX /* Allow mix of const and non-const */ ,
|
||||
class L , class D ,
|
||||
class MX /* Allow any management type */ >
|
||||
inline
|
||||
double dot( const size_t n ,
|
||||
const View< ScalarX * , L , D , MX > & x ,
|
||||
comm::Machine )
|
||||
{
|
||||
double global_result = 0 ;
|
||||
|
||||
Impl::Dot1< ScalarX , L , D >( n , x , global_result );
|
||||
|
||||
return global_result ;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarX /* Allow mix of const and non-const */ ,
|
||||
class L , class D ,
|
||||
class MX /* Allow any management type */ >
|
||||
inline
|
||||
double norm2( const size_t n ,
|
||||
const View< ScalarX * , L , D , MX > & x ,
|
||||
comm::Machine machine )
|
||||
{
|
||||
return std::sqrt( dot( n , x , machine ) );
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarA ,
|
||||
typename ScalarX ,
|
||||
class L ,
|
||||
class D ,
|
||||
class MX >
|
||||
void scale( const size_t n ,
|
||||
const ScalarA & alpha ,
|
||||
const View< ScalarX * , L , D , MX > & x )
|
||||
{
|
||||
Impl::Scale< ScalarA , ScalarX , L , D >( n , alpha , x );
|
||||
}
|
||||
|
||||
template< typename ScalarA ,
|
||||
typename ScalarX ,
|
||||
class L ,
|
||||
class D ,
|
||||
class MX >
|
||||
void fill( const size_t n ,
|
||||
const ScalarA & alpha ,
|
||||
const View< ScalarX * , L , D , MX > & x )
|
||||
{
|
||||
Impl::Fill< ScalarA , ScalarX , L , D >( n , alpha , x );
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarA ,
|
||||
typename ScalarX ,
|
||||
typename ScalarY ,
|
||||
class L ,
|
||||
class D ,
|
||||
class MX ,
|
||||
class MY >
|
||||
void axpy( const size_t n ,
|
||||
const ScalarA & alpha ,
|
||||
const View< ScalarX *, L , D , MX > & x ,
|
||||
const View< ScalarY *, L , D , MY > & y )
|
||||
{
|
||||
Impl::AXPY< ScalarA, ScalarX, ScalarY , L , D >( n, alpha, x, y );
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarX ,
|
||||
typename ScalarB ,
|
||||
typename ScalarY ,
|
||||
class L ,
|
||||
class D ,
|
||||
class MX ,
|
||||
class MY >
|
||||
void xpby( const size_t n ,
|
||||
const View< ScalarX *, L , D , MX > & x ,
|
||||
const ScalarB & beta ,
|
||||
const View< ScalarY *, L , D , MY > & y )
|
||||
{
|
||||
Impl::XPBY< ScalarX, ScalarB, ScalarY , L , D >( n, x, beta, y );
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// w = alpha * x + beta * y
|
||||
|
||||
template< typename ScalarA ,
|
||||
typename ScalarX ,
|
||||
typename ScalarB ,
|
||||
typename ScalarY ,
|
||||
typename ScalarW ,
|
||||
class L , class D ,
|
||||
class MX , class MY , class MW >
|
||||
void waxpby( const size_t n ,
|
||||
const ScalarA & alpha ,
|
||||
const View< ScalarX * , L , D , MX > & x ,
|
||||
const ScalarB & beta ,
|
||||
const View< ScalarY * , L , D , MY > & y ,
|
||||
const View< ScalarW * , L , D , MW > & w )
|
||||
{
|
||||
Impl::WAXPBY<ScalarA,ScalarX,ScalarB,ScalarY,ScalarW,L,D>
|
||||
( n , alpha , x , beta , y , w );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< typename Scalar , class L , class D >
|
||||
struct Dot
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< const Scalar*, L, D, MemoryUnmanaged > vector_const_type ;
|
||||
|
||||
const vector_const_type x ;
|
||||
const vector_const_type y ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename vector_const_type::execution_space execution_space ; // Manycore device
|
||||
typedef double value_type ; // Reduction value
|
||||
|
||||
template< class ArgX , class ArgY >
|
||||
inline
|
||||
Dot( const size_t n , const ArgX & arg_x , const ArgY & arg_y , double & result )
|
||||
: x( arg_x ), y( arg_y )
|
||||
{
|
||||
parallel_reduce( n , *this , result );
|
||||
}
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const iType & i , value_type & update ) const
|
||||
{ update += x(i) * y(i); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join( volatile value_type & update ,
|
||||
const volatile value_type & source )
|
||||
{ update += source; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init( value_type & update )
|
||||
{ update = 0 ; }
|
||||
}; // Dot
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar , class L , class D >
|
||||
struct Dot1
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< const Scalar*, L, D , MemoryUnmanaged > vector_const_type ;
|
||||
|
||||
const vector_const_type x ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename vector_const_type::execution_space execution_space ; // Manycore device
|
||||
typedef double value_type ; // Reduction value
|
||||
|
||||
template< class ArgX >
|
||||
inline
|
||||
Dot1( const size_t n , const ArgX & arg_x , double & result )
|
||||
: x( arg_x )
|
||||
{
|
||||
parallel_reduce( n , *this , result );
|
||||
}
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const iType & i , value_type & update ) const
|
||||
{ update += x(i) * x(i) ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join( volatile value_type & update ,
|
||||
const volatile value_type & source )
|
||||
{ update += source ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init( value_type & update )
|
||||
{ update = 0 ; }
|
||||
}; // Dot
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template < typename ScalarA ,
|
||||
typename ScalarX ,
|
||||
typename ScalarB ,
|
||||
typename ScalarY ,
|
||||
typename ScalarW ,
|
||||
class L , class D >
|
||||
struct WAXPBY
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
|
||||
typedef View< const ScalarX *, L , D , MemoryUnmanaged > ViewX ;
|
||||
typedef View< const ScalarY *, L , D , MemoryUnmanaged > ViewY ;
|
||||
|
||||
const ViewW w ;
|
||||
const ViewX x ;
|
||||
const ViewY y ;
|
||||
const ScalarA alpha ;
|
||||
const ScalarB beta ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename ViewW::execution_space execution_space ;
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const iType inode ) const
|
||||
{
|
||||
w(inode) = alpha * x(inode) + beta * y(inode);
|
||||
}
|
||||
|
||||
template< class ArgX , class ArgY , class ArgW >
|
||||
inline
|
||||
WAXPBY( const size_t n ,
|
||||
const ScalarA & arg_alpha ,
|
||||
const ArgX & arg_x ,
|
||||
const ScalarB & arg_beta ,
|
||||
const ArgY & arg_y ,
|
||||
const ArgW & arg_w )
|
||||
: w( arg_w ), x( arg_x ), y( arg_y )
|
||||
, alpha( arg_alpha ), beta( arg_beta )
|
||||
{
|
||||
parallel_for( n , *this );
|
||||
}
|
||||
}; // WAXPBY
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template < typename ScalarB ,
|
||||
typename ScalarW ,
|
||||
class L , class D >
|
||||
struct Scale
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
|
||||
const ViewW w ;
|
||||
const ScalarB beta ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename ViewW::execution_space execution_space ;
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const iType & i ) const
|
||||
{ w(i) *= beta ; }
|
||||
|
||||
template< class ArgW >
|
||||
inline
|
||||
Scale( const size_t n , const ScalarB & arg_beta , const ArgW & arg_w )
|
||||
: w( arg_w )
|
||||
, beta( arg_beta )
|
||||
{
|
||||
parallel_for( n , *this );
|
||||
}
|
||||
};
|
||||
|
||||
template < typename ScalarB ,
|
||||
typename ScalarW ,
|
||||
class L , class D >
|
||||
struct Fill
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
|
||||
const ViewW w ;
|
||||
const ScalarB beta ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename ViewW::execution_space execution_space ;
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const iType & i ) const
|
||||
{ w(i) = beta ; }
|
||||
|
||||
template< class ArgW >
|
||||
inline
|
||||
Fill( const size_t n , const ScalarB & arg_beta , const ArgW & arg_w )
|
||||
: w( arg_w )
|
||||
, beta( arg_beta )
|
||||
{
|
||||
parallel_for( n , *this );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template < typename ScalarA ,
|
||||
typename ScalarX ,
|
||||
typename ScalarW ,
|
||||
class L , class D >
|
||||
struct AXPY
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
|
||||
typedef View< const ScalarX *, L , D , MemoryUnmanaged > ViewX ;
|
||||
|
||||
const ViewW w ;
|
||||
const ViewX x ;
|
||||
const ScalarA alpha ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename ViewW::execution_space execution_space ;
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const iType & i ) const
|
||||
{ w(i) += alpha * x(i); }
|
||||
|
||||
template< class ArgX , class ArgW >
|
||||
inline
|
||||
AXPY( const size_t n ,
|
||||
const ScalarA & arg_alpha ,
|
||||
const ArgX & arg_x ,
|
||||
const ArgW & arg_w )
|
||||
: w( arg_w ), x( arg_x )
|
||||
, alpha( arg_alpha )
|
||||
{
|
||||
parallel_for( n , *this );
|
||||
}
|
||||
}; // AXPY
|
||||
|
||||
template< typename ScalarX ,
|
||||
typename ScalarB ,
|
||||
typename ScalarW ,
|
||||
class L , class D >
|
||||
struct XPBY
|
||||
{
|
||||
private:
|
||||
|
||||
typedef View< ScalarW *, L , D , MemoryUnmanaged > ViewW ;
|
||||
typedef View< const ScalarX *, L , D , MemoryUnmanaged > ViewX ;
|
||||
|
||||
const ViewW w ;
|
||||
const ViewX x ;
|
||||
const ScalarB beta ;
|
||||
|
||||
public:
|
||||
|
||||
typedef typename ViewW::execution_space execution_space ;
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const iType & i ) const
|
||||
{ w(i) = x(i) + beta * w(i); }
|
||||
|
||||
template< class ArgX , class ArgW >
|
||||
inline
|
||||
XPBY( const size_t n ,
|
||||
const ArgX & arg_x ,
|
||||
const ScalarB & arg_beta ,
|
||||
const ArgW & arg_w )
|
||||
: w( arg_w ), x( arg_x )
|
||||
, beta( arg_beta )
|
||||
{
|
||||
parallel_for( n , *this );
|
||||
}
|
||||
}; // XPBY
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef USESCASES_LINALG_BLAS_HPP */
|
||||
|
||||
|
||||
@ -1,53 +0,0 @@
|
||||
KOKKOS_PATH ?= ../..
|
||||
|
||||
MAKEFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
|
||||
SRC_DIR := $(dir $(MAKEFILE_PATH))
|
||||
|
||||
SRC = $(wildcard $(SRC_DIR)/*.cpp)
|
||||
OBJ = $(SRC:$(SRC_DIR)/%.cpp=%.o)
|
||||
|
||||
#SRC = $(wildcard *.cpp)
|
||||
#OBJ = $(SRC:%.cpp=%.o)
|
||||
|
||||
default: build
|
||||
echo "Start Build"
|
||||
|
||||
# use installed Makefile.kokkos
|
||||
include $(KOKKOS_PATH)/Makefile.kokkos
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = $(NVCC_WRAPPER)
|
||||
CXXFLAGS = -I$(SRC_DIR) -I$(CUDA_PATH) -O3
|
||||
LINK = $(CXX)
|
||||
LINKFLAGS = -L$(CUDA_PATH)/lib64 -lcusparse
|
||||
EXE = $(addsuffix .cuda, $(shell basename $(SRC_DIR)))
|
||||
#KOKKOS_DEVICES = "Cuda,OpenMP"
|
||||
#KOKKOS_ARCH = "SNB,Kepler35"
|
||||
else
|
||||
CXX = g++
|
||||
CXXFLAGS = -I$(SRC_DIR) -O3
|
||||
LINK = $(CXX)
|
||||
LINKFLAGS =
|
||||
EXE = $(addsuffix .host, $(shell basename $(SRC_DIR)))
|
||||
#KOKKOS_DEVICES = "OpenMP"
|
||||
#KOKKOS_ARCH = "SNB"
|
||||
endif
|
||||
|
||||
DEPFLAGS = -M
|
||||
|
||||
LIB =
|
||||
|
||||
|
||||
build: $(EXE)
|
||||
|
||||
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
|
||||
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
|
||||
|
||||
clean:
|
||||
rm -f *.a *.o *.cuda *.host
|
||||
|
||||
# Compilation rules
|
||||
|
||||
%.o:$(SRC_DIR)/%.cpp $(KOKKOS_CPP_DEPENDS)
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
@ -1,573 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef HYBRIDFEM_NONLINEAR_HPP
|
||||
#define HYBRIDFEM_NONLINEAR_HPP
|
||||
|
||||
#include <utility>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <SparseLinearSystem.hpp>
|
||||
#include <SparseLinearSystemFill.hpp>
|
||||
#include <NonlinearFunctors.hpp>
|
||||
|
||||
#include <FEMesh.hpp>
|
||||
#include <HexElement.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace HybridFEM {
|
||||
namespace Nonlinear {
|
||||
|
||||
struct PerformanceData {
|
||||
double mesh_time ;
|
||||
double graph_time ;
|
||||
double elem_time ;
|
||||
double matrix_gather_fill_time ;
|
||||
double matrix_boundary_condition_time ;
|
||||
double cg_iteration_time ;
|
||||
size_t cg_iteration_count ;
|
||||
size_t newton_iteration_count ;
|
||||
double error_max ;
|
||||
|
||||
PerformanceData()
|
||||
: mesh_time(0)
|
||||
, graph_time(0)
|
||||
, elem_time(0)
|
||||
, matrix_gather_fill_time(0)
|
||||
, matrix_boundary_condition_time(0)
|
||||
, cg_iteration_time(0)
|
||||
, cg_iteration_count(0)
|
||||
, newton_iteration_count(0)
|
||||
, error_max(0)
|
||||
{}
|
||||
|
||||
void best( const PerformanceData & rhs )
|
||||
{
|
||||
mesh_time = std::min( mesh_time , rhs.mesh_time );
|
||||
graph_time = std::min( graph_time , rhs.graph_time );
|
||||
elem_time = std::min( elem_time , rhs.elem_time );
|
||||
matrix_gather_fill_time = std::min( matrix_gather_fill_time , rhs.matrix_gather_fill_time );
|
||||
matrix_boundary_condition_time = std::min( matrix_boundary_condition_time , rhs.matrix_boundary_condition_time );
|
||||
cg_iteration_time = std::min( cg_iteration_time , rhs.cg_iteration_time );
|
||||
cg_iteration_count = std::min( cg_iteration_count , rhs.cg_iteration_count );
|
||||
newton_iteration_count = std::min( newton_iteration_count , rhs.newton_iteration_count );
|
||||
error_max = std::min( error_max , rhs.error_max );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
class ManufacturedSolution {
|
||||
public:
|
||||
|
||||
// Manufactured solution for one dimensional nonlinear PDE
|
||||
//
|
||||
// -K T_zz + T^2 = 0 ; T(zmin) = T_zmin ; T(zmax) = T_zmax
|
||||
//
|
||||
// Has an analytic solution of the form:
|
||||
//
|
||||
// T(z) = ( a ( z - zmin ) + b )^(-2) where K = 1 / ( 6 a^2 )
|
||||
//
|
||||
// Given T_0 and T_L compute K for this analytic solution.
|
||||
//
|
||||
// Two analytic solutions:
|
||||
//
|
||||
// Solution with singularity:
|
||||
// , a( ( 1.0 / sqrt(T_zmax) + 1.0 / sqrt(T_zmin) ) / ( zmax - zmin ) )
|
||||
// , b( -1.0 / sqrt(T_zmin) )
|
||||
//
|
||||
// Solution without singularity:
|
||||
// , a( ( 1.0 / sqrt(T_zmax) - 1.0 / sqrt(T_zmin) ) / ( zmax - zmin ) )
|
||||
// , b( 1.0 / sqrt(T_zmin) )
|
||||
|
||||
const double zmin ;
|
||||
const double zmax ;
|
||||
const double T_zmin ;
|
||||
const double T_zmax ;
|
||||
const double a ;
|
||||
const double b ;
|
||||
const double K ;
|
||||
|
||||
ManufacturedSolution( const double arg_zmin ,
|
||||
const double arg_zmax ,
|
||||
const double arg_T_zmin ,
|
||||
const double arg_T_zmax )
|
||||
: zmin( arg_zmin )
|
||||
, zmax( arg_zmax )
|
||||
, T_zmin( arg_T_zmin )
|
||||
, T_zmax( arg_T_zmax )
|
||||
, a( ( 1.0 / sqrt(T_zmax) - 1.0 / sqrt(T_zmin) ) / ( zmax - zmin ) )
|
||||
, b( 1.0 / sqrt(T_zmin) )
|
||||
, K( 1.0 / ( 6.0 * a * a ) )
|
||||
{}
|
||||
|
||||
double operator()( const double z ) const
|
||||
{
|
||||
const double tmp = a * ( z - zmin ) + b ;
|
||||
return 1.0 / ( tmp * tmp );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar , class FixtureType >
|
||||
PerformanceData run( const typename FixtureType::FEMeshType & mesh ,
|
||||
const int , // global_max_x ,
|
||||
const int , // global_max_y ,
|
||||
const int global_max_z ,
|
||||
const bool print_error )
|
||||
{
|
||||
typedef Scalar scalar_type ;
|
||||
typedef FixtureType fixture_type ;
|
||||
typedef typename fixture_type::execution_space execution_space;
|
||||
//typedef typename execution_space::size_type size_type ; // unused
|
||||
|
||||
typedef typename fixture_type::FEMeshType mesh_type ;
|
||||
typedef typename fixture_type::coordinate_scalar_type coordinate_scalar_type ;
|
||||
|
||||
enum { ElementNodeCount = fixture_type::element_node_count };
|
||||
|
||||
const comm::Machine machine = mesh.parallel_data_map.machine ;
|
||||
|
||||
const size_t element_count = mesh.elem_node_ids.dimension_0();
|
||||
|
||||
//------------------------------------
|
||||
// The amount of nonlinearity is proportional to the ratio
|
||||
// between T(zmax) and T(zmin). For the manufactured solution
|
||||
// 0 < T(zmin) and 0 < T(zmax)
|
||||
|
||||
const ManufacturedSolution
|
||||
exact_solution( /* zmin */ 0 ,
|
||||
/* zmax */ global_max_z ,
|
||||
/* T(zmin) */ 1 ,
|
||||
/* T(zmax) */ 20 );
|
||||
|
||||
//-----------------------------------
|
||||
// Convergence Criteria and perf data:
|
||||
|
||||
const size_t cg_iteration_limit = 200 ;
|
||||
const double cg_tolerance = 1e-14 ;
|
||||
|
||||
const size_t newton_iteration_limit = 150 ;
|
||||
const double newton_tolerance = 1e-14 ;
|
||||
|
||||
size_t cg_iteration_count_total = 0 ;
|
||||
double cg_iteration_time = 0 ;
|
||||
|
||||
size_t newton_iteration_count = 0 ;
|
||||
double residual_norm_init = 0 ;
|
||||
double residual_norm = 0 ;
|
||||
|
||||
PerformanceData perf_data ;
|
||||
|
||||
//------------------------------------
|
||||
// Sparse linear system types:
|
||||
|
||||
typedef Kokkos::View< scalar_type* , execution_space > vector_type ;
|
||||
typedef Kokkos::CrsMatrix< scalar_type , execution_space > matrix_type ;
|
||||
typedef typename matrix_type::graph_type matrix_graph_type ;
|
||||
typedef typename matrix_type::coefficients_type matrix_coefficients_type ;
|
||||
|
||||
typedef GraphFactory< matrix_graph_type , mesh_type > graph_factory ;
|
||||
|
||||
//------------------------------------
|
||||
// Problem setup types:
|
||||
|
||||
typedef ElementComputation < mesh_type , scalar_type > ElementFunctor ;
|
||||
typedef DirichletSolution < mesh_type , scalar_type > DirichletSolutionFunctor ;
|
||||
typedef DirichletResidual < mesh_type , scalar_type > DirichletResidualFunctor ;
|
||||
|
||||
typedef typename ElementFunctor::elem_matrices_type elem_matrices_type ;
|
||||
typedef typename ElementFunctor::elem_vectors_type elem_vectors_type ;
|
||||
|
||||
typedef GatherFill< matrix_type ,
|
||||
mesh_type ,
|
||||
elem_matrices_type ,
|
||||
elem_vectors_type > GatherFillFunctor ;
|
||||
|
||||
//------------------------------------
|
||||
|
||||
matrix_type jacobian ;
|
||||
vector_type residual ;
|
||||
vector_type delta ;
|
||||
vector_type nodal_solution ;
|
||||
|
||||
typename graph_factory::element_map_type element_map ;
|
||||
|
||||
//------------------------------------
|
||||
// Generate mesh and corresponding sparse matrix graph
|
||||
|
||||
Kokkos::Impl::Timer wall_clock ;
|
||||
|
||||
//------------------------------------
|
||||
// Generate sparse matrix graph and element->graph map.
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
graph_factory::create( mesh , jacobian.graph , element_map );
|
||||
|
||||
execution_space::fence();
|
||||
|
||||
perf_data.graph_time = comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
//------------------------------------
|
||||
// Allocate linear system coefficients and rhs:
|
||||
|
||||
const size_t local_owned_length = jacobian.graph.row_map.dimension_0() - 1 ;
|
||||
const size_t local_total_length = mesh.node_coords.dimension_0();
|
||||
|
||||
jacobian.coefficients =
|
||||
matrix_coefficients_type( "jacobian_coeff" , jacobian.graph.entries.dimension_0() );
|
||||
|
||||
// Nonlinear residual for owned nodes:
|
||||
residual = vector_type( "residual" , local_owned_length );
|
||||
|
||||
// Nonlinear solution for owned and ghosted nodes:
|
||||
nodal_solution = vector_type( "solution" , local_total_length );
|
||||
|
||||
// Nonlinear solution update for owned nodes:
|
||||
delta = vector_type( "delta" , local_owned_length );
|
||||
|
||||
//------------------------------------
|
||||
// Allocation of arrays to fill the linear system
|
||||
|
||||
elem_matrices_type elem_matrices ; // Jacobian matrices
|
||||
elem_vectors_type elem_vectors ; // Residual vectors
|
||||
|
||||
if ( element_count ) {
|
||||
elem_matrices = elem_matrices_type( std::string("elem_matrices"), element_count );
|
||||
elem_vectors = elem_vectors_type( std::string("elem_vectors"), element_count );
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
// For boundary condition set the correct values in the solution vector
|
||||
// The 'zmin' face is assigned to 'T_zmin'.
|
||||
// The 'zmax' face is assigned to 'T_zmax'.
|
||||
// The resulting solution is one dimensional along the 'Z' axis.
|
||||
|
||||
DirichletSolutionFunctor::apply( nodal_solution , mesh ,
|
||||
exact_solution.zmin ,
|
||||
exact_solution.zmax ,
|
||||
exact_solution.T_zmin ,
|
||||
exact_solution.T_zmax );
|
||||
|
||||
for(;;) { // Nonlinear loop
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
{ //------------------------------------
|
||||
// Import off-processor nodal solution values
|
||||
// for residual and jacobian computations
|
||||
|
||||
Kokkos::AsyncExchange< typename vector_type::value_type , execution_space ,
|
||||
Kokkos::ParallelDataMap >
|
||||
exchange( mesh.parallel_data_map , 1 );
|
||||
|
||||
Kokkos::PackArray< vector_type >
|
||||
::pack( exchange.buffer() ,
|
||||
mesh.parallel_data_map.count_interior ,
|
||||
mesh.parallel_data_map.count_send ,
|
||||
nodal_solution );
|
||||
|
||||
exchange.setup();
|
||||
|
||||
exchange.send_receive();
|
||||
|
||||
Kokkos::UnpackArray< vector_type >
|
||||
::unpack( nodal_solution , exchange.buffer() ,
|
||||
mesh.parallel_data_map.count_owned ,
|
||||
mesh.parallel_data_map.count_receive );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//------------------------------------
|
||||
// Compute element matrices and vectors:
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
ElementFunctor( mesh ,
|
||||
elem_matrices ,
|
||||
elem_vectors ,
|
||||
nodal_solution ,
|
||||
exact_solution.K );
|
||||
|
||||
execution_space::fence();
|
||||
perf_data.elem_time += comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
//------------------------------------
|
||||
// Fill linear system coefficients:
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
fill( jacobian.coefficients.dimension_0(), 0 , jacobian.coefficients );
|
||||
fill( residual.dimension_0() , 0 , residual );
|
||||
|
||||
GatherFillFunctor::apply( jacobian ,
|
||||
residual ,
|
||||
mesh ,
|
||||
element_map ,
|
||||
elem_matrices ,
|
||||
elem_vectors );
|
||||
|
||||
execution_space::fence();
|
||||
perf_data.matrix_gather_fill_time += comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
// Apply boundary conditions:
|
||||
|
||||
wall_clock.reset();
|
||||
|
||||
// Updates jacobian matrix to 1 on the diagonal, zero elsewhere,
|
||||
// and 0 in the residual due to the solution vector having the correct value
|
||||
DirichletResidualFunctor::apply( jacobian, residual, mesh ,
|
||||
exact_solution.zmin ,
|
||||
exact_solution.zmax );
|
||||
|
||||
execution_space::fence();
|
||||
perf_data.matrix_boundary_condition_time +=
|
||||
comm::max( machine , wall_clock.seconds() );
|
||||
|
||||
//------------------------------------
|
||||
// Has the residual converged?
|
||||
|
||||
residual_norm = norm2( mesh.parallel_data_map.count_owned,
|
||||
residual,
|
||||
mesh.parallel_data_map.machine );
|
||||
|
||||
if ( 0 == newton_iteration_count ) {
|
||||
residual_norm_init = residual_norm ;
|
||||
}
|
||||
|
||||
if ( residual_norm / residual_norm_init < newton_tolerance ) {
|
||||
break ;
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
// Solve linear sytem
|
||||
|
||||
size_t cg_iteration_count = 0 ;
|
||||
double cg_residual_norm = 0 ;
|
||||
|
||||
cgsolve( mesh.parallel_data_map ,
|
||||
jacobian , residual , delta ,
|
||||
cg_iteration_count ,
|
||||
cg_residual_norm ,
|
||||
cg_iteration_time ,
|
||||
cg_iteration_limit , cg_tolerance ) ;
|
||||
|
||||
perf_data.cg_iteration_time += cg_iteration_time ;
|
||||
cg_iteration_count_total += cg_iteration_count ;
|
||||
|
||||
// Update non-linear solution with delta...
|
||||
// delta is : - Dx = [Jacobian]^1 * Residual which is the negative update
|
||||
// LaTeX:
|
||||
// \vec {x}_{n+1} = \vec {x}_{n} - ( - \Delta \vec{x}_{n} )
|
||||
// text:
|
||||
// x[n+1] = x[n] + Dx
|
||||
|
||||
axpy( mesh.parallel_data_map.count_owned ,
|
||||
-1.0, delta, nodal_solution);
|
||||
|
||||
++newton_iteration_count ;
|
||||
|
||||
if ( newton_iteration_limit < newton_iteration_count ) {
|
||||
break ;
|
||||
}
|
||||
};
|
||||
|
||||
if ( newton_iteration_count ) {
|
||||
perf_data.elem_time /= newton_iteration_count ;
|
||||
perf_data.matrix_gather_fill_time /= newton_iteration_count ;
|
||||
perf_data.matrix_boundary_condition_time /= newton_iteration_count ;
|
||||
}
|
||||
|
||||
if ( cg_iteration_count_total ) {
|
||||
perf_data.cg_iteration_time /= cg_iteration_count_total ;
|
||||
}
|
||||
|
||||
perf_data.newton_iteration_count = newton_iteration_count ;
|
||||
perf_data.cg_iteration_count = cg_iteration_count_total ;
|
||||
|
||||
//------------------------------------
|
||||
|
||||
{
|
||||
// For extracting the nodal solution and its coordinates:
|
||||
|
||||
typename mesh_type::node_coords_type::HostMirror node_coords_host =
|
||||
Kokkos::create_mirror( mesh.node_coords );
|
||||
|
||||
typename vector_type::HostMirror nodal_solution_host =
|
||||
Kokkos::create_mirror( nodal_solution );
|
||||
|
||||
Kokkos::deep_copy( node_coords_host , mesh.node_coords );
|
||||
Kokkos::deep_copy( nodal_solution_host , nodal_solution );
|
||||
|
||||
double tmp = 0 ;
|
||||
|
||||
for ( size_t i = 0 ; i < mesh.parallel_data_map.count_owned ; ++i ) {
|
||||
const coordinate_scalar_type x = node_coords_host(i,0);
|
||||
const coordinate_scalar_type y = node_coords_host(i,1);
|
||||
const coordinate_scalar_type z = node_coords_host(i,2);
|
||||
|
||||
const double Tx = exact_solution(z);
|
||||
const double Ts = nodal_solution_host(i);
|
||||
const double Te = std::abs( Tx - Ts ) / std::abs( Tx );
|
||||
|
||||
tmp = std::max( tmp , Te );
|
||||
|
||||
if ( print_error && 0.02 < Te ) {
|
||||
std::cout << " node( " << x << " " << y << " " << z << " ) = "
|
||||
<< Ts << " != exact_solution " << Tx
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
perf_data.error_max = comm::max( machine , tmp );
|
||||
}
|
||||
|
||||
return perf_data ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename Scalar , class Device , class FixtureElement >
|
||||
void driver( const char * const label ,
|
||||
comm::Machine machine ,
|
||||
const int gang_count ,
|
||||
const int elem_count_beg ,
|
||||
const int elem_count_end ,
|
||||
const int runs )
|
||||
{
|
||||
typedef Scalar scalar_type ;
|
||||
typedef Device execution_space ;
|
||||
typedef double coordinate_scalar_type ;
|
||||
typedef FixtureElement fixture_element_type ;
|
||||
|
||||
typedef BoxMeshFixture< coordinate_scalar_type ,
|
||||
execution_space ,
|
||||
fixture_element_type > fixture_type ;
|
||||
|
||||
typedef typename fixture_type::FEMeshType mesh_type ;
|
||||
|
||||
const size_t proc_count = comm::size( machine );
|
||||
const size_t proc_rank = comm::rank( machine );
|
||||
|
||||
if ( elem_count_beg == 0 || elem_count_end == 0 || runs == 0 ) return ;
|
||||
|
||||
if ( comm::rank( machine ) == 0 ) {
|
||||
std::cout << std::endl ;
|
||||
std::cout << "\"Kokkos::HybridFE::Nonlinear " << label << "\"" << std::endl;
|
||||
std::cout
|
||||
<< "\"Size\" , \"Size\" , \"Graphing\" , \"Element\" , \"Fill\" , \"Boundary\" , \"CG-Iter\" , \"CG-Iter\" , \"Newton-Iter\" , \"Max-node-error\""
|
||||
<< std::endl
|
||||
<< "\"elems\" , \"nodes\" , \"millisec\" , \"millisec\" , \"millisec\" , \"millisec\" , \"millisec\" , \"total-count\" , \"total-count\" , \"ratio\""
|
||||
<< std::endl ;
|
||||
}
|
||||
|
||||
const bool print_sample = 0 ;
|
||||
const double x_curve = 1.0 ;
|
||||
const double y_curve = 1.0 ;
|
||||
const double z_curve = 0.8 ;
|
||||
|
||||
for(int i = elem_count_beg ; i < elem_count_end ; i *= 2 )
|
||||
{
|
||||
const int ix = std::max( 1 , (int) cbrt( ((double) i) / 2.0 ) );
|
||||
const int iy = 1 + ix ;
|
||||
const int iz = 2 * iy ;
|
||||
const int global_elem_count = ix * iy * iz ;
|
||||
const int global_node_count = ( 2 * ix + 1 ) *
|
||||
( 2 * iy + 1 ) *
|
||||
( 2 * iz + 1 );
|
||||
|
||||
mesh_type mesh =
|
||||
fixture_type::create( proc_count , proc_rank , gang_count ,
|
||||
ix , iy , iz ,
|
||||
x_curve , y_curve , z_curve );
|
||||
|
||||
mesh.parallel_data_map.machine = machine ;
|
||||
|
||||
|
||||
PerformanceData perf_data , perf_best ;
|
||||
|
||||
for(int j = 0; j < runs; j++){
|
||||
|
||||
perf_data = run<scalar_type,fixture_type>(mesh,ix,iy,iz, print_sample );
|
||||
|
||||
if( j == 0 ) {
|
||||
perf_best = perf_data ;
|
||||
}
|
||||
else {
|
||||
perf_best.best( perf_data );
|
||||
}
|
||||
}
|
||||
|
||||
if ( comm::rank( machine ) == 0 ) {
|
||||
|
||||
std::cout << std::setw(8) << global_elem_count << " , "
|
||||
<< std::setw(8) << global_node_count << " , "
|
||||
<< std::setw(10) << perf_best.graph_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.elem_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.matrix_gather_fill_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.matrix_boundary_condition_time * 1000 << " , "
|
||||
<< std::setw(10) << perf_best.cg_iteration_time * 1000 << " , "
|
||||
<< std::setw(7) << perf_best.cg_iteration_count << " , "
|
||||
<< std::setw(3) << perf_best.newton_iteration_count << " , "
|
||||
<< std::setw(10) << perf_best.error_max
|
||||
<< std::endl ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace Nonlinear */
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
|
||||
#endif /* #ifndef HYBRIDFEM_IMPLICIT_HPP */
|
||||
|
||||
@ -1,390 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <HexElement.hpp>
|
||||
#include <FEMesh.hpp>
|
||||
|
||||
namespace HybridFEM {
|
||||
namespace Nonlinear {
|
||||
|
||||
template< class MeshType , typename ScalarType > struct ElementComputation ;
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<>
|
||||
struct ElementComputation< FEMesh< double , 27 , Kokkos::Cuda > , double >
|
||||
{
|
||||
typedef Kokkos::Cuda execution_space ;
|
||||
|
||||
static const unsigned ElementNodeCount = 27 ;
|
||||
|
||||
typedef HexElement_Data< ElementNodeCount > element_data_type ;
|
||||
typedef FEMesh< double , ElementNodeCount , execution_space > mesh_type ;
|
||||
|
||||
static const unsigned SpatialDim = element_data_type::spatial_dimension ;
|
||||
static const unsigned FunctionCount = element_data_type::function_count ;
|
||||
static const unsigned IntegrationCount = element_data_type::integration_count ;
|
||||
static const unsigned TensorDim = SpatialDim * SpatialDim ;
|
||||
|
||||
typedef Kokkos::View< double[][FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
|
||||
typedef Kokkos::View< double[][FunctionCount] , execution_space > elem_vectors_type ;
|
||||
typedef Kokkos::View< double[] , execution_space > value_vector_type ;
|
||||
|
||||
private:
|
||||
|
||||
const element_data_type elem_data ;
|
||||
const typename mesh_type::elem_node_ids_type elem_node_ids ;
|
||||
const typename mesh_type::node_coords_type node_coords ;
|
||||
const value_vector_type nodal_values ;
|
||||
const elem_matrices_type element_matrices ;
|
||||
const elem_vectors_type element_vectors ;
|
||||
const float coeff_K ;
|
||||
const unsigned elem_count ;
|
||||
unsigned invJacIndex[9][4] ;
|
||||
|
||||
static const unsigned j11 = 0 , j12 = 1 , j13 = 2 ,
|
||||
j21 = 3 , j22 = 4 , j23 = 5 ,
|
||||
j31 = 6 , j32 = 7 , j33 = 8 ;
|
||||
|
||||
// Can only handle up to 16 warps:
|
||||
static const unsigned BlockDimX = 32 ;
|
||||
static const unsigned BlockDimY = 7 ;
|
||||
|
||||
struct WorkSpace {
|
||||
double sum[ BlockDimY ][ BlockDimX ];
|
||||
|
||||
double value_at_integ[ IntegrationCount ];
|
||||
double gradx_at_integ[ IntegrationCount ];
|
||||
double grady_at_integ[ IntegrationCount ];
|
||||
double gradz_at_integ[ IntegrationCount ];
|
||||
|
||||
float spaceJac[ BlockDimY ][ 9 ];
|
||||
float spaceInvJac[ BlockDimY ][ 9 ];
|
||||
|
||||
float detJweight[ IntegrationCount ];
|
||||
|
||||
float dpsidx[ FunctionCount ][ IntegrationCount ];
|
||||
float dpsidy[ FunctionCount ][ IntegrationCount ];
|
||||
float dpsidz[ FunctionCount ][ IntegrationCount ];
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
ElementComputation ( const mesh_type & arg_mesh ,
|
||||
const elem_matrices_type & arg_element_matrices ,
|
||||
const elem_vectors_type & arg_element_vectors ,
|
||||
const value_vector_type & arg_nodal_values ,
|
||||
const float arg_coeff_K )
|
||||
: elem_data()
|
||||
, elem_node_ids( arg_mesh.elem_node_ids )
|
||||
, node_coords( arg_mesh.node_coords )
|
||||
, nodal_values( arg_nodal_values )
|
||||
, element_matrices( arg_element_matrices )
|
||||
, element_vectors( arg_element_vectors )
|
||||
, coeff_K( arg_coeff_K )
|
||||
, elem_count( arg_mesh.elem_node_ids.dimension_0() )
|
||||
{
|
||||
const unsigned jInvJ[9][4] =
|
||||
{ { j22 , j33 , j23 , j32 } ,
|
||||
{ j13 , j32 , j12 , j33 } ,
|
||||
{ j12 , j23 , j13 , j22 } ,
|
||||
|
||||
{ j23 , j31 , j21 , j33 } ,
|
||||
{ j11 , j33 , j13 , j31 } ,
|
||||
{ j13 , j21 , j11 , j23 } ,
|
||||
|
||||
{ j21 , j32 , j22 , j31 } ,
|
||||
{ j12 , j31 , j11 , j32 } ,
|
||||
{ j11 , j22 , j12 , j21 } };
|
||||
|
||||
for ( unsigned i = 0 ; i < 9 ; ++i ) {
|
||||
for ( unsigned j = 0 ; j < 4 ; ++j ) {
|
||||
invJacIndex[i][j] = jInvJ[i][j] ;
|
||||
}
|
||||
}
|
||||
|
||||
const unsigned shmem = sizeof(WorkSpace);
|
||||
const unsigned grid_max = 65535 ;
|
||||
const unsigned grid_count = std::min( grid_max , elem_count );
|
||||
|
||||
// For compute capability 2.x up to 1024 threads per block
|
||||
const dim3 block( BlockDimX , BlockDimY , 1 );
|
||||
const dim3 grid( grid_count , 1 , 1 );
|
||||
|
||||
Kokkos::Impl::CudaParallelLaunch< ElementComputation >( *this , grid , block , shmem );
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
//------------------------------------
|
||||
// Sum among the threadIdx.x
|
||||
|
||||
template< typename Type >
|
||||
__device__ inline static
|
||||
void sum_x( Type & result , const double value )
|
||||
{
|
||||
extern __shared__ WorkSpace work_data[] ;
|
||||
|
||||
volatile double * const base_sum =
|
||||
& work_data->sum[ threadIdx.y ][ threadIdx.x ] ;
|
||||
|
||||
base_sum[ 0] = value ;
|
||||
|
||||
if ( threadIdx.x < 16 ) {
|
||||
base_sum[0] += base_sum[16];
|
||||
base_sum[0] += base_sum[ 8];
|
||||
base_sum[0] += base_sum[ 4];
|
||||
base_sum[0] += base_sum[ 2];
|
||||
base_sum[0] += base_sum[ 1];
|
||||
}
|
||||
|
||||
if ( 0 == threadIdx.x ) {
|
||||
result = base_sum[0] ;
|
||||
}
|
||||
}
|
||||
|
||||
__device__ inline static
|
||||
void sum_x_clear()
|
||||
{
|
||||
extern __shared__ WorkSpace work_data[] ;
|
||||
|
||||
work_data->sum[ threadIdx.y ][ threadIdx.x ] = 0 ;
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
//------------------------------------
|
||||
|
||||
__device__ inline
|
||||
void evaluateFunctions( const unsigned ielem ) const
|
||||
{
|
||||
extern __shared__ WorkSpace work_data[] ;
|
||||
|
||||
// Each warp (threadIdx.y) computes an integration point
|
||||
// Each thread is responsible for a node / function.
|
||||
|
||||
const unsigned iFunc = threadIdx.x ;
|
||||
const bool hasFunc = iFunc < FunctionCount ;
|
||||
|
||||
//------------------------------------
|
||||
// Each warp gathers a different variable into 'elem_mat' shared memory.
|
||||
|
||||
if ( hasFunc ) {
|
||||
|
||||
const unsigned node = elem_node_ids( ielem , iFunc );
|
||||
|
||||
for ( unsigned iy = threadIdx.y ; iy < 4 ; iy += blockDim.y ) {
|
||||
switch( iy ) {
|
||||
case 0 : work_data->sum[0][iFunc] = node_coords(node,0); break ;
|
||||
case 1 : work_data->sum[1][iFunc] = node_coords(node,1); break ;
|
||||
case 2 : work_data->sum[2][iFunc] = node_coords(node,2); break ;
|
||||
case 3 : work_data->sum[3][iFunc] = nodal_values(node); break ;
|
||||
default: break ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads(); // Wait for all warps to finish gathering
|
||||
|
||||
// now get local 'const' copies in register space:
|
||||
|
||||
const double x = work_data->sum[0][ iFunc ];
|
||||
const double y = work_data->sum[1][ iFunc ];
|
||||
const double z = work_data->sum[2][ iFunc ];
|
||||
const double dof_val = work_data->sum[3][ iFunc ];
|
||||
|
||||
__syncthreads(); // Wait for all warps to finish extracting
|
||||
|
||||
sum_x_clear(); // Make sure summation scratch is zero
|
||||
|
||||
//------------------------------------
|
||||
// Each warp is now on its own computing an integration point
|
||||
// so no further explicit synchronizations are required.
|
||||
|
||||
if ( hasFunc ) {
|
||||
|
||||
float * const J = work_data->spaceJac[ threadIdx.y ];
|
||||
float * const invJ = work_data->spaceInvJac[ threadIdx.y ];
|
||||
|
||||
for ( unsigned iInt = threadIdx.y ;
|
||||
iInt < IntegrationCount ; iInt += blockDim.y ) {
|
||||
|
||||
const float val = elem_data.values[iInt][iFunc] ;
|
||||
const float gx = elem_data.gradients[iInt][0][iFunc] ;
|
||||
const float gy = elem_data.gradients[iInt][1][iFunc] ;
|
||||
const float gz = elem_data.gradients[iInt][2][iFunc] ;
|
||||
|
||||
sum_x( J[j11], gx * x );
|
||||
sum_x( J[j12], gx * y );
|
||||
sum_x( J[j13], gx * z );
|
||||
|
||||
sum_x( J[j21], gy * x );
|
||||
sum_x( J[j22], gy * y );
|
||||
sum_x( J[j23], gy * z );
|
||||
|
||||
sum_x( J[j31], gz * x );
|
||||
sum_x( J[j32], gz * y );
|
||||
sum_x( J[j33], gz * z );
|
||||
|
||||
// Inverse jacobian, only enough parallel work for 9 threads in the warp
|
||||
|
||||
if ( iFunc < TensorDim ) {
|
||||
|
||||
invJ[ iFunc ] =
|
||||
J[ invJacIndex[iFunc][0] ] * J[ invJacIndex[iFunc][1] ] -
|
||||
J[ invJacIndex[iFunc][2] ] * J[ invJacIndex[iFunc][3] ] ;
|
||||
|
||||
// Let all threads in the warp compute determinant into a register
|
||||
|
||||
const float detJ = J[j11] * invJ[j11] +
|
||||
J[j21] * invJ[j12] +
|
||||
J[j31] * invJ[j13] ;
|
||||
|
||||
invJ[ iFunc ] /= detJ ;
|
||||
|
||||
if ( 0 == iFunc ) {
|
||||
work_data->detJweight[ iInt ] = detJ * elem_data.weights[ iInt ] ;
|
||||
}
|
||||
}
|
||||
|
||||
// Transform bases gradients and compute value and gradient
|
||||
|
||||
const float dx = gx * invJ[j11] + gy * invJ[j12] + gz * invJ[j13];
|
||||
const float dy = gx * invJ[j21] + gy * invJ[j22] + gz * invJ[j23];
|
||||
const float dz = gx * invJ[j31] + gy * invJ[j32] + gz * invJ[j33];
|
||||
|
||||
work_data->dpsidx[iFunc][iInt] = dx ;
|
||||
work_data->dpsidy[iFunc][iInt] = dy ;
|
||||
work_data->dpsidz[iFunc][iInt] = dz ;
|
||||
|
||||
sum_x( work_data->gradx_at_integ[iInt] , dof_val * dx );
|
||||
sum_x( work_data->grady_at_integ[iInt] , dof_val * dy );
|
||||
sum_x( work_data->gradz_at_integ[iInt] , dof_val * dz );
|
||||
sum_x( work_data->value_at_integ[iInt] , dof_val * val );
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads(); // All shared data must be populated at return.
|
||||
}
|
||||
|
||||
__device__ inline
|
||||
void contributeResidualJacobian( const unsigned ielem ) const
|
||||
{
|
||||
extern __shared__ WorkSpace work_data[] ;
|
||||
|
||||
sum_x_clear(); // Make sure summation scratch is zero
|
||||
|
||||
// $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
|
||||
// $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$
|
||||
|
||||
const unsigned iInt = threadIdx.x ;
|
||||
|
||||
if ( iInt < IntegrationCount ) {
|
||||
|
||||
const double value_at_integ = work_data->value_at_integ[ iInt ] ;
|
||||
const double gradx_at_integ = work_data->gradx_at_integ[ iInt ] ;
|
||||
const double grady_at_integ = work_data->grady_at_integ[ iInt ] ;
|
||||
const double gradz_at_integ = work_data->gradz_at_integ[ iInt ] ;
|
||||
|
||||
const float detJweight = work_data->detJweight[ iInt ] ;
|
||||
const float coeff_K_detJweight = coeff_K * detJweight ;
|
||||
|
||||
for ( unsigned iRow = threadIdx.y ;
|
||||
iRow < FunctionCount ; iRow += blockDim.y ) {
|
||||
|
||||
const float value_row = elem_data.values[ iInt ][ iRow ] * detJweight ;
|
||||
const float dpsidx_row = work_data->dpsidx[ iRow ][ iInt ] * coeff_K_detJweight ;
|
||||
const float dpsidy_row = work_data->dpsidy[ iRow ][ iInt ] * coeff_K_detJweight ;
|
||||
const float dpsidz_row = work_data->dpsidz[ iRow ][ iInt ] * coeff_K_detJweight ;
|
||||
|
||||
const double res_del = dpsidx_row * gradx_at_integ +
|
||||
dpsidy_row * grady_at_integ +
|
||||
dpsidz_row * gradz_at_integ ;
|
||||
|
||||
const double res_val = value_at_integ * value_at_integ * value_row ;
|
||||
const double jac_val_row = 2 * value_at_integ * value_row ;
|
||||
|
||||
sum_x( element_vectors( ielem , iRow ) , res_del + res_val );
|
||||
|
||||
for ( unsigned iCol = 0 ; iCol < FunctionCount ; ++iCol ) {
|
||||
|
||||
const float jac_del =
|
||||
dpsidx_row * work_data->dpsidx[iCol][iInt] +
|
||||
dpsidy_row * work_data->dpsidy[iCol][iInt] +
|
||||
dpsidz_row * work_data->dpsidz[iCol][iInt] ;
|
||||
|
||||
const double jac_val =
|
||||
jac_val_row * elem_data.values[ iInt ][ iCol ] ;
|
||||
|
||||
sum_x( element_matrices( ielem , iRow , iCol ) , jac_del + jac_val );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads(); // All warps finish before refilling shared data
|
||||
}
|
||||
|
||||
__device__ inline
|
||||
void operator()(void) const
|
||||
{
|
||||
extern __shared__ WorkSpace work_data[] ;
|
||||
|
||||
for ( unsigned ielem = blockIdx.x ; ielem < elem_count ; ielem += gridDim.x ) {
|
||||
|
||||
evaluateFunctions( ielem );
|
||||
|
||||
contributeResidualJacobian( ielem );
|
||||
}
|
||||
}
|
||||
|
||||
}; /* ElementComputation */
|
||||
|
||||
} /* namespace Nonlinear */
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
@ -1,482 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_NONLINEARFUNCTORS_HPP
|
||||
#define KOKKOS_NONLINEARFUNCTORS_HPP
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
|
||||
namespace HybridFEM {
|
||||
namespace Nonlinear {
|
||||
|
||||
template< class MeshType , typename ScalarType > struct ElementComputation ;
|
||||
template< class MeshType , typename ScalarType > struct DirichletSolution ;
|
||||
template< class MeshType , typename ScalarType > struct DirichletResidual ;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/* A Cuda-specific specialization for the element computation functor. */
|
||||
#if defined( __CUDACC__ )
|
||||
#include <NonlinearElement_Cuda.hpp>
|
||||
#endif
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace HybridFEM {
|
||||
namespace Nonlinear {
|
||||
|
||||
template< typename ScalarCoordType , unsigned ElemNode , class DeviceType ,
|
||||
typename ScalarType >
|
||||
struct ElementComputation<
|
||||
FEMesh< ScalarCoordType , ElemNode , DeviceType > , ScalarType >
|
||||
{
|
||||
typedef DeviceType execution_space;
|
||||
typedef ScalarType scalar_type ;
|
||||
|
||||
static const unsigned ElementNodeCount = ElemNode ;
|
||||
|
||||
typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;
|
||||
|
||||
typedef HexElement_Data< ElementNodeCount > element_data_type ;
|
||||
|
||||
static const unsigned SpatialDim = element_data_type::spatial_dimension ;
|
||||
static const unsigned FunctionCount = element_data_type::function_count ;
|
||||
static const unsigned IntegrationCount = element_data_type::integration_count ;
|
||||
static const unsigned TensorDim = SpatialDim * SpatialDim ;
|
||||
|
||||
typedef Kokkos::View< scalar_type[][FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
|
||||
typedef Kokkos::View< scalar_type[][FunctionCount] , execution_space > elem_vectors_type ;
|
||||
typedef Kokkos::View< scalar_type[] , execution_space > value_vector_type ;
|
||||
|
||||
|
||||
private:
|
||||
|
||||
const element_data_type elem_data ;
|
||||
typename mesh_type::elem_node_ids_type elem_node_ids ;
|
||||
typename mesh_type::node_coords_type node_coords ;
|
||||
value_vector_type nodal_values ;
|
||||
elem_matrices_type element_matrices ;
|
||||
elem_vectors_type element_vectors ;
|
||||
scalar_type coeff_K ;
|
||||
|
||||
public:
|
||||
|
||||
ElementComputation( const mesh_type & arg_mesh ,
|
||||
const elem_matrices_type & arg_element_matrices ,
|
||||
const elem_vectors_type & arg_element_vectors ,
|
||||
const value_vector_type & arg_nodal_values ,
|
||||
const scalar_type arg_coeff_K )
|
||||
: elem_data()
|
||||
, elem_node_ids( arg_mesh.elem_node_ids )
|
||||
, node_coords( arg_mesh.node_coords )
|
||||
, nodal_values( arg_nodal_values )
|
||||
, element_matrices( arg_element_matrices )
|
||||
, element_vectors( arg_element_vectors )
|
||||
, coeff_K( arg_coeff_K )
|
||||
{
|
||||
const size_t elem_count = arg_mesh.elem_node_ids.dimension_0();
|
||||
|
||||
parallel_for( elem_count , *this );
|
||||
}
|
||||
|
||||
//------------------------------------
|
||||
|
||||
static const unsigned FLOPS_transform_gradients =
|
||||
/* Jacobian */ FunctionCount * TensorDim * 2 +
|
||||
/* Inverse jacobian */ TensorDim * 6 + 6 +
|
||||
/* Gradient transform */ FunctionCount * 15 ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
float transform_gradients(
|
||||
const float grad[][ FunctionCount ] , // Gradient of bases master element
|
||||
const double x[] ,
|
||||
const double y[] ,
|
||||
const double z[] ,
|
||||
float dpsidx[] ,
|
||||
float dpsidy[] ,
|
||||
float dpsidz[] ) const
|
||||
{
|
||||
enum { j11 = 0 , j12 = 1 , j13 = 2 ,
|
||||
j21 = 3 , j22 = 4 , j23 = 5 ,
|
||||
j31 = 6 , j32 = 7 , j33 = 8 };
|
||||
|
||||
// Jacobian accumulation:
|
||||
|
||||
double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
|
||||
for( unsigned i = 0; i < FunctionCount ; ++i ) {
|
||||
const double x1 = x[i] ;
|
||||
const double x2 = y[i] ;
|
||||
const double x3 = z[i] ;
|
||||
|
||||
const float g1 = grad[0][i] ;
|
||||
const float g2 = grad[1][i] ;
|
||||
const float g3 = grad[2][i] ;
|
||||
|
||||
J[j11] += g1 * x1 ;
|
||||
J[j12] += g1 * x2 ;
|
||||
J[j13] += g1 * x3 ;
|
||||
|
||||
J[j21] += g2 * x1 ;
|
||||
J[j22] += g2 * x2 ;
|
||||
J[j23] += g2 * x3 ;
|
||||
|
||||
J[j31] += g3 * x1 ;
|
||||
J[j32] += g3 * x2 ;
|
||||
J[j33] += g3 * x3 ;
|
||||
}
|
||||
|
||||
// Inverse jacobian:
|
||||
|
||||
float invJ[ TensorDim ] = {
|
||||
static_cast<float>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
|
||||
static_cast<float>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
|
||||
static_cast<float>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
|
||||
|
||||
static_cast<float>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
|
||||
static_cast<float>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
|
||||
static_cast<float>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
|
||||
|
||||
static_cast<float>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
|
||||
static_cast<float>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
|
||||
static_cast<float>( J[j11] * J[j22] - J[j12] * J[j21] ) };
|
||||
|
||||
const float detJ = J[j11] * invJ[j11] +
|
||||
J[j21] * invJ[j12] +
|
||||
J[j31] * invJ[j13] ;
|
||||
|
||||
const float detJinv = 1.0 / detJ ;
|
||||
|
||||
for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
|
||||
|
||||
// Transform gradients:
|
||||
|
||||
for( unsigned i = 0; i < FunctionCount ; ++i ) {
|
||||
const float g0 = grad[0][i];
|
||||
const float g1 = grad[1][i];
|
||||
const float g2 = grad[2][i];
|
||||
|
||||
dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
|
||||
dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
|
||||
dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
|
||||
}
|
||||
|
||||
return detJ ;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void contributeResidualJacobian(
|
||||
const float coeff_k ,
|
||||
const double dof_values[] ,
|
||||
const float dpsidx[] ,
|
||||
const float dpsidy[] ,
|
||||
const float dpsidz[] ,
|
||||
const float detJ ,
|
||||
const float integ_weight ,
|
||||
const float bases_vals[] ,
|
||||
double elem_res[] ,
|
||||
double elem_mat[][ FunctionCount ] ) const
|
||||
{
|
||||
double value_at_pt = 0 ;
|
||||
double gradx_at_pt = 0 ;
|
||||
double grady_at_pt = 0 ;
|
||||
double gradz_at_pt = 0 ;
|
||||
|
||||
for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
|
||||
value_at_pt += dof_values[m] * bases_vals[m] ;
|
||||
gradx_at_pt += dof_values[m] * dpsidx[m] ;
|
||||
grady_at_pt += dof_values[m] * dpsidy[m] ;
|
||||
gradz_at_pt += dof_values[m] * dpsidz[m] ;
|
||||
}
|
||||
|
||||
const scalar_type k_detJ_weight = coeff_k * detJ * integ_weight ;
|
||||
const double res_val = value_at_pt * value_at_pt * detJ * integ_weight ;
|
||||
const double mat_val = 2.0 * value_at_pt * detJ * integ_weight ;
|
||||
|
||||
// $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
|
||||
// $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$
|
||||
|
||||
for ( unsigned m = 0; m < FunctionCount; m++) {
|
||||
double * const mat = elem_mat[m] ;
|
||||
const float bases_val_m = bases_vals[m];
|
||||
const float dpsidx_m = dpsidx[m] ;
|
||||
const float dpsidy_m = dpsidy[m] ;
|
||||
const float dpsidz_m = dpsidz[m] ;
|
||||
|
||||
elem_res[m] += k_detJ_weight * ( dpsidx_m * gradx_at_pt +
|
||||
dpsidy_m * grady_at_pt +
|
||||
dpsidz_m * gradz_at_pt ) +
|
||||
res_val * bases_val_m ;
|
||||
|
||||
for( unsigned n = 0; n < FunctionCount; n++) {
|
||||
|
||||
mat[n] += k_detJ_weight * ( dpsidx_m * dpsidx[n] +
|
||||
dpsidy_m * dpsidy[n] +
|
||||
dpsidz_m * dpsidz[n] ) +
|
||||
mat_val * bases_val_m * bases_vals[n];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const unsigned ielem ) const
|
||||
{
|
||||
// Gather nodal coordinates and solution vector:
|
||||
|
||||
double x[ FunctionCount ] ;
|
||||
double y[ FunctionCount ] ;
|
||||
double z[ FunctionCount ] ;
|
||||
double val[ FunctionCount ] ;
|
||||
|
||||
for ( unsigned i = 0 ; i < ElementNodeCount ; ++i ) {
|
||||
const unsigned node_index = elem_node_ids( ielem , i );
|
||||
|
||||
x[i] = node_coords( node_index , 0 );
|
||||
y[i] = node_coords( node_index , 1 );
|
||||
z[i] = node_coords( node_index , 2 );
|
||||
|
||||
val[i] = nodal_values( node_index );
|
||||
}
|
||||
|
||||
double elem_vec[ FunctionCount ] ;
|
||||
double elem_mat[ FunctionCount ][ FunctionCount ] ;
|
||||
|
||||
for( unsigned i = 0; i < FunctionCount ; i++ ) {
|
||||
elem_vec[i] = 0 ;
|
||||
for( unsigned j = 0; j < FunctionCount ; j++){
|
||||
elem_mat[i][j] = 0 ;
|
||||
}
|
||||
}
|
||||
|
||||
for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
|
||||
float dpsidx[ FunctionCount ] ;
|
||||
float dpsidy[ FunctionCount ] ;
|
||||
float dpsidz[ FunctionCount ] ;
|
||||
|
||||
const float detJ =
|
||||
transform_gradients( elem_data.gradients[i] , x , y , z ,
|
||||
dpsidx , dpsidy , dpsidz );
|
||||
|
||||
contributeResidualJacobian( coeff_K ,
|
||||
val , dpsidx , dpsidy , dpsidz ,
|
||||
detJ ,
|
||||
elem_data.weights[i] ,
|
||||
elem_data.values[i] ,
|
||||
elem_vec , elem_mat );
|
||||
}
|
||||
|
||||
for( unsigned i = 0; i < FunctionCount ; i++){
|
||||
element_vectors(ielem, i) = elem_vec[i] ;
|
||||
for( unsigned j = 0; j < FunctionCount ; j++){
|
||||
element_matrices(ielem, i, j) = elem_mat[i][j] ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}; /* ElementComputation */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarCoordType , unsigned ElemNode , class DeviceType ,
|
||||
typename ScalarType >
|
||||
struct DirichletSolution<
|
||||
FEMesh< ScalarCoordType , ElemNode , DeviceType > ,
|
||||
ScalarType >
|
||||
{
|
||||
typedef DeviceType execution_space;
|
||||
|
||||
static const unsigned ElementNodeCount = ElemNode ;
|
||||
|
||||
typedef Kokkos::View< ScalarType[] , execution_space > vector_type ;
|
||||
|
||||
typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;
|
||||
|
||||
typename mesh_type::node_coords_type node_coords ;
|
||||
|
||||
vector_type solution ;
|
||||
ScalarCoordType bc_lower_z ;
|
||||
ScalarCoordType bc_upper_z ;
|
||||
ScalarType bc_lower_value ;
|
||||
ScalarType bc_upper_value ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const unsigned inode ) const
|
||||
{
|
||||
|
||||
// Apply dirichlet boundary condition on the Solution vector.
|
||||
// Define boundary node values to be either bc_lower_value or
|
||||
// bc_upper_value, depending on which boundary face they lie on.
|
||||
// Non-boundary terms will be left at their previous value.
|
||||
|
||||
const ScalarCoordType z = node_coords(inode,2);
|
||||
const bool bc_lower = z <= bc_lower_z ;
|
||||
const bool bc_upper = bc_upper_z <= z ;
|
||||
|
||||
if ( bc_lower || bc_upper ) {
|
||||
const ScalarType bc_value = bc_lower ? bc_lower_value
|
||||
: bc_upper_value ;
|
||||
|
||||
solution(inode) = bc_value ; // set the solution vector
|
||||
}
|
||||
}
|
||||
|
||||
static void apply( const vector_type & solution ,
|
||||
const mesh_type & mesh ,
|
||||
const ScalarCoordType bc_lower_z ,
|
||||
const ScalarCoordType bc_upper_z ,
|
||||
const ScalarType bc_lower_value ,
|
||||
const ScalarType bc_upper_value )
|
||||
{
|
||||
DirichletSolution op ;
|
||||
op.node_coords = mesh.node_coords ;
|
||||
op.solution = solution ;
|
||||
op.bc_lower_z = bc_lower_z ;
|
||||
op.bc_upper_z = bc_upper_z ;
|
||||
op.bc_lower_value = bc_lower_value ;
|
||||
op.bc_upper_value = bc_upper_value ;
|
||||
parallel_for( solution.dimension_0() , op );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename ScalarCoordType , unsigned ElemNode , class DeviceType ,
|
||||
typename ScalarType >
|
||||
struct DirichletResidual<
|
||||
FEMesh< ScalarCoordType , ElemNode , DeviceType > , ScalarType >
|
||||
{
|
||||
typedef DeviceType execution_space;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
|
||||
static const unsigned ElementNodeCount = ElemNode ;
|
||||
|
||||
typedef Kokkos::CrsMatrix< ScalarType , execution_space > matrix_type ;
|
||||
typedef Kokkos::View< ScalarType[] , execution_space > vector_type ;
|
||||
|
||||
typedef FEMesh< ScalarCoordType , ElementNodeCount , execution_space > mesh_type ;
|
||||
|
||||
typename mesh_type::node_coords_type node_coords ;
|
||||
matrix_type matrix ;
|
||||
vector_type rhs ;
|
||||
ScalarCoordType bc_lower_z ;
|
||||
ScalarCoordType bc_upper_z ;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const unsigned inode ) const
|
||||
{
|
||||
// Apply a dirichlet boundary condition to 'irow'
|
||||
// to maintain the symmetry of the original
|
||||
// global stiffness matrix, zero out the columns
|
||||
// that correspond to boundary conditions, and
|
||||
// adjust the load vector accordingly
|
||||
|
||||
const size_type iBeg = matrix.graph.row_map[inode];
|
||||
const size_type iEnd = matrix.graph.row_map[inode+1];
|
||||
|
||||
const ScalarCoordType z = node_coords(inode,2);
|
||||
const bool bc_lower = z <= bc_lower_z ;
|
||||
const bool bc_upper = bc_upper_z <= z ;
|
||||
|
||||
if ( bc_lower || bc_upper ) {
|
||||
rhs(inode) = 0 ; // set the residual vector
|
||||
|
||||
// zero each value on the row, and leave a one
|
||||
// on the diagonal
|
||||
|
||||
for( size_type i = iBeg ; i < iEnd ; i++) {
|
||||
matrix.coefficients(i) =
|
||||
(int) inode == matrix.graph.entries(i) ? 1 : 0 ;
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
||||
// Find any columns that are boundary conditions.
|
||||
// Clear them and adjust the load vector
|
||||
|
||||
for( size_type i = iBeg ; i < iEnd ; i++ ) {
|
||||
const size_type cnode = matrix.graph.entries(i) ;
|
||||
|
||||
const ScalarCoordType zc = node_coords(cnode,2);
|
||||
const bool c_bc_lower = zc <= bc_lower_z ;
|
||||
const bool c_bc_upper = bc_upper_z <= zc ;
|
||||
|
||||
if ( c_bc_lower || c_bc_upper ) {
|
||||
|
||||
matrix.coefficients(i) = 0 ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void apply( const matrix_type & linsys_matrix ,
|
||||
const vector_type & linsys_rhs ,
|
||||
const mesh_type & mesh ,
|
||||
const ScalarCoordType bc_lower_z ,
|
||||
const ScalarCoordType bc_upper_z)
|
||||
{
|
||||
const size_t row_count = linsys_matrix.graph.row_map.dimension_0() - 1 ;
|
||||
|
||||
DirichletResidual op ;
|
||||
op.node_coords = mesh.node_coords ;
|
||||
op.matrix = linsys_matrix ;
|
||||
op.rhs = linsys_rhs ;
|
||||
op.bc_lower_z = bc_lower_z ;
|
||||
op.bc_upper_z = bc_upper_z ;
|
||||
parallel_for( row_count , op );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} /* namespace Nonlinear */
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
#endif /* #ifndef KOKKOS_NONLINEARFUNCTORS_HPP */
|
||||
|
||||
@ -1,167 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef PARALLELCOMM_HPP
|
||||
#define PARALLELCOMM_HPP
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#include <Kokkos_Macros.hpp>
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
|
||||
#include <mpi.h>
|
||||
#include <string>
|
||||
|
||||
namespace comm {
|
||||
|
||||
struct Machine {
|
||||
MPI_Comm mpi_comm ;
|
||||
|
||||
Machine() : mpi_comm( MPI_COMM_NULL ) {}
|
||||
|
||||
Machine( const Machine & rhs )
|
||||
: mpi_comm( rhs.mpi_comm ) {}
|
||||
|
||||
Machine( MPI_Comm c ) : mpi_comm( c ) {}
|
||||
|
||||
static Machine init( int * argc , char *** argv )
|
||||
{
|
||||
MPI_Init( argc , argv );
|
||||
return Machine( MPI_COMM_WORLD );
|
||||
}
|
||||
|
||||
static void finalize() { MPI_Finalize(); }
|
||||
};
|
||||
|
||||
inline
|
||||
unsigned size( Machine machine )
|
||||
{
|
||||
int np ; MPI_Comm_size( machine.mpi_comm , & np ); return np ;
|
||||
}
|
||||
|
||||
inline
|
||||
unsigned rank( Machine machine )
|
||||
{
|
||||
int ip ; MPI_Comm_rank( machine.mpi_comm , & ip ); return ip ;
|
||||
}
|
||||
|
||||
inline
|
||||
double max( Machine machine , double local )
|
||||
{
|
||||
double global = 0;
|
||||
MPI_Allreduce( & local , & global , 1 , MPI_DOUBLE , MPI_MAX , machine.mpi_comm );
|
||||
return global ;
|
||||
}
|
||||
|
||||
inline
|
||||
std::string command_line( Machine machine , const int argc , const char * const * const argv )
|
||||
{
|
||||
std::string argline ;
|
||||
|
||||
if ( 0 == rank( machine ) ) {
|
||||
for ( int i = 1 ; i < argc ; ++i ) {
|
||||
argline.append(" ").append( argv[i] );
|
||||
}
|
||||
}
|
||||
|
||||
int length = argline.length();
|
||||
MPI_Bcast( & length , 1 , MPI_INT , 0 , machine.mpi_comm );
|
||||
argline.resize( length , ' ' );
|
||||
MPI_Bcast( (void*) argline.data() , length , MPI_CHAR , 0 , machine.mpi_comm );
|
||||
|
||||
return argline ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else /* ! defined( KOKKOS_HAVE_MPI ) */
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace comm {
|
||||
|
||||
// Stub for non-parallel
|
||||
|
||||
struct Machine {
|
||||
static Machine init( int * , char *** )
|
||||
{ return Machine(); }
|
||||
|
||||
static void finalize() {}
|
||||
};
|
||||
|
||||
inline
|
||||
unsigned size( Machine ) { return 1 ; }
|
||||
|
||||
inline
|
||||
unsigned rank( Machine ) { return 0 ; }
|
||||
|
||||
inline
|
||||
double max( Machine , double local )
|
||||
{ return local ; }
|
||||
|
||||
inline
|
||||
std::string command_line( Machine machine , const int argc , const char * const * const argv )
|
||||
{
|
||||
std::string argline ;
|
||||
|
||||
if ( 0 == rank( machine ) ) {
|
||||
for ( int i = 1 ; i < argc ; ++i ) {
|
||||
argline.append(" ").append( argv[i] );
|
||||
}
|
||||
}
|
||||
|
||||
return argline ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif /* ! defined( KOKKOS_HAVE_MPI ) */
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef PARALLELCOMM_HPP */
|
||||
|
||||
|
||||
@ -1,517 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef KOKKOS_PARALLELDATAMAP_HPP
|
||||
#define KOKKOS_PARALLELDATAMAP_HPP
|
||||
|
||||
#include <utility>
|
||||
#include <limits>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <ParallelComm.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
/** \brief Parallel distributed data mapping
|
||||
*
|
||||
* ordering { interior : { owned items not sent elsewhere }
|
||||
* send : { owned items sent }
|
||||
* receive : { not-owned items received } }
|
||||
*
|
||||
* recv { { N ghosted items from process P : ( P , N ) } }
|
||||
*
|
||||
* send { { N send items to process P : ( P , N ) } }
|
||||
*
|
||||
* send_item { send item offsets within 'send' range }
|
||||
*/
|
||||
struct ParallelDataMap {
|
||||
typedef View< unsigned*[2], HostSpace > host_recv_type ;
|
||||
typedef View< unsigned*[2], HostSpace > host_send_type ;
|
||||
typedef View< unsigned* , HostSpace > host_send_item_type ;
|
||||
|
||||
comm::Machine machine ;
|
||||
host_recv_type host_recv ;
|
||||
host_send_type host_send ;
|
||||
host_send_item_type host_send_item ;
|
||||
unsigned count_interior ;
|
||||
unsigned count_send ;
|
||||
unsigned count_owned ; // = count_interior + count_send
|
||||
unsigned count_receive ;
|
||||
|
||||
void assign( const unsigned arg_count_interior ,
|
||||
const unsigned arg_count_owned ,
|
||||
const unsigned arg_count_total ,
|
||||
const unsigned arg_recv_msg ,
|
||||
const unsigned arg_send_msg ,
|
||||
const unsigned arg_send_count )
|
||||
{
|
||||
const std::string label("Kokkos::ParallelDataMap buffer");
|
||||
|
||||
count_interior = arg_count_interior ;
|
||||
count_owned = arg_count_owned ;
|
||||
count_send = arg_count_owned - arg_count_interior ;
|
||||
count_receive = arg_count_total - arg_count_owned ;
|
||||
|
||||
host_recv = host_recv_type( label , arg_recv_msg );
|
||||
host_send = host_send_type( label , arg_send_msg );
|
||||
host_send_item = host_send_item_type( label , arg_send_count );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//PackArray
|
||||
//----------------------------------------------------------------------------
|
||||
template< class ArrayType , class Rank = void >
|
||||
struct PackArray ;
|
||||
|
||||
template< typename DeviceType, typename ValueType >
|
||||
struct PackArray< View< ValueType* , DeviceType > , void >
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename DeviceType::size_type size_type ;
|
||||
typedef View< ValueType* , execution_space > array_type ;
|
||||
typedef View< ValueType* , execution_space > buffer_type ;
|
||||
|
||||
private:
|
||||
|
||||
buffer_type output ;
|
||||
array_type input ;
|
||||
size_type base ;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i ) const
|
||||
{ output[i] = input(base+i); }
|
||||
|
||||
inline
|
||||
static
|
||||
void pack( const buffer_type & arg_output ,
|
||||
const size_type arg_begin ,
|
||||
const size_type arg_count ,
|
||||
const array_type & arg_input )
|
||||
{
|
||||
PackArray op ;
|
||||
op.output = arg_output ;
|
||||
op.input = arg_input ;
|
||||
op.base = arg_begin ;
|
||||
parallel_for( arg_count , op );
|
||||
}
|
||||
};
|
||||
|
||||
template< typename DeviceType, typename ValueType , unsigned N1 >
|
||||
struct PackArray< View< ValueType*[N1] , DeviceType > , void >
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename DeviceType::size_type size_type ;
|
||||
typedef View< ValueType*[N1] , execution_space > array_type ;
|
||||
typedef View< ValueType* , execution_space > buffer_type ;
|
||||
|
||||
private:
|
||||
|
||||
buffer_type output ;
|
||||
array_type input ;
|
||||
size_type base ;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i ) const
|
||||
{
|
||||
for ( size_type j = 0 , k = i * N1 ; j < N1 ; ++j , ++k ) {
|
||||
output[k] = input(base+i,j);
|
||||
}
|
||||
}
|
||||
|
||||
inline static
|
||||
void pack( const buffer_type & arg_output ,
|
||||
const size_type arg_begin ,
|
||||
const size_type arg_count ,
|
||||
const array_type & arg_input )
|
||||
{
|
||||
if ( arg_count ) {
|
||||
PackArray op ;
|
||||
op.output = arg_output ;
|
||||
op.input = arg_input ;
|
||||
op.base = arg_begin ;
|
||||
parallel_for( arg_count , op );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//UnpackArray
|
||||
//----------------------------------------------------------------------------
|
||||
template< class ArrayType , class Rank = void > struct UnpackArray ;
|
||||
|
||||
template< typename DeviceType, typename ValueType >
|
||||
struct UnpackArray< View< ValueType* , DeviceType > , void >
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename DeviceType::size_type size_type ;
|
||||
typedef View< ValueType* , execution_space > array_type ;
|
||||
typedef View< ValueType* , execution_space > buffer_type ;
|
||||
|
||||
private:
|
||||
|
||||
array_type output ;
|
||||
buffer_type input ;
|
||||
size_type base ;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i ) const
|
||||
{ output(base+i) = input[i]; }
|
||||
|
||||
inline
|
||||
static
|
||||
void unpack( const array_type & arg_output ,
|
||||
const buffer_type & arg_input ,
|
||||
const size_type arg_begin ,
|
||||
const size_type arg_count )
|
||||
{
|
||||
UnpackArray op ;
|
||||
op.output = arg_output ;
|
||||
op.input = arg_input ;
|
||||
op.base = arg_begin ;
|
||||
parallel_for( arg_count , op );
|
||||
}
|
||||
};
|
||||
|
||||
template< typename DeviceType, typename ValueType , unsigned N1 >
|
||||
struct UnpackArray< View< ValueType*[N1] , DeviceType > , void >
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename DeviceType::size_type size_type ;
|
||||
typedef View< ValueType* , execution_space > buffer_type ;
|
||||
typedef View< ValueType*[N1] , execution_space > array_type ;
|
||||
|
||||
private:
|
||||
|
||||
array_type output ;
|
||||
buffer_type input ;
|
||||
size_type base ;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i ) const
|
||||
{
|
||||
for ( size_type j = 0 , k = i * N1 ; j < N1 ; ++j , ++k ) {
|
||||
output(base+i,j) = input(k);
|
||||
}
|
||||
}
|
||||
|
||||
inline
|
||||
static
|
||||
void unpack( const array_type & arg_output ,
|
||||
const buffer_type & arg_input ,
|
||||
const size_type arg_begin ,
|
||||
const size_type arg_count )
|
||||
{
|
||||
if ( arg_count ) {
|
||||
UnpackArray op ;
|
||||
op.output = arg_output ;
|
||||
op.input = arg_input ;
|
||||
op.base = arg_begin ;
|
||||
parallel_for( arg_count , op );
|
||||
}
|
||||
}
|
||||
};
|
||||
//----------------------------------------------------------------------------
|
||||
template< class ValueType , class Device , class DataMap >
|
||||
class AsyncExchange ;
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Application call procedure:
|
||||
//
|
||||
// construct: AsyncExchange object
|
||||
// * pack send buffer on device
|
||||
// initiate: copy send buffer from device to host
|
||||
// * dispatch asynchronous local work
|
||||
// complete: send/receive on host, copy receive buffer to device
|
||||
// * unpack receive buffer on device
|
||||
// destroy: AsyncExchange object
|
||||
//
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#ifdef KOKKOS_HAVE_MPI
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< class ValueType , class Device >
|
||||
class AsyncExchange< ValueType, Device , Kokkos::ParallelDataMap > {
|
||||
public:
|
||||
|
||||
typedef Device execution_space ;
|
||||
typedef Kokkos::ParallelDataMap data_map_type ;
|
||||
typedef Kokkos::View< ValueType* , execution_space > buffer_dev_type ;
|
||||
typedef typename buffer_dev_type::HostMirror buffer_host_type ;
|
||||
|
||||
private:
|
||||
|
||||
static const int mpi_tag = 11 ;
|
||||
|
||||
const data_map_type data_map ;
|
||||
unsigned chunk_size ;
|
||||
unsigned send_count_max ;
|
||||
buffer_host_type host_recv_buffer ;
|
||||
buffer_host_type host_send_buffer ;
|
||||
buffer_host_type send_msg_buffer ;
|
||||
buffer_dev_type dev_buffer ;
|
||||
buffer_dev_type dev_send_buffer ; // Subview for send
|
||||
buffer_dev_type dev_recv_buffer ; // Subview for receive
|
||||
std::vector< MPI_Request > recv_request ;
|
||||
|
||||
public:
|
||||
|
||||
const buffer_dev_type & buffer() const { return dev_buffer ; }
|
||||
|
||||
AsyncExchange( const data_map_type & arg_data_map ,
|
||||
const size_t arg_chunk )
|
||||
: data_map( arg_data_map )
|
||||
, chunk_size( arg_chunk )
|
||||
, send_count_max( 0 )
|
||||
, host_recv_buffer()
|
||||
, host_send_buffer()
|
||||
, send_msg_buffer()
|
||||
, dev_buffer()
|
||||
, dev_send_buffer()
|
||||
, dev_recv_buffer()
|
||||
, recv_request()
|
||||
{
|
||||
const size_t send_msg_count = arg_data_map.host_send.dimension_0();
|
||||
const size_t recv_msg_count = arg_data_map.host_recv.dimension_0();
|
||||
|
||||
const size_t send_msg_length = arg_chunk * arg_data_map.count_send ;
|
||||
const size_t recv_msg_length = arg_chunk * arg_data_map.count_receive ;
|
||||
|
||||
for ( size_t i = 0 ; i < send_msg_count ; ++i ) {
|
||||
send_count_max = std::max( send_count_max ,
|
||||
(unsigned) arg_data_map.host_send(i,1) );
|
||||
}
|
||||
|
||||
// A single shared buffer on the device can be used for
|
||||
// send and receive message buffers.
|
||||
dev_buffer = buffer_dev_type(
|
||||
std::string("AsyncExchange dev_buffer") ,
|
||||
std::max( send_msg_length , recv_msg_length ) );
|
||||
|
||||
// Total send subview of the device buffer
|
||||
dev_send_buffer =
|
||||
Kokkos::subview( dev_buffer , std::pair<size_t,size_t>( 0 , send_msg_length ) );
|
||||
|
||||
// Total receive subview of the device buffer
|
||||
dev_recv_buffer =
|
||||
Kokkos::subview( dev_buffer , std::pair<size_t,size_t>( 0 , recv_msg_length ) );
|
||||
|
||||
// Total receive message buffer on the host:
|
||||
host_recv_buffer = buffer_host_type(
|
||||
std::string("AsyncExchange host_recv_buffer") ,
|
||||
recv_msg_length );
|
||||
|
||||
// Total send message buffer on the host:
|
||||
host_send_buffer = buffer_host_type(
|
||||
std::string("AsyncExchange host_send_buffer") ,
|
||||
send_msg_length );
|
||||
|
||||
// Individual send message buffer on the host:
|
||||
send_msg_buffer = buffer_host_type(
|
||||
std::string("AsyncExchange send_msg_buffer") ,
|
||||
arg_chunk * send_count_max );
|
||||
|
||||
// MPI asynchronous receive request handles:
|
||||
recv_request.assign( recv_msg_count , MPI_REQUEST_NULL );
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
void setup()
|
||||
{
|
||||
{ // Post receives:
|
||||
const size_t recv_msg_count = data_map.host_recv.dimension_0();
|
||||
|
||||
ValueType * ptr = host_recv_buffer.ptr_on_device();
|
||||
|
||||
for ( size_t i = 0 ; i < recv_msg_count ; ++i ) {
|
||||
const int proc = data_map.host_recv(i,0);
|
||||
const int count = data_map.host_recv(i,1) * chunk_size ;
|
||||
|
||||
MPI_Irecv( ptr , count * sizeof(ValueType) , MPI_BYTE ,
|
||||
proc , mpi_tag , data_map.machine.mpi_comm ,
|
||||
& recv_request[i] );
|
||||
|
||||
ptr += count ;
|
||||
}
|
||||
}
|
||||
|
||||
// Copy send buffer from the device to host memory for sending
|
||||
|
||||
Kokkos::deep_copy( host_send_buffer , dev_send_buffer );
|
||||
|
||||
// Done with the device until communication is complete.
|
||||
// Application can dispatch asynchronous work on the device.
|
||||
}
|
||||
|
||||
// Application can dispatch local work to device ...
|
||||
// No communication progress until main thread calls 'send_receive'
|
||||
|
||||
void send_receive()
|
||||
{
|
||||
const size_t recv_msg_count = data_map.host_recv.dimension_0();
|
||||
const size_t send_msg_count = data_map.host_send.dimension_0();
|
||||
|
||||
// Pack and send:
|
||||
|
||||
for ( size_t i = 0 , j = 0 ; i < send_msg_count ; ++i ) {
|
||||
const int proc = data_map.host_send(i,0);
|
||||
const int count = data_map.host_send(i,1);
|
||||
|
||||
for ( int k = 0 , km = 0 ; k < count ; ++k , ++j ) {
|
||||
const int km_end = km + chunk_size ;
|
||||
int ki = chunk_size * data_map.host_send_item(j);
|
||||
|
||||
for ( ; km < km_end ; ++km , ++ki ) {
|
||||
send_msg_buffer[km] = host_send_buffer[ki];
|
||||
}
|
||||
}
|
||||
|
||||
// MPI_Ssend blocks until
|
||||
// (1) a receive is matched for the message and
|
||||
// (2) the send buffer can be re-used.
|
||||
//
|
||||
// It is suggested that MPI_Ssend will have the best performance:
|
||||
// http://www.mcs.anl.gov/research/projects/mpi/sendmode.html .
|
||||
|
||||
MPI_Ssend( send_msg_buffer.ptr_on_device(),
|
||||
count * chunk_size * sizeof(ValueType) , MPI_BYTE ,
|
||||
proc , mpi_tag , data_map.machine.mpi_comm );
|
||||
}
|
||||
|
||||
// Wait for receives and verify:
|
||||
|
||||
for ( size_t i = 0 ; i < recv_msg_count ; ++i ) {
|
||||
MPI_Status recv_status ;
|
||||
int recv_which = 0 ;
|
||||
int recv_size = 0 ;
|
||||
|
||||
MPI_Waitany( recv_msg_count , & recv_request[0] ,
|
||||
& recv_which , & recv_status );
|
||||
|
||||
const int recv_proc = recv_status.MPI_SOURCE ;
|
||||
|
||||
MPI_Get_count( & recv_status , MPI_BYTE , & recv_size );
|
||||
|
||||
// Verify message properly received:
|
||||
|
||||
const int expected_proc = data_map.host_recv(recv_which,0);
|
||||
const int expected_size = data_map.host_recv(recv_which,1) *
|
||||
chunk_size * sizeof(ValueType);
|
||||
|
||||
if ( ( expected_proc != recv_proc ) ||
|
||||
( expected_size != recv_size ) ) {
|
||||
std::ostringstream msg ;
|
||||
msg << "AsyncExchange error:"
|
||||
<< " P" << comm::rank( data_map.machine )
|
||||
<< " received from P" << recv_proc
|
||||
<< " size " << recv_size
|
||||
<< " expected " << expected_size
|
||||
<< " from P" << expected_proc ;
|
||||
throw std::runtime_error( msg.str() );
|
||||
}
|
||||
}
|
||||
|
||||
// Copy received data to device memory.
|
||||
|
||||
Kokkos::deep_copy( dev_recv_buffer , host_recv_buffer );
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#else /* ! #ifdef KOKKOS_HAVE_MPI */
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< class ValueType , class Device >
|
||||
class AsyncExchange< ValueType, Device , Kokkos::ParallelDataMap > {
|
||||
public:
|
||||
|
||||
typedef Device execution_space ;
|
||||
typedef Kokkos::ParallelDataMap data_map_type ;
|
||||
typedef Kokkos::View< ValueType* , execution_space > buffer_dev_type ;
|
||||
typedef typename buffer_dev_type::HostMirror buffer_host_type ;
|
||||
|
||||
buffer_dev_type dev_buffer ;
|
||||
|
||||
public:
|
||||
|
||||
const buffer_dev_type & buffer() const { return dev_buffer ; }
|
||||
|
||||
AsyncExchange( const data_map_type & , const size_t )
|
||||
: dev_buffer()
|
||||
{ }
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
void setup() { }
|
||||
|
||||
void send_receive() { }
|
||||
};
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
#endif /* ! #ifdef KOKKOS_HAVE_MPI */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef KOKKOS_PARALLELDATAMAP_HPP */
|
||||
|
||||
|
||||
@ -1,178 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#if 0
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <ParallelMachine.hpp>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#if ! defined( KOKKOS_HAVE_MPI )
|
||||
#define MPI_COMM_NULL 0
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
namespace Parallel {
|
||||
|
||||
Machine::Machine( int * argc , char *** argv )
|
||||
: m_mpi_comm( MPI_COMM_NULL )
|
||||
, m_mpi_size(0)
|
||||
, m_mpi_rank(0)
|
||||
, m_mpi_gpu(0)
|
||||
{
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
//------------------------------------
|
||||
// Might be using a Cuda aware version of MPI.
|
||||
// Must select Cuda device before initializing MPI.
|
||||
{
|
||||
int i = 1 ;
|
||||
for ( ; i < *argc && strcmp((*argv)[i],"mpi_cuda") ; ++i );
|
||||
|
||||
if ( i < *argc ) {
|
||||
// Determine, if possible, what will be the node-local
|
||||
// rank of the MPI process once MPI has been initialized.
|
||||
// This rank is needed to set the Cuda device before 'mvapich'
|
||||
// is initialized.
|
||||
|
||||
const char * const mvapich_local_rank = getenv("MV2_COMM_WORLD_LOCAL_RANK");
|
||||
const char * const slurm_local_rank = getenv("SLURM_LOCALID");
|
||||
|
||||
const int pre_mpi_local_rank =
|
||||
0 != mvapich_local_rank ? atoi( mvapich_local_rank ) : (
|
||||
0 != slurm_local_rank ? atoi( slurm_local_rank ) : (
|
||||
-1 ) );
|
||||
|
||||
if ( 0 <= pre_mpi_local_rank ) {
|
||||
|
||||
const int ngpu = Kokkos::Cuda::detect_device_count();
|
||||
|
||||
const int cuda_device_rank = pre_mpi_local_rank % ngpu ;
|
||||
|
||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( cuda_device_rank ) );
|
||||
|
||||
m_mpi_gpu = 1 ;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
//------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
MPI_Init( argc , argv );
|
||||
m_mpi_comm = MPI_COMM_WORLD ;
|
||||
MPI_Comm_size( m_mpi_comm , & m_mpi_size );
|
||||
MPI_Comm_rank( m_mpi_comm , & m_mpi_rank );
|
||||
#endif
|
||||
|
||||
// Query hwloc after MPI initialization to allow MPI binding:
|
||||
//------------------------------------
|
||||
// Request to use host device:
|
||||
{
|
||||
int i = 1 ;
|
||||
for ( ; i < *argc && strcmp((*argv)[i],"host") ; ++i );
|
||||
|
||||
if ( i < *argc ) {
|
||||
|
||||
unsigned team_count = Kokkos::hwloc::get_available_numa_count();
|
||||
unsigned threads_per_team = Kokkos::hwloc::get_available_cores_per_numa() *
|
||||
Kokkos::hwloc::get_available_threads_per_core();
|
||||
|
||||
if ( i + 2 < *argc ) {
|
||||
team_count = atoi( (*argv)[i+1] );
|
||||
threads_per_team = atoi( (*argv)[i+2] );
|
||||
}
|
||||
|
||||
Kokkos::Threads::initialize( team_count * threads_per_team );
|
||||
}
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
//------------------------------------
|
||||
// Request to use Cuda device and not already initialized.
|
||||
if ( ! m_mpi_gpu ) {
|
||||
int i = 1 ;
|
||||
for ( ; i < *argc && strcmp((*argv)[i],"mpi_cuda") && strcmp((*argv)[i],"cuda") ; ++i );
|
||||
|
||||
if ( i < *argc ) {
|
||||
|
||||
const int ngpu = Kokkos::Cuda::detect_device_count();
|
||||
|
||||
const int cuda_device_rank = m_mpi_rank % ngpu ;
|
||||
|
||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( cuda_device_rank ) );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
Machine::~Machine()
|
||||
{
|
||||
Kokkos::Threads::finalize();
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
Kokkos::Cuda::finalize();
|
||||
#endif
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
MPI_Finalize();
|
||||
#endif
|
||||
}
|
||||
|
||||
void Machine::print_configuration( std::ostream & msg ) const
|
||||
{
|
||||
msg << "MPI [ " << m_mpi_rank << " / " << m_mpi_size << " ]" << std::endl ;
|
||||
Kokkos::Threads::print_configuration( msg );
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
Kokkos::Cuda::print_configuration( msg );
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif /* #if 0 */
|
||||
|
||||
@ -1,118 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#error "ParallelMachine"
|
||||
|
||||
#ifndef PARALLELMACHINE_HPP
|
||||
#define PARALLELMACHINE_HPP
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#include <iosfwd>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_MPI )
|
||||
#include <mpi.h>
|
||||
#else
|
||||
typedef int MPI_Comm ;
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
namespace Parallel {
|
||||
|
||||
/** \brief Hybrid parallel machine with MPI+Kokkos::Threads or MPI+Kokkos::Cuda.
|
||||
*
|
||||
* Initialization of MPI and Kokkos device has interdependencies which this
|
||||
* class manages. The command line and environment variables are queried to initialize
|
||||
* the Threads or Cuda device:
|
||||
*
|
||||
* 1) cuda : initializes Cuda device
|
||||
* 2) host : initializes Threads device with all hwloc detected cores.
|
||||
* 3) host #gang #worker : initializes Threads with specified
|
||||
*/
|
||||
class Machine {
|
||||
private:
|
||||
|
||||
MPI_Comm m_mpi_comm ;
|
||||
int m_mpi_size ;
|
||||
int m_mpi_rank ;
|
||||
unsigned m_mpi_gpu ;
|
||||
unsigned m_gpu_arch ;
|
||||
|
||||
Machine();
|
||||
Machine( const Machine & );
|
||||
Machine & operator = ( const Machine & );
|
||||
|
||||
public:
|
||||
|
||||
/** \brief Coordinated initialize MPI, Cuda, or Threads devices from 'main'. */
|
||||
Machine( int * argc , char *** argv );
|
||||
|
||||
~Machine();
|
||||
|
||||
MPI_Comm mpi_comm() const { return m_mpi_comm ; }
|
||||
|
||||
int mpi_size() const { return m_mpi_size ; }
|
||||
int mpi_rank() const { return m_mpi_rank ; }
|
||||
|
||||
/** \brief If using MPI that can directly operate on GPU memory */
|
||||
bool mpi_gpu() const { return m_mpi_gpu ; }
|
||||
|
||||
/** \brief If using GPU then what architecture */
|
||||
unsigned gpu_arch() const { return m_gpu_arch ; }
|
||||
|
||||
void print_configuration( std::ostream & ) const ;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef PARALLELMACHINE_HPP */
|
||||
|
||||
|
||||
@ -1,400 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef SPARSELINEARSYSTEM_HPP
|
||||
#define SPARSELINEARSYSTEM_HPP
|
||||
|
||||
#include <cmath>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_StaticCrsGraph.hpp>
|
||||
|
||||
#include <LinAlgBLAS.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< typename ScalarType , class Device >
|
||||
struct CrsMatrix {
|
||||
typedef Device execution_space ;
|
||||
typedef ScalarType value_type ;
|
||||
|
||||
typedef StaticCrsGraph< int , execution_space , void , int > graph_type ;
|
||||
typedef View< value_type* , execution_space > coefficients_type ;
|
||||
|
||||
graph_type graph ;
|
||||
coefficients_type coefficients ;
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Impl {
|
||||
|
||||
template< class Matrix , class OutputVector , class InputVector >
|
||||
struct Multiply ;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
template< typename AScalarType ,
|
||||
typename VScalarType ,
|
||||
class DeviceType >
|
||||
struct Multiply< CrsMatrix<AScalarType,DeviceType> ,
|
||||
View<VScalarType*,DeviceType > ,
|
||||
View<VScalarType*,DeviceType > >
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
|
||||
typedef View< VScalarType*, execution_space, MemoryUnmanaged > vector_type ;
|
||||
typedef View< const VScalarType*, execution_space, MemoryUnmanaged > vector_const_type ;
|
||||
|
||||
typedef CrsMatrix< AScalarType , execution_space > matrix_type ;
|
||||
|
||||
private:
|
||||
|
||||
matrix_type m_A ;
|
||||
vector_const_type m_x ;
|
||||
vector_type m_y ;
|
||||
|
||||
public:
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type iRow ) const
|
||||
{
|
||||
const size_type iEntryBegin = m_A.graph.row_map[iRow];
|
||||
const size_type iEntryEnd = m_A.graph.row_map[iRow+1];
|
||||
|
||||
double sum = 0 ;
|
||||
|
||||
#if defined( __INTEL_COMPILER )
|
||||
#pragma simd reduction(+:sum)
|
||||
#pragma ivdep
|
||||
for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
|
||||
sum += m_A.coefficients(iEntry) * m_x( m_A.graph.entries(iEntry) );
|
||||
}
|
||||
#else
|
||||
for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
|
||||
sum += m_A.coefficients(iEntry) * m_x( m_A.graph.entries(iEntry) );
|
||||
}
|
||||
#endif
|
||||
|
||||
m_y(iRow) = sum ;
|
||||
}
|
||||
|
||||
Multiply( const matrix_type & A ,
|
||||
const size_type nrow ,
|
||||
const size_type , // ncol ,
|
||||
const vector_type & x ,
|
||||
const vector_type & y )
|
||||
: m_A( A ), m_x( x ), m_y( y )
|
||||
{
|
||||
parallel_for( nrow , *this );
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename AScalarType ,
|
||||
typename VScalarType ,
|
||||
class Device >
|
||||
class Operator {
|
||||
typedef CrsMatrix<AScalarType,Device> matrix_type ;
|
||||
typedef View<VScalarType*,Device> vector_type ;
|
||||
|
||||
private:
|
||||
const CrsMatrix<AScalarType,Device> A ;
|
||||
|
||||
ParallelDataMap data_map ;
|
||||
AsyncExchange< VScalarType , Device , ParallelDataMap > exchange ;
|
||||
|
||||
public:
|
||||
|
||||
Operator( const ParallelDataMap & arg_data_map ,
|
||||
const CrsMatrix<AScalarType,Device> & arg_A )
|
||||
: A( arg_A )
|
||||
, data_map( arg_data_map )
|
||||
, exchange( arg_data_map , 1 )
|
||||
{}
|
||||
|
||||
void apply( const View<VScalarType*,Device> & x ,
|
||||
const View<VScalarType*,Device> & y )
|
||||
{
|
||||
// Gather off-processor data for 'x'
|
||||
|
||||
PackArray< vector_type >::pack( exchange.buffer() ,
|
||||
data_map.count_interior ,
|
||||
data_map.count_send , x );
|
||||
|
||||
exchange.setup();
|
||||
|
||||
// If interior & boundary matrices then could launch interior multiply
|
||||
|
||||
exchange.send_receive();
|
||||
|
||||
UnpackArray< vector_type >::unpack( x , exchange.buffer() ,
|
||||
data_map.count_owned ,
|
||||
data_map.count_receive );
|
||||
|
||||
const typename Device::size_type nrow = data_map.count_owned ;
|
||||
const typename Device::size_type ncol = data_map.count_owned +
|
||||
data_map.count_receive ;
|
||||
|
||||
Impl::Multiply<matrix_type,vector_type,vector_type>( A, nrow, ncol, x, y);
|
||||
}
|
||||
};
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< typename AScalarType , typename VScalarType , class Device >
|
||||
void cgsolve(
|
||||
const ParallelDataMap data_map ,
|
||||
const CrsMatrix<AScalarType,Device> A ,
|
||||
const View<VScalarType*,Device> b ,
|
||||
const View<VScalarType*,Device> x ,
|
||||
size_t & iteration ,
|
||||
double & normr ,
|
||||
double & iter_time ,
|
||||
const size_t maximum_iteration = 200 ,
|
||||
const double tolerance = std::numeric_limits<VScalarType>::epsilon() )
|
||||
{
|
||||
typedef View<VScalarType*,Device> vector_type ;
|
||||
//typedef View<VScalarType, Device> value_type ; // unused
|
||||
|
||||
const size_t count_owned = data_map.count_owned ;
|
||||
const size_t count_total = data_map.count_owned + data_map.count_receive ;
|
||||
|
||||
Operator<AScalarType,VScalarType,Device> matrix_operator( data_map , A );
|
||||
|
||||
// Need input vector to matvec to be owned + received
|
||||
vector_type pAll ( "cg::p" , count_total );
|
||||
|
||||
vector_type p = Kokkos::subview( pAll , std::pair<size_t,size_t>(0,count_owned) );
|
||||
vector_type r ( "cg::r" , count_owned );
|
||||
vector_type Ap( "cg::Ap", count_owned );
|
||||
|
||||
/* r = b - A * x ; */
|
||||
|
||||
/* p = x */ deep_copy( p , x );
|
||||
/* Ap = A * p */ matrix_operator.apply( pAll , Ap );
|
||||
/* r = b - Ap */ waxpby( count_owned , 1.0 , b , -1.0 , Ap , r );
|
||||
/* p = r */ deep_copy( p , r );
|
||||
|
||||
double old_rdot = dot( count_owned , r , data_map.machine );
|
||||
|
||||
normr = sqrt( old_rdot );
|
||||
iteration = 0 ;
|
||||
|
||||
Kokkos::Impl::Timer wall_clock ;
|
||||
|
||||
while ( tolerance < normr && iteration < maximum_iteration ) {
|
||||
|
||||
/* pAp_dot = dot( p , Ap = A * p ) */
|
||||
|
||||
/* Ap = A * p */ matrix_operator.apply( pAll , Ap );
|
||||
|
||||
const double pAp_dot = dot( count_owned , p , Ap , data_map.machine );
|
||||
const double alpha = old_rdot / pAp_dot ;
|
||||
|
||||
/* x += alpha * p ; */ axpy( count_owned, alpha, p , x );
|
||||
/* r -= alpha * Ap ; */ axpy( count_owned, -alpha, Ap, r );
|
||||
|
||||
const double r_dot = dot( count_owned , r , data_map.machine );
|
||||
const double beta = r_dot / old_rdot ;
|
||||
|
||||
/* p = r + beta * p ; */ xpby( count_owned , r , beta , p );
|
||||
|
||||
normr = sqrt( old_rdot = r_dot );
|
||||
++iteration ;
|
||||
}
|
||||
|
||||
iter_time = wall_clock.seconds();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
|
||||
#if ( CUDA_VERSION < 6000 )
|
||||
#pragma message "cusparse_v2.h"
|
||||
#include <cusparse_v2.h>
|
||||
#else
|
||||
#pragma message "cusparse.h"
|
||||
#include <cusparse.h>
|
||||
#endif
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
struct CudaSparseSingleton {
|
||||
cusparseHandle_t handle;
|
||||
cusparseMatDescr_t descra;
|
||||
|
||||
CudaSparseSingleton()
|
||||
{
|
||||
cusparseCreate( & handle );
|
||||
cusparseCreateMatDescr( & descra );
|
||||
cusparseSetMatType( descra , CUSPARSE_MATRIX_TYPE_GENERAL );
|
||||
cusparseSetMatIndexBase( descra , CUSPARSE_INDEX_BASE_ZERO );
|
||||
}
|
||||
|
||||
static CudaSparseSingleton & singleton();
|
||||
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Multiply< CrsMatrix<double,Cuda> ,
|
||||
View<double*,Cuda > ,
|
||||
View<double*,Cuda > >
|
||||
{
|
||||
typedef Cuda execution_space ;
|
||||
typedef execution_space::size_type size_type ;
|
||||
typedef double scalar_type ;
|
||||
typedef View< scalar_type* , execution_space > vector_type ;
|
||||
typedef CrsMatrix< scalar_type , execution_space > matrix_type ;
|
||||
|
||||
public:
|
||||
|
||||
Multiply( const matrix_type & A ,
|
||||
const size_type nrow ,
|
||||
const size_type ncol ,
|
||||
const vector_type & x ,
|
||||
const vector_type & y )
|
||||
{
|
||||
CudaSparseSingleton & s = CudaSparseSingleton::singleton();
|
||||
const scalar_type alpha = 1 , beta = 0 ;
|
||||
|
||||
cusparseStatus_t status =
|
||||
cusparseDcsrmv( s.handle ,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE ,
|
||||
nrow , ncol , A.coefficients.dimension_0() ,
|
||||
&alpha ,
|
||||
s.descra ,
|
||||
A.coefficients.ptr_on_device() ,
|
||||
A.graph.row_map.ptr_on_device() ,
|
||||
A.graph.entries.ptr_on_device() ,
|
||||
x.ptr_on_device() ,
|
||||
&beta ,
|
||||
y.ptr_on_device() );
|
||||
|
||||
if ( CUSPARSE_STATUS_SUCCESS != status ) {
|
||||
throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<>
|
||||
struct Multiply< CrsMatrix<float,Cuda> ,
|
||||
View<float*,Cuda > ,
|
||||
View<float*,Cuda > >
|
||||
{
|
||||
typedef Cuda execution_space ;
|
||||
typedef execution_space::size_type size_type ;
|
||||
typedef float scalar_type ;
|
||||
typedef View< scalar_type* , execution_space > vector_type ;
|
||||
typedef CrsMatrix< scalar_type , execution_space > matrix_type ;
|
||||
|
||||
public:
|
||||
|
||||
Multiply( const matrix_type & A ,
|
||||
const size_type nrow ,
|
||||
const size_type ncol ,
|
||||
const vector_type & x ,
|
||||
const vector_type & y )
|
||||
{
|
||||
CudaSparseSingleton & s = CudaSparseSingleton::singleton();
|
||||
const scalar_type alpha = 1 , beta = 0 ;
|
||||
|
||||
cusparseStatus_t status =
|
||||
cusparseScsrmv( s.handle ,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE ,
|
||||
nrow , ncol , A.coefficients.dimension_0() ,
|
||||
&alpha ,
|
||||
s.descra ,
|
||||
A.coefficients.ptr_on_device() ,
|
||||
A.graph.row_map.ptr_on_device() ,
|
||||
A.graph.entries.ptr_on_device() ,
|
||||
x.ptr_on_device() ,
|
||||
&beta ,
|
||||
y.ptr_on_device() );
|
||||
|
||||
if ( CUSPARSE_STATUS_SUCCESS != status ) {
|
||||
throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef SPARSELINEARSYSTEM_HPP */
|
||||
|
||||
@ -1,276 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef SPARSELINEARSYSTEMFILL_HPP
|
||||
#define SPARSELINEARSYSTEMFILL_HPP
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include <FEMesh.hpp>
|
||||
#include <SparseLinearSystem.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace HybridFEM {
|
||||
|
||||
template< class MatrixType , class MeshType ,
|
||||
class elem_matrices_type ,
|
||||
class elem_vectors_type > struct GatherFill ;
|
||||
|
||||
|
||||
template< typename ScalarType ,
|
||||
class DeviceType ,
|
||||
unsigned ElemNode ,
|
||||
typename CoordScalarType ,
|
||||
class elem_matrices_type ,
|
||||
class elem_vectors_type >
|
||||
struct GatherFill<
|
||||
Kokkos::CrsMatrix< ScalarType , DeviceType > ,
|
||||
FEMesh< CoordScalarType , ElemNode , DeviceType > ,
|
||||
elem_matrices_type , elem_vectors_type >
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
|
||||
static const size_type ElemNodeCount = ElemNode ;
|
||||
|
||||
typedef Kokkos::CrsMatrix< ScalarType , execution_space > matrix_type ;
|
||||
typedef typename matrix_type::coefficients_type coefficients_type ;
|
||||
typedef Kokkos::View< ScalarType[] , execution_space > vector_type ;
|
||||
typedef Kokkos::View< size_type[][ElemNodeCount][ElemNodeCount] , execution_space > elem_graph_type ;
|
||||
|
||||
typedef FEMesh< CoordScalarType , ElemNodeCount , execution_space > mesh_type ;
|
||||
typedef typename mesh_type::node_elem_ids_type node_elem_ids_type ;
|
||||
|
||||
private:
|
||||
|
||||
node_elem_ids_type node_elem_ids ;
|
||||
elem_graph_type elem_graph ;
|
||||
elem_matrices_type elem_matrices ;
|
||||
elem_vectors_type elem_vectors ;
|
||||
coefficients_type system_coeff ;
|
||||
vector_type system_rhs ;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( size_type irow ) const
|
||||
{
|
||||
const size_type node_elem_begin = node_elem_ids.row_map[irow];
|
||||
const size_type node_elem_end = node_elem_ids.row_map[irow+1];
|
||||
|
||||
// for each element that a node belongs to
|
||||
|
||||
for ( size_type i = node_elem_begin ; i < node_elem_end ; i++ ) {
|
||||
|
||||
const size_type elem_id = node_elem_ids.entries( i, 0);
|
||||
const size_type row_index = node_elem_ids.entries( i, 1);
|
||||
|
||||
system_rhs(irow) += elem_vectors(elem_id, row_index);
|
||||
|
||||
// for each node in a particular related element
|
||||
// gather the contents of the element stiffness
|
||||
// matrix that belong in irow
|
||||
|
||||
for ( size_type j = 0 ; j < ElemNodeCount ; ++j ){
|
||||
const size_type A_index = elem_graph( elem_id , row_index , j );
|
||||
|
||||
system_coeff( A_index ) += elem_matrices( elem_id, row_index, j );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void apply( const matrix_type & matrix ,
|
||||
const vector_type & rhs ,
|
||||
const mesh_type & mesh ,
|
||||
const elem_graph_type & elem_graph ,
|
||||
const elem_matrices_type & elem_matrices ,
|
||||
const elem_vectors_type & elem_vectors )
|
||||
{
|
||||
const size_t row_count = matrix.graph.row_map.dimension_0() - 1 ;
|
||||
GatherFill op ;
|
||||
op.node_elem_ids = mesh.node_elem_ids ;
|
||||
op.elem_graph = elem_graph ;
|
||||
op.elem_matrices = elem_matrices ;
|
||||
op.elem_vectors = elem_vectors ;
|
||||
op.system_coeff = matrix.coefficients ;
|
||||
op.system_rhs = rhs ;
|
||||
|
||||
parallel_for( row_count , op );
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace HybridFEM */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace HybridFEM {
|
||||
|
||||
template< class GraphType , class MeshType >
|
||||
struct GraphFactory {
|
||||
typedef GraphType graph_type ;
|
||||
typedef MeshType mesh_type ;
|
||||
typedef typename graph_type::execution_space execution_space ;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
|
||||
static const unsigned ElemNodeCount = mesh_type::element_node_count ;
|
||||
|
||||
typedef Kokkos::View< size_type[][ElemNodeCount][ElemNodeCount] , execution_space > element_map_type ;
|
||||
|
||||
static
|
||||
void
|
||||
create( const mesh_type & mesh ,
|
||||
graph_type & graph ,
|
||||
element_map_type & elem_map )
|
||||
{
|
||||
typename mesh_type::node_elem_ids_type::HostMirror
|
||||
node_elem_ids = create_mirror( mesh.node_elem_ids );
|
||||
|
||||
typename mesh_type::elem_node_ids_type::HostMirror
|
||||
elem_node_ids = create_mirror( mesh.elem_node_ids );
|
||||
|
||||
typedef typename element_map_type::HostMirror element_map_host_type ;
|
||||
|
||||
deep_copy( elem_node_ids , mesh.elem_node_ids );
|
||||
deep_copy( node_elem_ids.entries , mesh.node_elem_ids.entries );
|
||||
|
||||
const size_t owned_node = mesh.parallel_data_map.count_owned ;
|
||||
const size_t total_elem = mesh.elem_node_ids.dimension_0();
|
||||
|
||||
if ( total_elem ) {
|
||||
elem_map = element_map_type( std::string("element_map"), total_elem );
|
||||
}
|
||||
|
||||
element_map_host_type elem_map_host = create_mirror( elem_map );
|
||||
|
||||
//------------------------------------
|
||||
// Node->node mapping for the CrsMatrix graph
|
||||
|
||||
std::vector< std::vector< unsigned > > node_node_ids( owned_node );
|
||||
std::vector< unsigned > node_node_begin( owned_node );
|
||||
|
||||
size_t offset = 0 ;
|
||||
for ( size_t i = 0 ; i < owned_node ; ++i ) {
|
||||
const size_t j_end = node_elem_ids.row_map[i+1];
|
||||
size_t j = node_elem_ids.row_map[i];
|
||||
|
||||
node_node_begin[i] = offset ;
|
||||
|
||||
std::vector< unsigned > & work = node_node_ids[i] ;
|
||||
|
||||
for ( ; j < j_end ; ++j ) {
|
||||
const size_t elem_id = node_elem_ids.entries(j,0);
|
||||
for ( size_t k = 0 ; k < ElemNodeCount ; ++k ) {
|
||||
work.push_back( elem_node_ids( elem_id , k ) );
|
||||
}
|
||||
}
|
||||
|
||||
std::sort( work.begin() , work.end() );
|
||||
|
||||
work.erase( std::unique( work.begin() , work.end() ) , work.end() );
|
||||
|
||||
offset += work.size();
|
||||
}
|
||||
|
||||
graph = Kokkos::create_staticcrsgraph< graph_type >( "node_node_ids" , node_node_ids );
|
||||
|
||||
//------------------------------------
|
||||
// ( element , node_row , node_column ) -> matrix_crs_column
|
||||
|
||||
for ( size_t elem_id = 0 ; elem_id < total_elem ; ++elem_id ) {
|
||||
for ( size_t i = 0 ; i < ElemNodeCount ; ++i ) {
|
||||
|
||||
const size_t node_row = elem_node_ids( elem_id , i );
|
||||
const size_t node_row_begin = node_node_begin[ node_row ];
|
||||
const std::vector< unsigned > & column = node_node_ids[ node_row ] ;
|
||||
|
||||
if ( owned_node <= node_row ) {
|
||||
for ( unsigned j = 0 ; j < ElemNodeCount ; ++j ) {
|
||||
elem_map_host( elem_id , i , j ) = std::numeric_limits<size_type>::max();
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
||||
for ( unsigned j = 0 ; j < ElemNodeCount ; ++j ) {
|
||||
const size_type node_col = elem_node_ids( elem_id , j );
|
||||
|
||||
int col_search = 0 ;
|
||||
|
||||
for ( int len = column.size() ; 0 < len ; ) {
|
||||
|
||||
const int half = len >> 1;
|
||||
const int middle = col_search + half ;
|
||||
|
||||
if ( column[middle] < node_col ){
|
||||
col_search = middle + 1 ;
|
||||
len -= half + 1 ;
|
||||
}
|
||||
else {
|
||||
len = half ;
|
||||
}
|
||||
}
|
||||
if ( node_col != column[col_search] ) {
|
||||
throw std::runtime_error(std::string("Failed"));
|
||||
}
|
||||
elem_map_host( elem_id , i , j ) = col_search + node_row_begin ;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
deep_copy( elem_map , elem_map_host );
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace HybridFEM
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #ifndef SPARSELINEARSYSTEMFILL_HPP */
|
||||
|
||||
@ -1,164 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef SPARSELINEARSYSTEM_CUDA_HPP
|
||||
#define SPARSELINEARSYSTEM_CUDA_HPP
|
||||
|
||||
#if defined( BUILD_FROM_CU_FILE )
|
||||
|
||||
#include <cusparse_v2.h>
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
|
||||
struct CudaSparseSingleton {
|
||||
cusparseHandle_t handle;
|
||||
cusparseMatDescr_t descra;
|
||||
|
||||
CudaSparseSingleton()
|
||||
{
|
||||
cusparseCreate( & handle );
|
||||
cusparseCreateMatDescr( & descra );
|
||||
cusparseSetMatType( descra , CUSPARSE_MATRIX_TYPE_GENERAL );
|
||||
cusparseSetMatIndexBase( descra , CUSPARSE_INDEX_BASE_ZERO );
|
||||
}
|
||||
|
||||
static CudaSparseSingleton & singleton();
|
||||
|
||||
};
|
||||
|
||||
CudaSparseSingleton & CudaSparseSingleton::singleton()
|
||||
{ static CudaSparseSingleton s ; return s ; }
|
||||
|
||||
|
||||
template<>
|
||||
struct Multiply< CrsMatrix<double,Cuda> ,
|
||||
View<double*,Cuda > ,
|
||||
View<double*,Cuda > >
|
||||
{
|
||||
typedef Cuda execution_space ;
|
||||
typedef execution_space::size_type size_type ;
|
||||
typedef double scalar_type ;
|
||||
typedef View< scalar_type* , execution_space > vector_type ;
|
||||
typedef CrsMatrix< scalar_type , execution_space > matrix_type ;
|
||||
|
||||
public:
|
||||
|
||||
Multiply( const matrix_type & A ,
|
||||
const size_type nrow ,
|
||||
const size_type ncol ,
|
||||
const vector_type & x ,
|
||||
const vector_type & y )
|
||||
{
|
||||
CudaSparseSingleton & s = CudaSparseSingleton::singleton();
|
||||
const scalar_type alpha = 1 , beta = 0 ;
|
||||
|
||||
cusparseStatus_t status =
|
||||
cusparseDcsrmv( s.handle ,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE ,
|
||||
nrow , ncol , A.coefficients.dimension_0() ,
|
||||
&alpha ,
|
||||
s.descra ,
|
||||
A.coefficients.ptr_on_device() ,
|
||||
A.graph.row_map.ptr_on_device() ,
|
||||
A.graph.entries.ptr_on_device() ,
|
||||
x.ptr_on_device() ,
|
||||
&beta ,
|
||||
y.ptr_on_device() );
|
||||
|
||||
if ( CUSPARSE_STATUS_SUCCESS != status ) {
|
||||
throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<>
|
||||
struct Multiply< CrsMatrix<float,Cuda> ,
|
||||
View<float*,Cuda > ,
|
||||
View<float*,Cuda > >
|
||||
{
|
||||
typedef Cuda execution_space ;
|
||||
typedef execution_space::size_type size_type ;
|
||||
typedef float scalar_type ;
|
||||
typedef View< scalar_type* , execution_space > vector_type ;
|
||||
typedef CrsMatrix< scalar_type , execution_space > matrix_type ;
|
||||
|
||||
public:
|
||||
|
||||
Multiply( const matrix_type & A ,
|
||||
const size_type nrow ,
|
||||
const size_type ncol ,
|
||||
const vector_type & x ,
|
||||
const vector_type & y )
|
||||
{
|
||||
CudaSparseSingleton & s = CudaSparseSingleton::singleton();
|
||||
const scalar_type alpha = 1 , beta = 0 ;
|
||||
|
||||
cusparseStatus_t status =
|
||||
cusparseScsrmv( s.handle ,
|
||||
CUSPARSE_OPERATION_NON_TRANSPOSE ,
|
||||
nrow , ncol , A.coefficients.dimension_0() ,
|
||||
&alpha ,
|
||||
s.descra ,
|
||||
A.coefficients.ptr_on_device() ,
|
||||
A.graph.row_map.ptr_on_device() ,
|
||||
A.graph.entries.ptr_on_device() ,
|
||||
x.ptr_on_device() ,
|
||||
&beta ,
|
||||
y.ptr_on_device() );
|
||||
|
||||
if ( CUSPARSE_STATUS_SUCCESS != status ) {
|
||||
throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
#endif /* #if defined( __CUDACC__ ) */
|
||||
#endif /* #ifndef SPARSELINEARSYSTEM_CUDA_HPP */
|
||||
|
||||
@ -1,242 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#ifndef TESTFEMESHBOXFIXTURE_HPP
|
||||
#define TESTFEMESHBOXFIXTURE_HPP
|
||||
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
#include <BoxMeshFixture.hpp>
|
||||
|
||||
#include <ParallelComm.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace TestFEMesh {
|
||||
|
||||
template< class ViewType >
|
||||
struct VerifyUnpack ;
|
||||
|
||||
template< typename DeviceType, typename T >
|
||||
struct VerifyUnpack< Kokkos::View< T*[3] , DeviceType > >
|
||||
{
|
||||
typedef DeviceType execution_space ;
|
||||
typedef typename execution_space::size_type size_type ;
|
||||
typedef size_type value_type ;
|
||||
|
||||
typedef Kokkos::View< T* , execution_space > buffer_type ;
|
||||
typedef Kokkos::View< T*[3] , execution_space > array_type ;
|
||||
|
||||
private:
|
||||
|
||||
array_type node_coords ;
|
||||
buffer_type buffer ;
|
||||
size_type node_begin ;
|
||||
|
||||
public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void init( value_type & update )
|
||||
{ update = 0 ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void join( volatile value_type & update ,
|
||||
const volatile value_type & source )
|
||||
{ update += source ; }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()( const size_type i , value_type & update ) const
|
||||
{
|
||||
const size_type node_id = i + node_begin ;
|
||||
const size_type k = i * 3 ;
|
||||
|
||||
const long xb = buffer[k];
|
||||
const long yb = buffer[k+1];
|
||||
const long zb = buffer[k+2];
|
||||
const long xn = node_coords(node_id,0);
|
||||
const long yn = node_coords(node_id,1);
|
||||
const long zn = node_coords(node_id,2);
|
||||
|
||||
if ( xb != xn || yb != yn || zb != zn ) {
|
||||
printf("TestFEMesh::VerifyUnpack failed at %d : node %d : { %ld %ld %ld } != { %ld %ld %ld }\n",
|
||||
(int)i,(int)node_id, xb,yb,zb, xn, yn, zn );
|
||||
++update ;
|
||||
}
|
||||
}
|
||||
|
||||
static inline
|
||||
size_type unpack( const array_type & arg_node_coords ,
|
||||
const size_type arg_node_begin ,
|
||||
const size_type arg_node_count ,
|
||||
const buffer_type & arg_buffer )
|
||||
{
|
||||
VerifyUnpack op ;
|
||||
op.node_coords = arg_node_coords ;
|
||||
op.buffer = arg_buffer ;
|
||||
op.node_begin = arg_node_begin ;
|
||||
size_type count = 0 ;
|
||||
Kokkos::parallel_reduce( arg_node_count , op , count );
|
||||
return count ;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#ifdef KOKKOS_HAVE_MPI
|
||||
|
||||
namespace TestFEMesh {
|
||||
|
||||
template< typename coordinate_scalar_type ,
|
||||
unsigned ElemNodeCount ,
|
||||
class Device >
|
||||
void verify_parallel(
|
||||
const HybridFEM::FEMesh< coordinate_scalar_type ,
|
||||
ElemNodeCount ,
|
||||
Device > & mesh )
|
||||
{
|
||||
typedef HybridFEM::FEMesh< coordinate_scalar_type, ElemNodeCount, Device > femesh_type ;
|
||||
typedef typename femesh_type::node_coords_type node_coords_type ;
|
||||
|
||||
comm::Machine machine = mesh.parallel_data_map.machine ;
|
||||
|
||||
// Communicate node coordinates to verify communication and setup.
|
||||
|
||||
const size_t chunk_size = 3 ;
|
||||
|
||||
Kokkos::AsyncExchange< coordinate_scalar_type, Device, Kokkos::ParallelDataMap >
|
||||
exchange( mesh.parallel_data_map , chunk_size );
|
||||
|
||||
const size_t send_begin = mesh.parallel_data_map.count_interior ;
|
||||
const size_t send_count = mesh.parallel_data_map.count_send ;
|
||||
|
||||
const size_t recv_begin = mesh.parallel_data_map.count_owned ;
|
||||
const size_t recv_count = mesh.parallel_data_map.count_receive ;
|
||||
|
||||
typedef Kokkos::PackArray< node_coords_type > pack_type ;
|
||||
|
||||
pack_type::pack( exchange.buffer(), send_begin, send_count, mesh.node_coords );
|
||||
|
||||
exchange.setup();
|
||||
|
||||
// Launch local-action device kernels
|
||||
|
||||
exchange.send_receive();
|
||||
|
||||
unsigned long local[3] ;
|
||||
local[0] = mesh.parallel_data_map.count_owned ;
|
||||
local[1] = mesh.parallel_data_map.count_receive ;
|
||||
local[2] = TestFEMesh::VerifyUnpack< node_coords_type >::unpack( mesh.node_coords, recv_begin, recv_count, exchange.buffer() );
|
||||
|
||||
unsigned long global[3] = { 0 , 0 , 0 };
|
||||
|
||||
MPI_Allreduce( local , global ,
|
||||
3 , MPI_UNSIGNED_LONG , MPI_SUM , machine.mpi_comm );
|
||||
|
||||
if ( 0 == comm::rank( machine ) ) {
|
||||
std::cout << ( global[2] ? "FAILED" : "PASSED" )
|
||||
<< ": TestFEMesh::verify_parallel "
|
||||
<< "NP(" << comm::size( machine )
|
||||
<< ") total_node(" << global[0]
|
||||
<< ") verified_nodes(" << global[1]
|
||||
<< ") failed_nodes(" << global[2]
|
||||
<< ")" << std::endl ;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace TestFEMesh
|
||||
|
||||
#else /* ! #ifdef KOKKOS_HAVE_MPI */
|
||||
|
||||
namespace TestFEMesh {
|
||||
|
||||
template< typename coordinate_scalar_type ,
|
||||
unsigned ElemNodeCount ,
|
||||
class Device >
|
||||
void verify_parallel(
|
||||
const HybridFEM::FEMesh< coordinate_scalar_type ,
|
||||
ElemNodeCount ,
|
||||
Device > & )
|
||||
{}
|
||||
|
||||
} // namespace TestFEMesh
|
||||
|
||||
#endif /* ! #ifdef KOKKOS_HAVE_MPI */
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template< class Device >
|
||||
void test_box_fixture( comm::Machine machine ,
|
||||
const size_t gang_count ,
|
||||
const size_t nodes_nx ,
|
||||
const size_t nodes_ny ,
|
||||
const size_t nodes_nz )
|
||||
{
|
||||
typedef long coordinate_scalar_type ;
|
||||
typedef FixtureElementHex8 fixture_element_type ;
|
||||
|
||||
typedef BoxMeshFixture< coordinate_scalar_type ,
|
||||
Device ,
|
||||
fixture_element_type > fixture_type ;
|
||||
|
||||
typedef typename fixture_type::FEMeshType mesh_type ;
|
||||
|
||||
const size_t proc_count = comm::size( machine );
|
||||
const size_t proc_local = comm::rank( machine ) ;
|
||||
|
||||
mesh_type mesh =
|
||||
fixture_type::create( proc_count , proc_local , gang_count ,
|
||||
nodes_nx - 1 , nodes_ny - 1 , nodes_nz - 1 );
|
||||
|
||||
mesh.parallel_data_map.machine = machine ;
|
||||
|
||||
TestFEMesh::verify_parallel( mesh );
|
||||
}
|
||||
|
||||
#endif /* #ifndef TESTFEMESHBOXFIXTURE_HPP */
|
||||
|
||||
|
||||
@ -1,172 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
#include <BoxMeshPartition.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_box_partition( bool print )
|
||||
{
|
||||
const size_t np_max = 10000 ;
|
||||
|
||||
const BoxBoundsLinear use_box ;
|
||||
|
||||
BoxType root_box ;
|
||||
|
||||
root_box[0][0] = 0 ; root_box[0][1] = 100 ;
|
||||
root_box[1][0] = 0 ; root_box[1][1] = 200 ;
|
||||
root_box[2][0] = 0 ; root_box[2][1] = 300 ;
|
||||
|
||||
const size_t cell_total =
|
||||
( root_box[0][1] - root_box[0][0] ) *
|
||||
( root_box[1][1] - root_box[1][0] ) *
|
||||
( root_box[2][1] - root_box[2][0] );
|
||||
|
||||
for ( size_t np = 2 ; np < np_max ; np = 2 * ( np + 1 ) ) {
|
||||
|
||||
std::vector<BoxType> part_boxes( np );
|
||||
|
||||
box_partition_rcb( root_box , part_boxes );
|
||||
|
||||
size_t cell_goal = ( cell_total + np - 1 ) / np ;
|
||||
size_t cell_max = 0 ;
|
||||
|
||||
for ( size_t i = 0 ; i < np ; ++i ) {
|
||||
cell_max = std::max( cell_max , count( part_boxes[i] ) );
|
||||
}
|
||||
|
||||
if ( print ) {
|
||||
std::cout << std::endl
|
||||
<< "box_part( " << np
|
||||
<< " ) max( " << cell_max
|
||||
<< " ) goal( " << cell_goal
|
||||
<< " ) ratio( " << double(cell_max) / double(cell_goal)
|
||||
<< " )" << std::endl ;
|
||||
}
|
||||
|
||||
const size_t nsample = std::min(np,(size_t)4);
|
||||
const size_t stride = ( np + nsample - 1 ) / nsample ;
|
||||
|
||||
for ( size_t my_part = 0 ; my_part < np ; my_part += stride ) {
|
||||
BoxType my_use_box ;
|
||||
std::vector<size_t> my_use_id_map ;
|
||||
size_t my_count_interior ;
|
||||
size_t my_count_owned ;
|
||||
size_t my_count_uses ;
|
||||
std::vector<size_t> my_recv_counts ;
|
||||
std::vector<std::vector<size_t> > my_send_map ;
|
||||
|
||||
size_t count_verify = 0 ;
|
||||
|
||||
box_partition_maps( root_box , part_boxes ,
|
||||
use_box , my_part ,
|
||||
my_use_box , my_use_id_map ,
|
||||
my_count_interior ,
|
||||
my_count_owned ,
|
||||
my_count_uses ,
|
||||
my_recv_counts ,
|
||||
my_send_map );
|
||||
|
||||
count_verify = my_count_owned ;
|
||||
|
||||
if ( print ) {
|
||||
std::cout << " my_part(" << my_part << ") layout { "
|
||||
<< "P" << my_part
|
||||
<< "(" << my_count_interior
|
||||
<< "," << ( my_count_owned - my_count_interior )
|
||||
<< ")" ;
|
||||
}
|
||||
|
||||
for ( size_t i = 1 ; i < np ; ++i ) {
|
||||
if ( my_recv_counts[i] ) {
|
||||
count_verify += my_recv_counts[i] ;
|
||||
const size_t ip = ( my_part + i ) % np ;
|
||||
|
||||
if ( print ) {
|
||||
std::cout << " P" << ip << "(" << my_recv_counts[i] << ")" ;
|
||||
}
|
||||
|
||||
// Compare recv & send lists
|
||||
|
||||
BoxType ip_use_box ;
|
||||
std::vector<size_t> ip_use_id_map ;
|
||||
size_t ip_count_interior ;
|
||||
size_t ip_count_owned ;
|
||||
size_t ip_count_uses ;
|
||||
std::vector<size_t> ip_recv_counts ;
|
||||
std::vector<std::vector<size_t> > ip_send_map ;
|
||||
|
||||
box_partition_maps( root_box , part_boxes ,
|
||||
use_box , ip ,
|
||||
ip_use_box , ip_use_id_map ,
|
||||
ip_count_interior ,
|
||||
ip_count_owned ,
|
||||
ip_count_uses ,
|
||||
ip_recv_counts ,
|
||||
ip_send_map );
|
||||
|
||||
// Sent by ip, received by my_part:
|
||||
|
||||
const BoxType recv_send = intersect( part_boxes[ip] , my_use_box );
|
||||
const size_t recv_send_count = count( recv_send );
|
||||
|
||||
const size_t j = ( my_part + np - ip ) % np ;
|
||||
|
||||
if ( recv_send_count != my_recv_counts[i] ||
|
||||
recv_send_count != ip_send_map[j].size() ) {
|
||||
throw std::runtime_error( std::string("bad recv/send map") );
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( print ) { std::cout << " }" << std::endl ; }
|
||||
|
||||
if ( count_verify != my_count_uses ) {
|
||||
throw std::runtime_error( std::string("bad partition map") );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,192 +0,0 @@
|
||||
/*
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <TestBoxMeshFixture.hpp>
|
||||
#include <Implicit.hpp>
|
||||
#include <Nonlinear.hpp>
|
||||
#include <Explicit.hpp>
|
||||
|
||||
#include <SparseLinearSystem.hpp>
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Impl {
|
||||
|
||||
CudaSparseSingleton & CudaSparseSingleton::singleton()
|
||||
{ static CudaSparseSingleton s ; return s ; }
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_cuda_query( comm::Machine machine )
|
||||
{
|
||||
const size_t comm_rank = comm::rank( machine );
|
||||
std::cout << "P" << comm_rank
|
||||
<< ": Cuda device_count = "
|
||||
<< Kokkos::Cuda::detect_device_count()
|
||||
<< std::endl ;
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_cuda_fixture( comm::Machine machine ,
|
||||
size_t nx , size_t ny , size_t nz )
|
||||
{
|
||||
const size_t comm_rank = comm::rank( machine );
|
||||
const size_t comm_size = comm::size( machine );
|
||||
const size_t dev_count = Kokkos::Cuda::detect_device_count();
|
||||
const size_t dev_rank =
|
||||
dev_count && dev_count <= comm_size ? comm_rank % dev_count : 0 ;
|
||||
const size_t gang_count = 0 ;
|
||||
|
||||
Kokkos::HostSpace::execution_space::initialize();
|
||||
Kokkos::Cuda::SelectDevice select_device( dev_rank );
|
||||
Kokkos::Cuda::initialize( select_device );
|
||||
test_box_fixture<Kokkos::Cuda>( machine , gang_count , nx , ny , nz );
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_cuda_implicit( comm::Machine machine ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run )
|
||||
{
|
||||
const size_t comm_rank = comm::rank( machine );
|
||||
const size_t comm_size = comm::size( machine );
|
||||
const size_t dev_count = Kokkos::Cuda::detect_device_count();
|
||||
const size_t dev_rank =
|
||||
dev_count && dev_count <= comm_size ? comm_rank % dev_count : 0 ;
|
||||
const size_t gang_count = 0 ;
|
||||
|
||||
Kokkos::HostSpace::execution_space::initialize();
|
||||
Kokkos::Cuda::SelectDevice select_device( dev_rank );
|
||||
Kokkos::Cuda::initialize( select_device );
|
||||
HybridFEM::Implicit::driver<double,Kokkos::Cuda>( "Cuda" , machine , gang_count , elem_count_begin , elem_count_end , count_run );
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_cuda_explicit( comm::Machine machine ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run )
|
||||
{
|
||||
const size_t comm_rank = comm::rank( machine );
|
||||
const size_t comm_size = comm::size( machine );
|
||||
const size_t dev_count = Kokkos::Cuda::detect_device_count();
|
||||
const size_t dev_rank =
|
||||
dev_count && dev_count <= comm_size ? comm_rank % dev_count : 0 ;
|
||||
const size_t gang_count = 0 ;
|
||||
|
||||
Kokkos::HostSpace::execution_space::initialize();
|
||||
Kokkos::Cuda::SelectDevice select_device( dev_rank );
|
||||
Kokkos::Cuda::initialize( select_device );
|
||||
Explicit::driver<double,Kokkos::Cuda>( "Cuda" , machine , gang_count , elem_count_begin , elem_count_end , count_run );
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_cuda_nonlinear( comm::Machine machine ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run )
|
||||
{
|
||||
const size_t comm_rank = comm::rank( machine );
|
||||
const size_t comm_size = comm::size( machine );
|
||||
const size_t dev_count = Kokkos::Cuda::detect_device_count();
|
||||
const size_t dev_rank =
|
||||
dev_count && dev_count <= comm_size ? comm_rank % dev_count : 0 ;
|
||||
const size_t gang_count = 0 ;
|
||||
|
||||
Kokkos::HostSpace::execution_space::initialize();
|
||||
Kokkos::Cuda::SelectDevice select_device( dev_rank );
|
||||
Kokkos::Cuda::initialize( select_device );
|
||||
|
||||
typedef Kokkos::Cuda device ;
|
||||
typedef FixtureElementHex8 hex8 ;
|
||||
HybridFEM::Nonlinear::driver<double,device,hex8>( "Cuda" , machine , gang_count , elem_count_begin , elem_count_end , count_run );
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
|
||||
void test_cuda_nonlinear_quadratic( comm::Machine machine ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run )
|
||||
{
|
||||
const size_t comm_rank = comm::rank( machine );
|
||||
const size_t comm_size = comm::size( machine );
|
||||
const size_t dev_count = Kokkos::Cuda::detect_device_count();
|
||||
const size_t dev_rank =
|
||||
dev_count && dev_count <= comm_size ? comm_rank % dev_count : 0 ;
|
||||
const size_t gang_count = 0 ;
|
||||
|
||||
Kokkos::HostSpace::execution_space::initialize();
|
||||
Kokkos::Cuda::SelectDevice select_device( dev_rank );
|
||||
Kokkos::Cuda::initialize( select_device );
|
||||
|
||||
typedef Kokkos::Cuda device ;
|
||||
typedef FixtureElementHex27 hex27 ;
|
||||
HybridFEM::Nonlinear::driver<double,device,hex27>( "Cuda" , machine , gang_count , elem_count_begin , elem_count_end , count_run );
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
|
||||
|
||||
@ -1,137 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
// Must be included first on Intel-Phi systems due to
|
||||
// redefinition of SEEK_SET in <mpi.h>.
|
||||
|
||||
#include <ParallelComm.hpp>
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
#include <BoxMeshFixture.hpp>
|
||||
#include <TestBoxMeshFixture.hpp>
|
||||
#include <Implicit.hpp>
|
||||
#include <Nonlinear.hpp>
|
||||
#include <Explicit.hpp>
|
||||
#include <SparseLinearSystem.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_host_fixture( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t nx , size_t ny , size_t nz )
|
||||
{
|
||||
Kokkos::HostSpace::execution_space::initialize( gang_count * gang_worker_count );
|
||||
test_box_fixture<Kokkos::HostSpace::execution_space>( machine , gang_count , nx , ny , nz );
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_host_implicit( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run )
|
||||
{
|
||||
Kokkos::HostSpace::execution_space::initialize( gang_count * gang_worker_count );
|
||||
HybridFEM::Implicit::driver<double,Kokkos::HostSpace::execution_space>( "Threads" , machine , gang_count , elem_count_begin , elem_count_end , count_run );
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_host_explicit( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run )
|
||||
{
|
||||
Kokkos::HostSpace::execution_space::initialize( gang_count * gang_worker_count );
|
||||
Explicit::driver<double,Kokkos::HostSpace::execution_space>( "Threads" , machine , gang_count , elem_count_begin , elem_count_end , count_run );
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_host_nonlinear( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run )
|
||||
{
|
||||
Kokkos::HostSpace::execution_space::initialize( gang_count * gang_worker_count );
|
||||
typedef FixtureElementHex8 hex8 ;
|
||||
typedef Kokkos::HostSpace::execution_space device ;
|
||||
HybridFEM::Nonlinear::driver<double,device,hex8>( "Threads" , machine , gang_count , elem_count_begin , elem_count_end , count_run );
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
|
||||
void test_host_nonlinear_quadratic( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run )
|
||||
{
|
||||
Kokkos::HostSpace::execution_space::initialize( gang_count * gang_worker_count );
|
||||
typedef FixtureElementHex27 hex27 ;
|
||||
typedef Kokkos::HostSpace::execution_space device ;
|
||||
HybridFEM::Nonlinear::driver<double,device,hex27>( "Threads" , machine , gang_count , elem_count_begin , elem_count_end , count_run );
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@ -1,348 +0,0 @@
|
||||
//@HEADER
|
||||
// ************************************************************************
|
||||
//
|
||||
// Kokkos v. 2.0
|
||||
// Copyright (2014) Sandia Corporation
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the Corporation nor the names of the
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
||||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
// Must be included first on Intel-Phi systems due to
|
||||
// redefinition of SEEK_SET in <mpi.h>.
|
||||
|
||||
#include <ParallelComm.hpp>
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <Kokkos_hwloc.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_box_partition( bool print );
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_host_fixture( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t nx , size_t ny , size_t nz );
|
||||
|
||||
void test_host_implicit( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
void test_host_explicit( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
void test_host_nonlinear( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
void test_host_nonlinear_quadratic( comm::Machine machine ,
|
||||
size_t gang_count ,
|
||||
size_t gang_worker_count ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
void test_cuda_query( comm::Machine );
|
||||
|
||||
void test_cuda_fixture( comm::Machine machine ,
|
||||
size_t nx , size_t ny , size_t nz );
|
||||
|
||||
void test_cuda_implicit( comm::Machine machine ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
void test_cuda_explicit( comm::Machine machine ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
void test_cuda_nonlinear( comm:: Machine machine ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
void test_cuda_nonlinear_quadratic( comm::Machine machine ,
|
||||
size_t elem_count_begin ,
|
||||
size_t elem_count_end ,
|
||||
size_t count_run );
|
||||
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace {
|
||||
|
||||
bool run_host( std::istream & input ,
|
||||
comm::Machine machine ,
|
||||
const size_t host_gang_count ,
|
||||
const size_t host_gang_worker_count )
|
||||
{
|
||||
bool cmd_error = false ;
|
||||
|
||||
std::string which ; input >> which ;
|
||||
|
||||
if ( which == std::string("fixture") ) {
|
||||
|
||||
size_t nx = 0 , ny = 0 , nz = 0 ;
|
||||
input >> nx >> ny >> nz ;
|
||||
test_host_fixture( machine , host_gang_count , host_gang_worker_count , nx , ny , nz );
|
||||
|
||||
}
|
||||
else if ( which == std::string("explicit") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_host_explicit( machine , host_gang_count , host_gang_worker_count , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else if ( which == std::string("implicit") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_host_implicit( machine , host_gang_count , host_gang_worker_count , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else if ( which == std::string("nonlinear") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_host_nonlinear( machine , host_gang_count , host_gang_worker_count , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else if ( which == std::string("nonlinear_quadratic") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_host_nonlinear_quadratic( machine , host_gang_count , host_gang_worker_count , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else {
|
||||
cmd_error = true ;
|
||||
}
|
||||
|
||||
return cmd_error ;
|
||||
}
|
||||
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
bool run_cuda( std::istream & input , comm::Machine machine )
|
||||
{
|
||||
bool cmd_error = false ;
|
||||
|
||||
std::string which ; input >> which ;
|
||||
|
||||
if ( which == std::string("fixture") ) {
|
||||
|
||||
size_t nx = 0 , ny = 0 , nz = 0 ;
|
||||
input >> nx >> ny >> nz ;
|
||||
test_cuda_fixture( machine , nx , ny , nz );
|
||||
|
||||
}
|
||||
else if ( which == std::string("explicit") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_cuda_explicit( machine , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else if ( which == std::string("implicit") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_cuda_implicit( machine , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else if ( which == std::string("nonlinear") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_cuda_nonlinear( machine , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else if ( which == std::string("nonlinear_quadratic") ) {
|
||||
|
||||
size_t mesh_node_begin = 100 ;
|
||||
size_t mesh_node_end = 300 ;
|
||||
size_t run = 1 ;
|
||||
input >> mesh_node_begin >> mesh_node_end >> run ;
|
||||
test_cuda_nonlinear_quadratic( machine , mesh_node_begin , mesh_node_end , run );
|
||||
|
||||
}
|
||||
else {
|
||||
cmd_error = true ;
|
||||
}
|
||||
|
||||
return cmd_error ;
|
||||
}
|
||||
#endif
|
||||
|
||||
void run( const std::string & argline , comm::Machine machine )
|
||||
{
|
||||
const unsigned numa_count = Kokkos::hwloc::get_available_numa_count();
|
||||
const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa();
|
||||
const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core();
|
||||
|
||||
std::istringstream input( argline );
|
||||
|
||||
bool cmd_error = false ;
|
||||
|
||||
std::string which ; input >> which ;
|
||||
|
||||
if ( which == std::string("query") ) {
|
||||
std::cout << "P" << comm::rank( machine )
|
||||
<< ": hwloc { NUMA[" << numa_count << "]"
|
||||
<< " CORE[" << cores_per_numa << "]"
|
||||
<< " PU[" << threads_per_core << "] }"
|
||||
<< std::endl ;
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
test_cuda_query( machine );
|
||||
#endif
|
||||
}
|
||||
else if ( which == std::string("partition") ) {
|
||||
if ( 0 == comm::rank( machine ) ) {
|
||||
test_box_partition( false /* print flag */ );
|
||||
}
|
||||
}
|
||||
else {
|
||||
if ( which == std::string("host") ) {
|
||||
size_t host_gang_count = 0 ;
|
||||
size_t host_gang_worker_count = 1 ;
|
||||
|
||||
input >> host_gang_count ;
|
||||
input >> host_gang_worker_count ;
|
||||
|
||||
cmd_error = run_host( input , machine , host_gang_count , host_gang_worker_count );
|
||||
}
|
||||
else if ( which == std::string("host-all") ) {
|
||||
size_t host_gang_count = numa_count ;
|
||||
size_t host_gang_worker_count = cores_per_numa * threads_per_core ;
|
||||
|
||||
cmd_error = run_host( input , machine , host_gang_count , host_gang_worker_count );
|
||||
}
|
||||
else if ( which == std::string("host-most") ) {
|
||||
size_t host_gang_count = numa_count ;
|
||||
size_t host_gang_worker_count = ( cores_per_numa - 1 ) * threads_per_core ;
|
||||
|
||||
cmd_error = run_host( input , machine , host_gang_count , host_gang_worker_count );
|
||||
}
|
||||
#if defined( KOKKOS_HAVE_CUDA )
|
||||
else if ( which == std::string("cuda") ) {
|
||||
cmd_error = run_cuda( input , machine );
|
||||
}
|
||||
#endif
|
||||
else {
|
||||
cmd_error = true ;
|
||||
}
|
||||
}
|
||||
|
||||
if ( cmd_error && 0 == comm::rank( machine ) ) {
|
||||
std::cout << "Expecting command line with" << std::endl
|
||||
<< " query" << std::endl
|
||||
<< " partition" << std::endl
|
||||
<< " host NumNumaNode NumThreadPerNode <test>" << std::endl
|
||||
<< " host-all <test>" << std::endl
|
||||
<< " host-most <test>" << std::endl
|
||||
<< " cuda <test>" << std::endl
|
||||
<< "where <test> is" << std::endl
|
||||
<< " fixture NumElemX NumElemY NumElemZ" << std::endl
|
||||
<< " implicit NumElemBegin NumElemEnd NumRun" << std::endl
|
||||
<< " explicit NumElemBegin NumElemEnd NumRun" << std::endl
|
||||
<< " nonlinear NumElemBegin NumElemEnd NumRun" << std::endl
|
||||
<< " nonlinear_quadratic NumElemBegin NumElemEnd NumRun" << std::endl ;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
int main( int argc , char ** argv )
|
||||
{
|
||||
comm::Machine machine = comm::Machine::init( & argc , & argv );
|
||||
|
||||
const unsigned comm_rank = comm::rank( machine );
|
||||
|
||||
const std::string argline = comm::command_line( machine , argc , argv );
|
||||
|
||||
try {
|
||||
run( argline , machine );
|
||||
}
|
||||
catch( const std::exception & x ) {
|
||||
std::cerr << "P" << comm_rank << " throw: " << x.what() << std::endl ;
|
||||
}
|
||||
catch( ... ) {
|
||||
std::cerr << "P" << comm_rank << " throw: unknown exception" << std::endl ;
|
||||
}
|
||||
|
||||
comm::Machine::finalize();
|
||||
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user