Updating kokkos lib

git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14918 f3b2605a-c512-4ea7-a41b-209d697bcdaa
2016-05-02 22:06:50 +00:00
parent c5d0c55bee
commit 0a1b765248
411 changed files with 0 additions and 133424 deletions
--- a/lib/kokkos/example/multi_fem/SparseLinearSystem.hpp
+++ b/lib/kokkos/example/multi_fem/SparseLinearSystem.hpp
@ -1,400 +0,0 @@
-/*
-//@HEADER
-// ************************************************************************
-// 
-//                        Kokkos v. 2.0
-//              Copyright (2014) Sandia Corporation
-// 
-// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
-// the U.S. Government retains certain rights in this software.
-// 
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// 3. Neither the name of the Corporation nor the names of the
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
-// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
-// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
-// ************************************************************************
-//@HEADER
-*/
-
-#ifndef SPARSELINEARSYSTEM_HPP
-#define SPARSELINEARSYSTEM_HPP
-
-#include <cmath>
-#include <impl/Kokkos_Timer.hpp>
-
-#include <Kokkos_Core.hpp>
-#include <Kokkos_StaticCrsGraph.hpp>
-
-#include <LinAlgBLAS.hpp>
-
-namespace Kokkos {
-
-template< typename ScalarType , class Device >
-struct CrsMatrix {
-  typedef Device      execution_space ;
-  typedef ScalarType  value_type ;
-
-  typedef StaticCrsGraph< int , execution_space , void , int >  graph_type ;
-  typedef View< value_type* , execution_space >   coefficients_type ;
-
-  graph_type         graph ;
-  coefficients_type  coefficients ;
-};
-
-//----------------------------------------------------------------------------
-
-namespace Impl {
-
-template< class Matrix , class OutputVector , class InputVector >
-struct Multiply ;
-
-}
-}
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-namespace Impl {
-
-template< typename AScalarType ,
-          typename VScalarType ,
-          class DeviceType >
-struct Multiply< CrsMatrix<AScalarType,DeviceType> ,
-                 View<VScalarType*,DeviceType > ,
-                 View<VScalarType*,DeviceType > >
-{
-  typedef DeviceType                       execution_space ;
-  typedef typename execution_space::size_type  size_type ;
-
-  typedef View<       VScalarType*, execution_space, MemoryUnmanaged >  vector_type ;
-  typedef View< const VScalarType*, execution_space, MemoryUnmanaged >  vector_const_type ;
-
-  typedef CrsMatrix< AScalarType , execution_space >    matrix_type ;
-
-private:
-
-  matrix_type        m_A ;
-  vector_const_type  m_x ;
-  vector_type        m_y ;
-
-public:
-
-  //--------------------------------------------------------------------------
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()( const size_type iRow ) const
-  {
-    const size_type iEntryBegin = m_A.graph.row_map[iRow];
-    const size_type iEntryEnd   = m_A.graph.row_map[iRow+1];
-
-    double sum = 0 ;
-
-#if defined( __INTEL_COMPILER )
-#pragma simd reduction(+:sum)
-#pragma ivdep
-    for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
-      sum += m_A.coefficients(iEntry) * m_x( m_A.graph.entries(iEntry) );
-    }
-#else
-    for ( size_type iEntry = iEntryBegin ; iEntry < iEntryEnd ; ++iEntry ) {
-      sum += m_A.coefficients(iEntry) * m_x( m_A.graph.entries(iEntry) );
-    }
-#endif
-
-    m_y(iRow) = sum ;
-  }
-
-  Multiply( const matrix_type & A ,
-            const size_type nrow ,
-            const size_type , // ncol ,
-            const vector_type & x ,
-            const vector_type & y )
-    : m_A( A ), m_x( x ), m_y( y )
-  {
-    parallel_for( nrow , *this );
-  }
-};
-
-//----------------------------------------------------------------------------
-
-} // namespace Impl
-} // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-namespace Kokkos {
-
-//----------------------------------------------------------------------------
-
-template< typename AScalarType ,
-          typename VScalarType ,
-          class Device >
-class Operator {
-  typedef CrsMatrix<AScalarType,Device>  matrix_type ;
-  typedef View<VScalarType*,Device>     vector_type ;
-
-private:
-  const CrsMatrix<AScalarType,Device> A ;
-
-  ParallelDataMap                                         data_map ;
-  AsyncExchange< VScalarType , Device , ParallelDataMap > exchange ;
-
-public:
-
-  Operator( const ParallelDataMap                  & arg_data_map ,
-            const CrsMatrix<AScalarType,Device>    & arg_A )
-    : A( arg_A )
-    , data_map( arg_data_map )
-    , exchange( arg_data_map , 1 )
-    {}
-
-  void apply( const View<VScalarType*,Device>  & x ,
-              const View<VScalarType*,Device>  & y )
-  {
-    // Gather off-processor data for 'x'
-
-    PackArray< vector_type >::pack( exchange.buffer() ,
-                                    data_map.count_interior ,
-                                    data_map.count_send , x );
-
-    exchange.setup();
-
-    // If interior & boundary matrices then could launch interior multiply
-
-    exchange.send_receive();
-
-    UnpackArray< vector_type >::unpack( x , exchange.buffer() ,
-                                        data_map.count_owned ,
-                                        data_map.count_receive );
-
-    const typename Device::size_type nrow = data_map.count_owned ;
-    const typename Device::size_type ncol = data_map.count_owned +
-                                            data_map.count_receive ;
-
-    Impl::Multiply<matrix_type,vector_type,vector_type>( A, nrow, ncol, x, y);
-  }
-};
-
-//----------------------------------------------------------------------------
-
-template< typename AScalarType , typename VScalarType , class Device >
-void cgsolve(
-  const ParallelDataMap                 data_map ,
-  const CrsMatrix<AScalarType,Device>   A ,
-  const View<VScalarType*,Device> b ,
-  const View<VScalarType*,Device> x ,
-  size_t & iteration ,
-  double & normr ,
-  double & iter_time ,
-  const size_t maximum_iteration = 200 ,
-  const double tolerance = std::numeric_limits<VScalarType>::epsilon() )
-{
-  typedef View<VScalarType*,Device> vector_type ;
-  //typedef View<VScalarType,  Device> value_type ; // unused
-
-  const size_t count_owned = data_map.count_owned ;
-  const size_t count_total = data_map.count_owned + data_map.count_receive ;
-
-  Operator<AScalarType,VScalarType,Device> matrix_operator( data_map , A );
-
-  // Need input vector to matvec to be owned + received
-  vector_type pAll ( "cg::p" , count_total );
-
-  vector_type p = Kokkos::subview( pAll , std::pair<size_t,size_t>(0,count_owned) );
-  vector_type r ( "cg::r" , count_owned );
-  vector_type Ap( "cg::Ap", count_owned );
-
-  /* r = b - A * x ; */
-
-  /* p  = x      */ deep_copy( p , x );
-  /* Ap = A * p  */ matrix_operator.apply( pAll , Ap );
-  /* r  = b - Ap */ waxpby( count_owned , 1.0 , b , -1.0 , Ap , r );
-  /* p  = r      */ deep_copy( p , r );
-
-  double old_rdot = dot( count_owned , r , data_map.machine );
-
-  normr     = sqrt( old_rdot );
-  iteration = 0 ;
-
-  Kokkos::Impl::Timer wall_clock ;
-
-  while ( tolerance < normr && iteration < maximum_iteration ) {
-
-    /* pAp_dot = dot( p , Ap = A * p ) */
-
-    /* Ap = A * p  */ matrix_operator.apply( pAll , Ap );
-
-    const double pAp_dot = dot( count_owned , p , Ap , data_map.machine );
-    const double alpha   = old_rdot / pAp_dot ;
-
-    /* x += alpha * p ;  */ axpy( count_owned,  alpha, p , x );
-    /* r -= alpha * Ap ; */ axpy( count_owned, -alpha, Ap, r );
-
-    const double r_dot = dot( count_owned , r , data_map.machine );
-    const double beta  = r_dot / old_rdot ;
-
-    /* p = r + beta * p ; */ xpby( count_owned , r , beta , p );
-
-    normr = sqrt( old_rdot = r_dot );
-    ++iteration ;
-  }
-
-  iter_time = wall_clock.seconds();
-}
-
-//----------------------------------------------------------------------------
-
-} // namespace Kokkos
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#if defined( KOKKOS_HAVE_CUDA )
-
-#if ( CUDA_VERSION < 6000 )
-#pragma message "cusparse_v2.h"
-#include <cusparse_v2.h>
-#else
-#pragma message "cusparse.h"
-#include <cusparse.h>
-#endif
-
-namespace Kokkos {
-namespace Impl {
-
-struct CudaSparseSingleton {
-  cusparseHandle_t   handle;
-  cusparseMatDescr_t descra;
-
-  CudaSparseSingleton()
-  {
-    cusparseCreate( & handle );
-    cusparseCreateMatDescr( & descra );
-    cusparseSetMatType(       descra , CUSPARSE_MATRIX_TYPE_GENERAL );
-    cusparseSetMatIndexBase(  descra , CUSPARSE_INDEX_BASE_ZERO );
-  }
-
-  static CudaSparseSingleton & singleton();
-
-};
-
-template<>
-struct Multiply< CrsMatrix<double,Cuda> ,
-                 View<double*,Cuda > ,
-                 View<double*,Cuda > >
-{
-  typedef Cuda                                      execution_space ;
-  typedef execution_space::size_type                    size_type ;
-  typedef double                                    scalar_type ;
-  typedef View< scalar_type* , execution_space >        vector_type ;
-  typedef CrsMatrix< scalar_type , execution_space >    matrix_type ;
-
-public:
-
-  Multiply( const matrix_type & A ,
-            const size_type nrow ,
-            const size_type ncol ,
-            const vector_type & x ,
-            const vector_type & y )
-  {
-    CudaSparseSingleton & s = CudaSparseSingleton::singleton();
-    const scalar_type alpha = 1 , beta = 0 ;
-
-    cusparseStatus_t status =
-      cusparseDcsrmv( s.handle ,
-                      CUSPARSE_OPERATION_NON_TRANSPOSE ,
-                      nrow , ncol , A.coefficients.dimension_0() ,
-                      &alpha ,
-                      s.descra ,
-                      A.coefficients.ptr_on_device() ,
-                      A.graph.row_map.ptr_on_device() ,
-                      A.graph.entries.ptr_on_device() ,
-                      x.ptr_on_device() ,
-                      &beta ,
-                      y.ptr_on_device() );
-
-    if ( CUSPARSE_STATUS_SUCCESS != status ) {
-      throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
-    }
-  }
-};
-
-
-template<>
-struct Multiply< CrsMatrix<float,Cuda> ,
-                 View<float*,Cuda > ,
-                 View<float*,Cuda > >
-{
-  typedef Cuda                                      execution_space ;
-  typedef execution_space::size_type                    size_type ;
-  typedef float                                     scalar_type ;
-  typedef View< scalar_type* , execution_space >        vector_type ;
-  typedef CrsMatrix< scalar_type , execution_space >    matrix_type ;
-
-public:
-
-  Multiply( const matrix_type & A ,
-            const size_type nrow ,
-            const size_type ncol ,
-            const vector_type & x ,
-            const vector_type & y )
-  {
-    CudaSparseSingleton & s = CudaSparseSingleton::singleton();
-    const scalar_type alpha = 1 , beta = 0 ;
-
-    cusparseStatus_t status =
-      cusparseScsrmv( s.handle ,
-                      CUSPARSE_OPERATION_NON_TRANSPOSE ,
-                      nrow , ncol , A.coefficients.dimension_0() ,
-                      &alpha ,
-                      s.descra ,
-                      A.coefficients.ptr_on_device() ,
-                      A.graph.row_map.ptr_on_device() ,
-                      A.graph.entries.ptr_on_device() ,
-                      x.ptr_on_device() ,
-                      &beta ,
-                      y.ptr_on_device() );
-
-    if ( CUSPARSE_STATUS_SUCCESS != status ) {
-      throw std::runtime_error( std::string("ERROR - cusparseDcsrmv " ) );
-    }
-  }
-};
-
-} /* namespace Impl */
-} /* namespace Kokkos */
-
-#endif /* #if defined( KOKKOS_HAVE_CUDA ) */
-
-//----------------------------------------------------------------------------
-//----------------------------------------------------------------------------
-
-#endif /* #ifndef SPARSELINEARSYSTEM_HPP */
-