/* //@HEADER // ************************************************************************ // // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation // // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the Corporation nor the names of the // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. Trott (crtrott@sandia.gov) // // ************************************************************************ //@HEADER */ #ifndef KOKKOS_CUDA_HPP #define KOKKOS_CUDA_HPP #include #if defined( KOKKOS_ENABLE_CUDA ) #include #include #include #include #include #include #include #include #include #include #include /*--------------------------------------------------------------------------*/ namespace Kokkos { namespace Impl { class CudaExec ; class CudaInternal ; } // namespace Impl } // namespace Kokkos /*--------------------------------------------------------------------------*/ namespace Kokkos { namespace Impl { namespace Experimental { enum class CudaLaunchMechanism:unsigned{Default=0,ConstantMemory=1,GlobalMemory=2,LocalMemory=4}; constexpr inline CudaLaunchMechanism operator | (CudaLaunchMechanism p1, CudaLaunchMechanism p2) { return static_cast(static_cast(p1) | static_cast(p2)); } constexpr inline CudaLaunchMechanism operator & (CudaLaunchMechanism p1, CudaLaunchMechanism p2) { return static_cast(static_cast(p1) & static_cast(p2)); } template struct CudaDispatchProperties { CudaLaunchMechanism launch_mechanism = l; }; } } /// \class Cuda /// \brief Kokkos Execution Space that uses CUDA to run on GPUs. /// /// An "execution space" represents a parallel execution model. It tells Kokkos /// how to parallelize the execution of kernels in a parallel_for or /// parallel_reduce. For example, the Threads execution space uses Pthreads or /// C++11 threads on a CPU, the OpenMP execution space uses the OpenMP language /// extensions, and the Serial execution space executes "parallel" kernels /// sequentially. The Cuda execution space uses NVIDIA's CUDA programming /// model to execute kernels in parallel on GPUs. class Cuda { public: //! \name Type declarations that all Kokkos execution spaces must provide. //@{ //! Tag this class as a kokkos execution space typedef Cuda execution_space ; #if defined( KOKKOS_ENABLE_CUDA_UVM ) //! This execution space's preferred memory space. typedef CudaUVMSpace memory_space ; #else //! This execution space's preferred memory space. typedef CudaSpace memory_space ; #endif //! This execution space preferred device_type typedef Kokkos::Device device_type; //! The size_type best suited for this execution space. typedef memory_space::size_type size_type ; //! This execution space's preferred array layout. typedef LayoutLeft array_layout ; //! typedef ScratchMemorySpace< Cuda > scratch_memory_space ; //@} //-------------------------------------------------- //! \name Functions that all Kokkos devices must implement. //@{ /// \brief True if and only if this method is being called in a /// thread-parallel function. KOKKOS_INLINE_FUNCTION static int in_parallel() { #if defined( __CUDA_ARCH__ ) return true; #else return false; #endif } /** \brief Set the device in a "sleep" state. * * This function sets the device in a "sleep" state in which it is * not ready for work. This may consume less resources than if the * device were in an "awake" state, but it may also take time to * bring the device from a sleep state to be ready for work. * * \return True if the device is in the "sleep" state, else false if * the device is actively working and could not enter the "sleep" * state. */ static bool sleep(); /// \brief Wake the device from the 'sleep' state so it is ready for work. /// /// \return True if the device is in the "ready" state, else "false" /// if the device is actively working (which also means that it's /// awake). static bool wake(); /// \brief Wait until all dispatched functors complete. /// /// The parallel_for or parallel_reduce dispatch of a functor may /// return asynchronously, before the functor completes. This /// method does not return until all dispatched functors on this /// device have completed. static void impl_static_fence(); #ifdef KOKKOS_ENABLE_DEPRECATED_CODE static void fence(); #else void fence() const; #endif /** \brief Return the maximum amount of concurrency. */ static int concurrency(); //! Print configuration information to the given output stream. static void print_configuration( std::ostream & , const bool detail = false ); //@} //-------------------------------------------------- //! \name Cuda space instances KOKKOS_INLINE_FUNCTION ~Cuda() {} Cuda(); Cuda( Cuda && ) = default ; Cuda( const Cuda & ) = default ; Cuda & operator = ( Cuda && ) = default ; Cuda & operator = ( const Cuda & ) = default ; Cuda(cudaStream_t stream); //-------------------------------------------------------------------------- //! \name Device-specific functions //@{ struct SelectDevice { int cuda_device_id ; SelectDevice() : cuda_device_id(0) {} explicit SelectDevice( int id ) : cuda_device_id( id ) {} }; #ifdef KOKKOS_ENABLE_DEPRECATED_CODE //! Free any resources being consumed by the device. static void finalize(); //! Has been initialized static int is_initialized(); //! Initialize, telling the CUDA run-time library which device to use. static void initialize( const SelectDevice = SelectDevice() , const size_t num_instances = 1 ); #else //! Free any resources being consumed by the device. static void impl_finalize(); //! Has been initialized static int impl_is_initialized(); //! Initialize, telling the CUDA run-time library which device to use. static void impl_initialize( const SelectDevice = SelectDevice() , const size_t num_instances = 1 ); #endif /// \brief Cuda device architecture of the selected device. /// /// This matches the __CUDA_ARCH__ specification. static size_type device_arch(); //! Query device count. static size_type detect_device_count(); /** \brief Detect the available devices and their architecture * as defined by the __CUDA_ARCH__ specification. */ static std::vector detect_device_arch(); cudaStream_t cuda_stream() const; int cuda_device() const; //@} //-------------------------------------------------------------------------- static const char* name(); inline Impl::CudaInternal* impl_internal_space_instance() const { return m_space_instance; } private: Impl::CudaInternal* m_space_instance; }; } // namespace Kokkos /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ namespace Kokkos { namespace Impl { template<> struct MemorySpaceAccess < Kokkos::CudaSpace , Kokkos::Cuda::scratch_memory_space > { enum { assignable = false }; enum { accessible = true }; enum { deepcopy = false }; }; #if defined( KOKKOS_ENABLE_CUDA_UVM ) // If forcing use of UVM everywhere // then must assume that CudaUVMSpace // can be a stand-in for CudaSpace. // This will fail when a strange host-side execution space // that defines CudaUVMSpace as its preferredmemory space. template<> struct MemorySpaceAccess < Kokkos::CudaUVMSpace , Kokkos::Cuda::scratch_memory_space > { enum { assignable = false }; enum { accessible = true }; enum { deepcopy = false }; }; #endif template<> struct VerifyExecutionCanAccessMemorySpace < Kokkos::CudaSpace , Kokkos::Cuda::scratch_memory_space > { enum { value = true }; KOKKOS_INLINE_FUNCTION static void verify( void ) { } KOKKOS_INLINE_FUNCTION static void verify( const void * ) { } }; template<> struct VerifyExecutionCanAccessMemorySpace < Kokkos::HostSpace , Kokkos::Cuda::scratch_memory_space > { enum { value = false }; inline static void verify( void ) { CudaSpace::access_error(); } inline static void verify( const void * p ) { CudaSpace::access_error(p); } }; } // namespace Impl } // namespace Kokkos /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ #include #include #include #include #include #include #include #include //---------------------------------------------------------------------------- #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */ #endif /* #ifndef KOKKOS_CUDA_HPP */