Update Kokkos library in LAMMPS to v3.3.0
This commit is contained in:
@ -97,81 +97,16 @@
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
#if !defined(KOKKOS_ENABLE_THREADS) && !defined(KOKKOS_ENABLE_CUDA) && \
|
||||
!defined(KOKKOS_ENABLE_OPENMP) && !defined(KOKKOS_ENABLE_HPX) && \
|
||||
!defined(KOKKOS_ENABLE_ROCM) && !defined(KOKKOS_ENABLE_OPENMPTARGET) && \
|
||||
!defined(KOKKOS_ENABLE_HIP)
|
||||
#if !defined(KOKKOS_ENABLE_THREADS) && !defined(KOKKOS_ENABLE_CUDA) && \
|
||||
!defined(KOKKOS_ENABLE_OPENMP) && !defined(KOKKOS_ENABLE_HPX) && \
|
||||
!defined(KOKKOS_ENABLE_OPENMPTARGET) && !defined(KOKKOS_ENABLE_HIP) && \
|
||||
!defined(KOKKOS_ENABLE_SYCL)
|
||||
#define KOKKOS_INTERNAL_NOT_PARALLEL
|
||||
#endif
|
||||
|
||||
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
|
||||
// Compiling with a CUDA compiler.
|
||||
//
|
||||
// Include <cuda.h> to pick up the CUDA_VERSION macro defined as:
|
||||
// CUDA_VERSION = ( MAJOR_VERSION * 1000 ) + ( MINOR_VERSION * 10 )
|
||||
//
|
||||
// When generating device code the __CUDA_ARCH__ macro is defined as:
|
||||
// __CUDA_ARCH__ = ( MAJOR_CAPABILITY * 100 ) + ( MINOR_CAPABILITY * 10 )
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda.h>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define KOKKOS_IMPL_WINDOWS_CUDA
|
||||
#endif
|
||||
|
||||
#if !defined(CUDA_VERSION)
|
||||
#error "#include <cuda.h> did not define CUDA_VERSION."
|
||||
#endif
|
||||
|
||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 300)
|
||||
// Compiling with CUDA compiler for device code.
|
||||
#error "Cuda device capability >= 3.0 is required."
|
||||
#endif
|
||||
|
||||
#ifdef KOKKOS_ENABLE_CUDA_LAMBDA
|
||||
#define KOKKOS_LAMBDA [=] __host__ __device__
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CXX17) || defined(KOKKOS_ENABLE_CXX20)
|
||||
#define KOKKOS_CLASS_LAMBDA [ =, *this ] __host__ __device__
|
||||
#endif
|
||||
|
||||
#if defined(__NVCC__)
|
||||
#define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
|
||||
#endif
|
||||
#else // !defined(KOKKOS_ENABLE_CUDA_LAMBDA)
|
||||
#undef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
|
||||
#endif // !defined(KOKKOS_ENABLE_CUDA_LAMBDA)
|
||||
|
||||
#if (10000 > CUDA_VERSION)
|
||||
#define KOKKOS_ENABLE_PRE_CUDA_10_DEPRECATION_API
|
||||
#endif
|
||||
|
||||
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 700) && \
|
||||
!defined(KOKKOS_IMPL_WINDOWS_CUDA)
|
||||
// PTX atomics with memory order semantics are only available on Volta and later
|
||||
#if !defined(KOKKOS_DISABLE_CUDA_ASM)
|
||||
#if !defined(KOKKOS_ENABLE_CUDA_ASM)
|
||||
#define KOKKOS_ENABLE_CUDA_ASM
|
||||
#if !defined(KOKKOS_DISABLE_CUDA_ASM_ATOMICS)
|
||||
#define KOKKOS_ENABLE_CUDA_ASM_ATOMICS
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif // #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
|
||||
|
||||
#if defined(KOKKOS_ENABLE_HIP)
|
||||
|
||||
#define HIP_ENABLE_PRINTF
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
#define KOKKOS_LAMBDA [=] __host__ __device__
|
||||
#endif // #if defined(KOKKOS_ENABLE_HIP)
|
||||
#include <KokkosCore_Config_SetupBackend.hpp>
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Mapping compiler built-ins to KOKKOS_COMPILER_*** macros
|
||||
@ -219,13 +154,16 @@
|
||||
#define KOKKOS_COMPILER_IBM __IBMCPP__
|
||||
#elif defined(__IBMC__)
|
||||
#define KOKKOS_COMPILER_IBM __IBMC__
|
||||
#elif defined(__ibmxl_vrm__) // xlclang++
|
||||
#define KOKKOS_COMPILER_IBM __ibmxl_vrm__
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE_CC__)
|
||||
#define KOKKOS_COMPILER_APPLECC __APPLE_CC__
|
||||
#endif
|
||||
|
||||
#if defined(__clang__) && !defined(KOKKOS_COMPILER_INTEL)
|
||||
#if defined(__clang__) && !defined(KOKKOS_COMPILER_INTEL) && \
|
||||
!defined(KOKKOS_COMPILER_IBM)
|
||||
#define KOKKOS_COMPILER_CLANG \
|
||||
__clang_major__ * 100 + __clang_minor__ * 10 + __clang_patchlevel__
|
||||
#endif
|
||||
@ -234,8 +172,8 @@
|
||||
#define KOKKOS_COMPILER_GNU \
|
||||
__GNUC__ * 100 + __GNUC_MINOR__ * 10 + __GNUC_PATCHLEVEL__
|
||||
|
||||
#if (472 > KOKKOS_COMPILER_GNU)
|
||||
#error "Compiling with GCC version earlier than 4.7.2 is not supported."
|
||||
#if (530 > KOKKOS_COMPILER_GNU)
|
||||
#error "Compiling with GCC version earlier than 5.3.0 is not supported."
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -243,8 +181,8 @@
|
||||
#define KOKKOS_COMPILER_PGI \
|
||||
__PGIC__ * 100 + __PGIC_MINOR__ * 10 + __PGIC_PATCHLEVEL__
|
||||
|
||||
#if (1540 > KOKKOS_COMPILER_PGI)
|
||||
#error "Compiling with PGI version earlier than 15.4 is not supported."
|
||||
#if (1740 > KOKKOS_COMPILER_PGI)
|
||||
#error "Compiling with PGI version earlier than 17.4 is not supported."
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -252,56 +190,6 @@
|
||||
#define KOKKOS_COMPILER_MSVC _MSC_VER
|
||||
#endif
|
||||
|
||||
//#endif // #if !defined( __CUDA_ARCH__ )
|
||||
//----------------------------------------------------------------------------
|
||||
// Language info: C++, CUDA, OPENMP
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
// Compiling Cuda code to 'ptx'
|
||||
|
||||
#define KOKKOS_IMPL_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
|
||||
#define KOKKOS_IMPL_FORCEINLINE __forceinline__
|
||||
#define KOKKOS_IMPL_INLINE_FUNCTION __device__ __host__ inline
|
||||
#define KOKKOS_IMPL_FUNCTION __device__ __host__
|
||||
#define KOKKOS_IMPL_HOST_FUNCTION __host__
|
||||
#define KOKKOS_IMPL_DEVICE_FUNCTION __device__
|
||||
#if defined(KOKKOS_COMPILER_NVCC)
|
||||
#define KOKKOS_INLINE_FUNCTION_DELETED inline
|
||||
#else
|
||||
#define KOKKOS_INLINE_FUNCTION_DELETED __device__ __host__ inline
|
||||
#endif
|
||||
#if (CUDA_VERSION < 10000)
|
||||
#define KOKKOS_DEFAULTED_FUNCTION __host__ __device__ inline
|
||||
#else
|
||||
#define KOKKOS_DEFAULTED_FUNCTION inline
|
||||
#endif
|
||||
#define KOKKOS_IMPL_HOST_FUNCTION __host__
|
||||
#define KOKKOS_IMPL_DEVICE_FUNCTION __device__
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_HIP)
|
||||
|
||||
#define KOKKOS_IMPL_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
|
||||
#define KOKKOS_IMPL_INLINE_FUNCTION __device__ __host__ inline
|
||||
#define KOKKOS_DEFAULTED_FUNCTION __device__ __host__ inline
|
||||
#define KOKKOS_INLINE_FUNCTION_DELETED __device__ __host__ inline
|
||||
#define KOKKOS_IMPL_FUNCTION __device__ __host__
|
||||
#define KOKKOS_IMPL_HOST_FUNCTION __host__
|
||||
#define KOKKOS_IMPL_DEVICE_FUNCTION __device__
|
||||
#if defined(KOKKOS_ENABLE_CXX17) || defined(KOKKOS_ENABLE_CXX20)
|
||||
#define KOKKOS_CLASS_LAMBDA [ =, *this ] __host__ __device__
|
||||
#endif
|
||||
#endif // #if defined( KOKKOS_ENABLE_HIP )
|
||||
|
||||
#if defined(KOKKOS_ENABLE_ROCM) && defined(__HCC__)
|
||||
|
||||
#define KOKKOS_IMPL_FORCEINLINE_FUNCTION __attribute__((amp, cpu)) inline
|
||||
#define KOKKOS_IMPL_INLINE_FUNCTION __attribute__((amp, cpu)) inline
|
||||
#define KOKKOS_IMPL_FUNCTION __attribute__((amp, cpu))
|
||||
#define KOKKOS_LAMBDA [=] __attribute__((amp, cpu))
|
||||
#define KOKKOS_DEFAULTED_FUNCTION __attribute__((amp, cpu)) inline
|
||||
#endif
|
||||
|
||||
#if defined(_OPENMP)
|
||||
// Compiling with OpenMP.
|
||||
// The value of _OPENMP is an integer value YYYYMM
|
||||
@ -313,14 +201,18 @@
|
||||
// Intel compiler macros
|
||||
|
||||
#if defined(KOKKOS_COMPILER_INTEL)
|
||||
// FIXME_SYCL
|
||||
#if !defined(KOKKOS_ENABLE_SYCL)
|
||||
#define KOKKOS_ENABLE_PRAGMA_UNROLL 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
|
||||
#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
|
||||
#endif
|
||||
#if (1800 > KOKKOS_COMPILER_INTEL)
|
||||
#define KOKKOS_ENABLE_PRAGMA_SIMD 1
|
||||
#endif
|
||||
|
||||
#if (__INTEL_COMPILER > 1400)
|
||||
// FIXME_SYCL
|
||||
#if !defined(KOKKOS_ENABLE_SYCL)
|
||||
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
|
||||
#endif
|
||||
|
||||
@ -334,14 +226,8 @@
|
||||
#define KOKKOS_IMPL_ALIGN_PTR(size) __attribute__((align_value(size)))
|
||||
#endif
|
||||
|
||||
#if (1400 > KOKKOS_COMPILER_INTEL)
|
||||
#if (1300 > KOKKOS_COMPILER_INTEL)
|
||||
#error \
|
||||
"Compiling with Intel version earlier than 13.0 is not supported. Official minimal version is 14.0."
|
||||
#else
|
||||
#warning \
|
||||
"Compiling with Intel version 13.x probably works but is not officially supported. Official minimal version is 14.0."
|
||||
#endif
|
||||
#if (1700 > KOKKOS_COMPILER_INTEL)
|
||||
#error "Compiling with Intel version earlier than 17.0 is not supported."
|
||||
#endif
|
||||
|
||||
#if !defined(KOKKOS_ENABLE_ASM) && !defined(_WIN32)
|
||||
@ -542,7 +428,7 @@
|
||||
|
||||
#if 1 < ((defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA) ? 1 : 0) + \
|
||||
(defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HIP) ? 1 : 0) + \
|
||||
(defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM) ? 1 : 0) + \
|
||||
(defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SYCL) ? 1 : 0) + \
|
||||
(defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET) ? 1 : 0) + \
|
||||
(defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP) ? 1 : 0) + \
|
||||
(defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS) ? 1 : 0) + \
|
||||
@ -552,10 +438,10 @@
|
||||
#endif
|
||||
|
||||
// If default is not specified then choose from enabled execution spaces.
|
||||
// Priority: CUDA, HIP, ROCM, OPENMPTARGET, OPENMP, THREADS, HPX, SERIAL
|
||||
// Priority: CUDA, HIP, SYCL, OPENMPTARGET, OPENMP, THREADS, HPX, SERIAL
|
||||
#if defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA)
|
||||
#elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HIP)
|
||||
#elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM)
|
||||
#elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SYCL)
|
||||
#elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET)
|
||||
#elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP)
|
||||
#elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS)
|
||||
@ -570,8 +456,8 @@
|
||||
// as valid overload criteria
|
||||
#define KOKKOS_IMPL_ENABLE_OVERLOAD_HOST_DEVICE
|
||||
#endif
|
||||
#elif defined(KOKKOS_ENABLE_ROCM)
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM
|
||||
#elif defined(KOKKOS_ENABLE_SYCL)
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SYCL
|
||||
#elif defined(KOKKOS_ENABLE_OPENMPTARGET)
|
||||
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET
|
||||
#elif defined(KOKKOS_ENABLE_OPENMP)
|
||||
@ -589,9 +475,8 @@
|
||||
|
||||
#if defined(__CUDACC__) && defined(__CUDA_ARCH__) && defined(KOKKOS_ENABLE_CUDA)
|
||||
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA
|
||||
#elif defined(__HCC__) && defined(__HCC_ACCELERATOR__) && \
|
||||
defined(KOKKOS_ENABLE_ROCM)
|
||||
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_ROCM_GPU
|
||||
#elif defined(__SYCL_DEVICE_ONLY__) && defined(KOKKOS_ENABLE_SYCL)
|
||||
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_SYCL
|
||||
#elif defined(__HIPCC__) && defined(__HIP_DEVICE_COMPILE__) && \
|
||||
defined(KOKKOS_ENABLE_HIP)
|
||||
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HIP_GPU
|
||||
@ -616,11 +501,9 @@
|
||||
#if defined(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
|
||||
#define KOKKOS_ENABLE_TASKDAG
|
||||
#endif
|
||||
#else
|
||||
#ifndef KOKKOS_ENABLE_HIP
|
||||
#elif !defined(KOKKOS_ENABLE_HIP) && !defined(KOKKOS_ENABLE_SYCL)
|
||||
#define KOKKOS_ENABLE_TASKDAG
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA)
|
||||
#define KOKKOS_IMPL_CUDA_VERSION_9_WORKAROUND
|
||||
@ -633,21 +516,9 @@
|
||||
|
||||
#define KOKKOS_IMPL_CTOR_DEFAULT_ARG KOKKOS_INVALID_INDEX
|
||||
|
||||
#if (defined(KOKKOS_ENABLE_CXX14) || defined(KOKKOS_ENABLE_CXX17) || \
|
||||
defined(KOKKOS_ENABLE_CXX20))
|
||||
#define KOKKOS_CONSTEXPR_14 constexpr
|
||||
#define KOKKOS_DEPRECATED [[deprecated]]
|
||||
#define KOKKOS_DEPRECATED_TRAILING_ATTRIBUTE
|
||||
#else
|
||||
#define KOKKOS_CONSTEXPR_14
|
||||
#if defined(KOKKOS_COMPILER_GNU) || defined(KOKKOS_COMPILER_CLANG)
|
||||
#define KOKKOS_DEPRECATED
|
||||
#define KOKKOS_DEPRECATED_TRAILING_ATTRIBUTE __attribute__((deprecated))
|
||||
#else
|
||||
#define KOKKOS_DEPRECATED
|
||||
#define KOKKOS_DEPRECATED_TRAILING_ATTRIBUTE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// DJS 05/28/2019: Bugfix: Issue 2155
|
||||
// Use KOKKOS_ENABLE_CUDA_LDG_INTRINSIC to avoid memory leak in RandomAccess
|
||||
@ -676,13 +547,14 @@
|
||||
#undef __CUDA_ARCH__
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_COMPILER_MSVC)
|
||||
#if defined(KOKKOS_COMPILER_MSVC) && !defined(KOKKOS_COMPILER_CLANG)
|
||||
#define KOKKOS_THREAD_LOCAL __declspec(thread)
|
||||
#else
|
||||
#define KOKKOS_THREAD_LOCAL __thread
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_IMPL_WINDOWS_CUDA) || defined(KOKKOS_COMPILER_MSVC)
|
||||
#if (defined(KOKKOS_IMPL_WINDOWS_CUDA) || defined(KOKKOS_COMPILER_MSVC)) && \
|
||||
!defined(KOKKOS_COMPILER_CLANG)
|
||||
// MSVC (as of 16.5.5 at least) does not do empty base class optimization by
|
||||
// default when there are multiple bases, even though the standard requires it
|
||||
// for standard layout types.
|
||||
|
||||
Reference in New Issue
Block a user