Add missed changes

2023-06-05 10:33:30 -06:00
parent 2e09ba2702
commit 966efd8bd5
3 changed files with 34 additions and 33 deletions
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp
@ -710,7 +710,7 @@ struct CudaParallelLaunchImpl<
                    " occupancy requests are currently broken."));
      }
-      KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE();
+      ensure_cuda_lock_arrays_on_device();
      // Invoke the driver function on the device
      base_t::invoke_kernel(driver, grid, block, shmem, cuda_instance);
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp
@ -79,8 +79,7 @@ CudaLockArrays g_host_cuda_lock_arrays = {nullptr, 0};
 void initialize_host_cuda_lock_arrays() {
 #ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
  desul::Impl::init_lock_arrays();
-
+  desul::ensure_cuda_lock_arrays_on_device();
  DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE();
 #endif
  if (g_host_cuda_lock_arrays.atomic != nullptr) return;
  KOKKOS_IMPL_CUDA_SAFE_CALL(
@ -89,7 +88,7 @@ void initialize_host_cuda_lock_arrays() {
  Impl::cuda_device_synchronize(
      "Kokkos::Impl::initialize_host_cuda_lock_arrays: Pre Init Lock Arrays");
  g_host_cuda_lock_arrays.n = Cuda::concurrency();
-  KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
+  copy_cuda_lock_arrays_to_device();
  init_lock_array_kernel_atomic<<<(CUDA_SPACE_ATOMIC_MASK + 1 + 255) / 256,
                                  256>>>();
  Impl::cuda_device_synchronize(
@ -106,7 +105,7 @@ void finalize_host_cuda_lock_arrays() {
  g_host_cuda_lock_arrays.atomic = nullptr;
  g_host_cuda_lock_arrays.n      = 0;
 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
-  KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
+  copy_cuda_lock_arrays_to_device();
 #endif
 }
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp
@ -67,7 +67,7 @@ struct CudaLockArrays {
 /// \brief This global variable in Host space is the central definition
 ///        of these arrays.
-extern Kokkos::Impl::CudaLockArrays g_host_cuda_lock_arrays;
+extern CudaLockArrays g_host_cuda_lock_arrays;
 /// \brief After this call, the g_host_cuda_lock_arrays variable has
 ///        valid, initialized arrays.
@ -105,12 +105,12 @@ namespace Impl {
 /// instances in other translation units, we must update this CUDA global
 /// variable based on the Host global variable prior to running any kernels
 /// that will use it.
-/// That is the purpose of the KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE macro.
+/// That is the purpose of the ensure_cuda_lock_arrays_on_device function.
 __device__
 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
    __constant__ extern
 #endif
-    Kokkos::Impl::CudaLockArrays g_device_cuda_lock_arrays;
+    CudaLockArrays g_device_cuda_lock_arrays;
 #define CUDA_SPACE_ATOMIC_MASK 0x1FFFF
@ -123,9 +123,7 @@ __device__ inline bool lock_address_cuda_space(void* ptr) {
  size_t offset = size_t(ptr);
  offset        = offset >> 2;
  offset        = offset & CUDA_SPACE_ATOMIC_MASK;
-  return (
+  return (0 == atomicCAS(&g_device_cuda_lock_arrays.atomic[offset], 0, 1));
      0 ==
      atomicCAS(&Kokkos::Impl::g_device_cuda_lock_arrays.atomic[offset], 0, 1));
 }
 /// \brief Release lock for the address
@ -138,7 +136,7 @@ __device__ inline void unlock_address_cuda_space(void* ptr) {
  size_t offset = size_t(ptr);
  offset        = offset >> 2;
  offset        = offset & CUDA_SPACE_ATOMIC_MASK;
-  atomicExch(&Kokkos::Impl::g_device_cuda_lock_arrays.atomic[offset], 0);
+  atomicExch(&g_device_cuda_lock_arrays.atomic[offset], 0);
 }
 }  // namespace Impl
@ -151,45 +149,49 @@ namespace {
 static int lock_array_copied = 0;
 inline int eliminate_warning_for_lock_array() { return lock_array_copied; }
 }  // namespace
 }  // namespace Impl
 }  // namespace Kokkos
-/* Dan Ibanez: it is critical that this code be a macro, so that it will
+#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
-   capture the right address for Kokkos::Impl::g_device_cuda_lock_arrays!
+inline
-   putting this in an inline function will NOT do the right thing! */
+#else
-#define KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE()                      \
+inline static
-  {                                                                   \
+#endif
-    if (::Kokkos::Impl::lock_array_copied == 0) {                     \
+    void
-      KOKKOS_IMPL_CUDA_SAFE_CALL(                                     \
+    copy_cuda_lock_arrays_to_device() {
-          cudaMemcpyToSymbol(Kokkos::Impl::g_device_cuda_lock_arrays, \
+  if (lock_array_copied == 0) {
-                             &Kokkos::Impl::g_host_cuda_lock_arrays,  \
+    KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemcpyToSymbol(g_device_cuda_lock_arrays,
-                             sizeof(Kokkos::Impl::CudaLockArrays)));  \
+                                                  &g_host_cuda_lock_arrays,
-    }                                                                 \
+                                                  sizeof(CudaLockArrays)));
    lock_array_copied = 1;                                            \
  }
  lock_array_copied = 1;
 }
 #ifndef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
-#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
+inline void ensure_cuda_lock_arrays_on_device() {}
 #else
-#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() \
+inline static void ensure_cuda_lock_arrays_on_device() {
-  KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE()
+  copy_cuda_lock_arrays_to_device();
 }
 #endif
 #else
 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
-#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
+inline void ensure_cuda_lock_arrays_on_device() {}
 #else
 // Still Need COPY_CUDA_LOCK_ARRAYS for team scratch etc.
-#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() \
+inline static void ensure_cuda_lock_arrays_on_device() {
-  KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE()         \
+  copy_cuda_lock_arrays_to_device();
-  DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
+  desul::ensure_cuda_lock_arrays_on_device();
 }
 #endif
 #endif /* defined( KOKKOS_ENABLE_IMPL_DESUL_ATOMICS ) */
 }  // namespace Impl
 }  // namespace Kokkos
 #endif /* defined( KOKKOS_ENABLE_CUDA ) */
 #endif /* #ifndef KOKKOS_CUDA_LOCKS_HPP */