diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp
index ba43e362bb..a91d0eb313 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp
@@ -710,7 +710,7 @@ struct CudaParallelLaunchImpl<
                     " occupancy requests are currently broken."));
       }
 
-      KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE();
+      ensure_cuda_lock_arrays_on_device();
 
       // Invoke the driver function on the device
       base_t::invoke_kernel(driver, grid, block, shmem, cuda_instance);
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp
index 84d4307cfd..3796534816 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp
@@ -79,8 +79,7 @@ CudaLockArrays g_host_cuda_lock_arrays = {nullptr, 0};
 void initialize_host_cuda_lock_arrays() {
 #ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
   desul::Impl::init_lock_arrays();
-
-  DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE();
+  desul::ensure_cuda_lock_arrays_on_device();
 #endif
   if (g_host_cuda_lock_arrays.atomic != nullptr) return;
   KOKKOS_IMPL_CUDA_SAFE_CALL(
@@ -89,7 +88,7 @@ void initialize_host_cuda_lock_arrays() {
   Impl::cuda_device_synchronize(
       "Kokkos::Impl::initialize_host_cuda_lock_arrays: Pre Init Lock Arrays");
   g_host_cuda_lock_arrays.n = Cuda::concurrency();
-  KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
+  copy_cuda_lock_arrays_to_device();
   init_lock_array_kernel_atomic<<<(CUDA_SPACE_ATOMIC_MASK + 1 + 255) / 256,
                                   256>>>();
   Impl::cuda_device_synchronize(
@@ -106,7 +105,7 @@ void finalize_host_cuda_lock_arrays() {
   g_host_cuda_lock_arrays.atomic = nullptr;
   g_host_cuda_lock_arrays.n      = 0;
 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
-  KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
+  copy_cuda_lock_arrays_to_device();
 #endif
 }
 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp
index bdb7723985..84bfc953fd 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp
@@ -67,7 +67,7 @@ struct CudaLockArrays {
 
 /// \brief This global variable in Host space is the central definition
 ///        of these arrays.
-extern Kokkos::Impl::CudaLockArrays g_host_cuda_lock_arrays;
+extern CudaLockArrays g_host_cuda_lock_arrays;
 
 /// \brief After this call, the g_host_cuda_lock_arrays variable has
 ///        valid, initialized arrays.
@@ -105,12 +105,12 @@ namespace Impl {
 /// instances in other translation units, we must update this CUDA global
 /// variable based on the Host global variable prior to running any kernels
 /// that will use it.
-/// That is the purpose of the KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE macro.
+/// That is the purpose of the ensure_cuda_lock_arrays_on_device function.
 __device__
 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
     __constant__ extern
 #endif
-    Kokkos::Impl::CudaLockArrays g_device_cuda_lock_arrays;
+    CudaLockArrays g_device_cuda_lock_arrays;
 
 #define CUDA_SPACE_ATOMIC_MASK 0x1FFFF
 
@@ -123,9 +123,7 @@ __device__ inline bool lock_address_cuda_space(void* ptr) {
   size_t offset = size_t(ptr);
   offset        = offset >> 2;
   offset        = offset & CUDA_SPACE_ATOMIC_MASK;
-  return (
-      0 ==
-      atomicCAS(&Kokkos::Impl::g_device_cuda_lock_arrays.atomic[offset], 0, 1));
+  return (0 == atomicCAS(&g_device_cuda_lock_arrays.atomic[offset], 0, 1));
 }
 
 /// \brief Release lock for the address
@@ -138,7 +136,7 @@ __device__ inline void unlock_address_cuda_space(void* ptr) {
   size_t offset = size_t(ptr);
   offset        = offset >> 2;
   offset        = offset & CUDA_SPACE_ATOMIC_MASK;
-  atomicExch(&Kokkos::Impl::g_device_cuda_lock_arrays.atomic[offset], 0);
+  atomicExch(&g_device_cuda_lock_arrays.atomic[offset], 0);
 }
 
 }  // namespace Impl
@@ -151,45 +149,49 @@ namespace {
 static int lock_array_copied = 0;
 inline int eliminate_warning_for_lock_array() { return lock_array_copied; }
 }  // namespace
-}  // namespace Impl
-}  // namespace Kokkos
 
-/* Dan Ibanez: it is critical that this code be a macro, so that it will
-   capture the right address for Kokkos::Impl::g_device_cuda_lock_arrays!
-   putting this in an inline function will NOT do the right thing! */
-#define KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE()                      \
-  {                                                                   \
-    if (::Kokkos::Impl::lock_array_copied == 0) {                     \
-      KOKKOS_IMPL_CUDA_SAFE_CALL(                                     \
-          cudaMemcpyToSymbol(Kokkos::Impl::g_device_cuda_lock_arrays, \
-                             &Kokkos::Impl::g_host_cuda_lock_arrays,  \
-                             sizeof(Kokkos::Impl::CudaLockArrays)));  \
-    }                                                                 \
-    lock_array_copied = 1;                                            \
+#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
+inline
+#else
+inline static
+#endif
+    void
+    copy_cuda_lock_arrays_to_device() {
+  if (lock_array_copied == 0) {
+    KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemcpyToSymbol(g_device_cuda_lock_arrays,
+                                                  &g_host_cuda_lock_arrays,
+                                                  sizeof(CudaLockArrays)));
   }
+  lock_array_copied = 1;
+}
 
 #ifndef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
 
 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
-#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
+inline void ensure_cuda_lock_arrays_on_device() {}
 #else
-#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() \
-  KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE()
+inline static void ensure_cuda_lock_arrays_on_device() {
+  copy_cuda_lock_arrays_to_device();
+}
 #endif
 
 #else
 
 #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
-#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
+inline void ensure_cuda_lock_arrays_on_device() {}
 #else
 // Still Need COPY_CUDA_LOCK_ARRAYS for team scratch etc.
-#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() \
-  KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE()         \
-  DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
+inline static void ensure_cuda_lock_arrays_on_device() {
+  copy_cuda_lock_arrays_to_device();
+  desul::ensure_cuda_lock_arrays_on_device();
+}
 #endif
 
 #endif /* defined( KOKKOS_ENABLE_IMPL_DESUL_ATOMICS ) */
 
+}  // namespace Impl
+}  // namespace Kokkos
+
 #endif /* defined( KOKKOS_ENABLE_CUDA ) */
 
 #endif /* #ifndef KOKKOS_CUDA_LOCKS_HPP */