From 7bf177a1c75c222a676540a60e62f381545c1ac3 Mon Sep 17 00:00:00 2001 From: Nick Curtis Date: Thu, 17 Sep 2020 10:43:32 -0500 Subject: [PATCH] Fast forward porting work to master Change-Id: Ieb428e4a001efadf880dbe2c64c2a685cebdd4ae --- src/KOKKOS/atom_vec_kokkos.h | 16 ++--- src/KOKKOS/comm_kokkos.cpp | 4 +- src/KOKKOS/gridcomm_kokkos.cpp | 24 +++---- src/KOKKOS/kokkos.cpp | 78 +++++++++++++---------- src/KOKKOS/kokkos.h | 2 +- src/KOKKOS/kokkos_type.h | 17 ++++- src/KOKKOS/npair_kokkos.cpp | 43 ++++++++++--- src/KOKKOS/npair_kokkos.h | 6 +- src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp | 10 +-- src/KOKKOS/pppm_kokkos.cpp | 8 +-- 10 files changed, 127 insertions(+), 81 deletions(-) diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index d414101966..09f02f61e2 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -137,24 +137,24 @@ class AtomVecKokkos : public AtomVec { size_t buffer_size; void* buffer; - #ifdef KOKKOS_ENABLE_CUDA + #ifdef LMP_KOKKOS_GPU template Kokkos::View > create_async_copy(const ViewType& src) { typedef Kokkos::View::value, - Kokkos::CudaHostPinnedSpace,typename ViewType::memory_space>::type, + LMPPinnedHostType,typename ViewType::memory_space>::type, Kokkos::MemoryTraits > mirror_type; if (buffer_size == 0) { - buffer = Kokkos::kokkos_malloc(src.span()); + buffer = Kokkos::kokkos_malloc(src.span()); buffer_size = src.span(); } else if (buffer_size < src.span()) { - buffer = Kokkos::kokkos_realloc(buffer,src.span()); + buffer = Kokkos::kokkos_realloc(buffer,src.span()); buffer_size = src.span(); } return mirror_type(buffer, src.d_view.layout()); @@ -166,13 +166,13 @@ class AtomVecKokkos : public AtomVec { typename ViewType::array_layout, typename std::conditional< std::is_same::value, - Kokkos::CudaHostPinnedSpace,typename ViewType::memory_space>::type, + LMPPinnedHostType,typename ViewType::memory_space>::type, Kokkos::MemoryTraits > mirror_type; if (buffer_size == 0) { - buffer = Kokkos::kokkos_malloc(src.span()*sizeof(typename ViewType::value_type)); + buffer = Kokkos::kokkos_malloc(src.span()*sizeof(typename ViewType::value_type)); buffer_size = src.span(); } else if (buffer_size < src.span()) { - buffer = Kokkos::kokkos_realloc(buffer,src.span()*sizeof(typename ViewType::value_type)); + buffer = Kokkos::kokkos_realloc(buffer,src.span()*sizeof(typename ViewType::value_type)); buffer_size = src.span(); } mirror_type tmp_view((typename ViewType::value_type*)buffer, src.d_view.layout()); diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index c6ba15febd..b9f3864981 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -419,7 +419,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) if (sendproc[iswap] != me) { double* buf_send_pair; double* buf_recv_pair; - if (lmp->kokkos->cuda_aware_flag) { + if (lmp->kokkos->gpu_aware_flag) { buf_send_pair = k_buf_send_pair.view().data(); buf_recv_pair = k_buf_recv_pair.view().data(); } else { @@ -437,7 +437,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) MPI_Send(buf_send_pair,n,MPI_DOUBLE,sendproc[iswap],0,world); if (recvnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); - if (!lmp->kokkos->cuda_aware_flag) { + if (!lmp->kokkos->gpu_aware_flag) { k_buf_recv_pair.modify(); k_buf_recv_pair.sync(); } diff --git a/src/KOKKOS/gridcomm_kokkos.cpp b/src/KOKKOS/gridcomm_kokkos.cpp index 0a175343c3..dfca97c13e 100644 --- a/src/KOKKOS/gridcomm_kokkos.cpp +++ b/src/KOKKOS/gridcomm_kokkos.cpp @@ -678,7 +678,7 @@ forward_comm_kspace_regular(KSpace *kspace, int nper, int which, KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); FFT_SCALAR* buf1; FFT_SCALAR* buf2; - if (lmp->kokkos->cuda_aware_flag) { + if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); } else { @@ -695,7 +695,7 @@ forward_comm_kspace_regular(KSpace *kspace, int nper, int which, if (swap[m].sendproc != me) { - if (!lmp->kokkos->cuda_aware_flag) { + if (!lmp->kokkos->gpu_aware_flag) { k_buf1.modify(); k_buf1.sync(); } @@ -706,7 +706,7 @@ forward_comm_kspace_regular(KSpace *kspace, int nper, int which, swap[m].sendproc,0,gridcomm); if (swap[m].nunpack) MPI_Wait(&request,MPI_STATUS_IGNORE); - if (!lmp->kokkos->cuda_aware_flag) { + if (!lmp->kokkos->gpu_aware_flag) { k_buf2.modify(); k_buf2.sync(); } @@ -731,7 +731,7 @@ forward_comm_kspace_tiled(KSpace *kspace, int nper, int which, KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); FFT_SCALAR* buf1; FFT_SCALAR* buf2; - if (lmp->kokkos->cuda_aware_flag) { + if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); } else { @@ -753,7 +753,7 @@ forward_comm_kspace_tiled(KSpace *kspace, int nper, int which, kspaceKKBase->pack_forward_grid_kokkos(which,k_buf1,send[m].npack,k_send_packlist,m); DeviceType().fence(); - if (!lmp->kokkos->cuda_aware_flag) { + if (!lmp->kokkos->gpu_aware_flag) { k_buf1.modify(); k_buf1.sync(); } @@ -773,7 +773,7 @@ forward_comm_kspace_tiled(KSpace *kspace, int nper, int which, for (i = 0; i < nrecv; i++) { MPI_Waitany(nrecv,requests,&m,MPI_STATUS_IGNORE); - if (!lmp->kokkos->cuda_aware_flag) { + if (!lmp->kokkos->gpu_aware_flag) { k_buf2.modify(); k_buf2.sync(); } @@ -814,7 +814,7 @@ reverse_comm_kspace_regular(KSpace *kspace, int nper, int which, KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); FFT_SCALAR* buf1; FFT_SCALAR* buf2; - if (lmp->kokkos->cuda_aware_flag) { + if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); } else { @@ -831,7 +831,7 @@ reverse_comm_kspace_regular(KSpace *kspace, int nper, int which, if (swap[m].recvproc != me) { - if (!lmp->kokkos->cuda_aware_flag) { + if (!lmp->kokkos->gpu_aware_flag) { k_buf1.modify(); k_buf1.sync(); } @@ -843,7 +843,7 @@ reverse_comm_kspace_regular(KSpace *kspace, int nper, int which, if (swap[m].npack) MPI_Wait(&request,MPI_STATUS_IGNORE); - if (!lmp->kokkos->cuda_aware_flag) { + if (!lmp->kokkos->gpu_aware_flag) { k_buf2.modify(); k_buf2.sync(); } @@ -869,7 +869,7 @@ reverse_comm_kspace_tiled(KSpace *kspace, int nper, int which, FFT_SCALAR* buf1; FFT_SCALAR* buf2; - if (lmp->kokkos->cuda_aware_flag) { + if (lmp->kokkos->gpu_aware_flag) { buf1 = k_buf1.view().data(); buf2 = k_buf2.view().data(); } else { @@ -891,7 +891,7 @@ reverse_comm_kspace_tiled(KSpace *kspace, int nper, int which, kspaceKKBase->pack_reverse_grid_kokkos(which,k_buf1,recv[m].nunpack,k_recv_unpacklist,m); DeviceType().fence(); - if (!lmp->kokkos->cuda_aware_flag) { + if (!lmp->kokkos->gpu_aware_flag) { k_buf1.modify(); k_buf1.sync(); } @@ -911,7 +911,7 @@ reverse_comm_kspace_tiled(KSpace *kspace, int nper, int which, for (i = 0; i < nsend; i++) { MPI_Waitany(nsend,requests,&m,MPI_STATUS_IGNORE); - if (!lmp->kokkos->cuda_aware_flag) { + if (!lmp->kokkos->gpu_aware_flag) { k_buf2.modify(); k_buf2.sync(); } diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 16d906ba3c..85365f8017 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -24,15 +24,20 @@ #include #include -#ifdef KOKKOS_ENABLE_CUDA +#ifdef LMP_KOKKOS_GPU -// for detecting CUDA-aware MPI support: -// the variable int have_cuda_aware -// - is 1 if CUDA-aware MPI support is available -// - is 0 if CUDA-aware MPI support is unavailable -// - is -1 if CUDA-aware MPI support is unknown +// for detecting GPU-aware MPI support: +// the variable int have_gpu_aware +// - is 1 if GPU-aware MPI support is available +// - is 0 if GPU-aware MPI support is unavailable +// - is -1 if GPU-aware MPI support is unknown -#define CUDA_AWARE_UNKNOWN static int have_cuda_aware = -1; +#define GPU_AWARE_UNKNOWN static int have_gpu_aware = -1; + +// TODO HIP: implement HIP-aware MPI support (UCX) detection +#if defined(KOKKOS_ENABLE_HIP) +GPU_AWARE_UNKNOWN +#elif defined(KOKKOS_ENABLE_CUDA) // OpenMPI supports detecting CUDA-aware MPI as of version 2.0.0 @@ -42,23 +47,25 @@ #include #if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT -static int have_cuda_aware = 1; +static int have_gpu_aware = 1; #elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT -static int have_cuda_aware = 0; +static int have_gpu_aware = 0; #else -CUDA_AWARE_UNKNOWN +GPU_AWARE_UNKNOWN #endif // defined(MPIX_CUDA_AWARE_SUPPORT) #else // old OpenMPI -CUDA_AWARE_UNKNOWN +GPU_AWARE_UNKNOWN #endif // (OMPI_MAJOR_VERSION >=2) #else // unknown MPI library -CUDA_AWARE_UNKNOWN +GPU_AWARE_UNKNOWN #endif // OPEN_MPI #endif // KOKKOS_ENABLE_CUDA +#endif // LMP_ENABLE_DEVICE + using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ @@ -185,9 +192,10 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) binsize = 0.0; #ifdef KOKKOS_ENABLE_CUDA - cuda_aware_flag = 1; + // TODO HIP: implement HIP-aware MPI testing + gpu_aware_flag = 1; #else - cuda_aware_flag = 0; + gpu_aware_flag = 0; #endif neigh_thread = 0; neigh_thread_set = 0; @@ -211,7 +219,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; } -#ifdef KOKKOS_ENABLE_CUDA +#ifdef LMP_KOKKOS_GPU // check and warn about CUDA-aware MPI availability when using multiple MPI tasks // change default only if we can safely detect that CUDA-aware MPI is not available @@ -227,42 +235,42 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) char mpi_version[MPI_MAX_LIBRARY_VERSION_STRING]; MPI_Get_library_version(mpi_version, &len); if (strstr(&mpi_version[0], "Spectrum") != nullptr) { - cuda_aware_flag = 0; + gpu_aware_flag = 0; char* str; - if (str = getenv("OMPI_MCA_pml_pami_enable_cuda")) + if ((str = getenv("OMPI_MCA_pml_pami_enable_cuda"))) if((strcmp(str,"1") == 0)) { - have_cuda_aware = 1; - cuda_aware_flag = 1; + have_gpu_aware = 1; + gpu_aware_flag = 1; } - if (!cuda_aware_flag) + if (!gpu_aware_flag) if (me == 0) error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling CUDA-aware MPI"); } #endif - if (cuda_aware_flag == 1 && have_cuda_aware == 0) { + if (gpu_aware_flag == 1 && have_gpu_aware == 0) { if (me == 0) error->warning(FLERR,"Turning off CUDA-aware MPI since it is not detected, " "use '-pk kokkos cuda/aware on' to override"); - cuda_aware_flag = 0; - } else if (have_cuda_aware == -1) { // maybe we are dealing with MPICH, MVAPICH2 or some derivative? + gpu_aware_flag = 0; + } else if (have_gpu_aware == -1) { // maybe we are dealing with MPICH, MVAPICH2 or some derivative? // MVAPICH2 #if defined(MPICH) && defined(MVAPICH2_VERSION) char* str; - cuda_aware_flag = 0; - if ((str = getenv("MV2_USE_CUDA"))) + gpu_aware_flag = 0; + if ((str = getenv("MV2_ENABLE_CUDA"))) if ((strcmp(str,"1") == 0)) - cuda_aware_flag = 1; + gpu_aware_flag = 1; - if (!cuda_aware_flag) + if (!gpu_aware_flag) if (me == 0) error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling CUDA-aware MPI"); // pure MPICH or some unsupported MPICH derivative #elif defined(MPICH) && !defined(MVAPICH2_VERSION) if (me == 0) error->warning(FLERR,"Detected MPICH. Disabling CUDA-aware MPI"); - cuda_aware_flag = 0; + gpu_aware_flag = 0; #else if (me == 0) error->warning(FLERR,"Kokkos with CUDA assumes CUDA-aware MPI is available," @@ -270,9 +278,9 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) " '-pk kokkos cuda/aware off' if getting segmentation faults"); #endif - } // if (-1 == have_cuda_aware) + } // if (-1 == have_gpu_aware) } // nmpi > 0 -#endif // KOKKOS_ENABLE_CUDA +#endif // LMP_ENABLE_DEVICE #ifdef KILL_KOKKOS_ON_SIGSEGV signal(SIGSEGV, my_signal_handler); @@ -381,8 +389,8 @@ void KokkosLMP::accelerator(int narg, char **arg) iarg += 2; } else if (strcmp(arg[iarg],"cuda/aware") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); - if (strcmp(arg[iarg+1],"off") == 0) cuda_aware_flag = 0; - else if (strcmp(arg[iarg+1],"on") == 0) cuda_aware_flag = 1; + if (strcmp(arg[iarg+1],"off") == 0) gpu_aware_flag = 0; + else if (strcmp(arg[iarg+1],"on") == 0) gpu_aware_flag = 1; else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; } else if (strcmp(arg[iarg],"neigh/thread") == 0) { @@ -395,14 +403,14 @@ void KokkosLMP::accelerator(int narg, char **arg) } else error->all(FLERR,"Illegal package kokkos command"); } -#ifdef KOKKOS_ENABLE_CUDA +#ifdef LMP_KOKKOS_GPU int nmpi = 0; MPI_Comm_size(world,&nmpi); // if "cuda/aware off" and "comm device", change to "comm host" - if (!cuda_aware_flag && nmpi > 1) { + if (!gpu_aware_flag && nmpi > 1) { if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) { exchange_comm_on_host = 1; exchange_comm_changed = 1; @@ -419,7 +427,7 @@ void KokkosLMP::accelerator(int narg, char **arg) // if "cuda/aware on" and comm flags were changed previously, change them back - if (cuda_aware_flag) { + if (gpu_aware_flag) { if (exchange_comm_changed) { exchange_comm_on_host = 0; exchange_comm_changed = 0; diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 72b7d19305..eb0eb2e71f 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -38,7 +38,7 @@ class KokkosLMP : protected Pointers { int nthreads,ngpus; int numa; int auto_sync; - int cuda_aware_flag; + int gpu_aware_flag; int neigh_thread; int neigh_thread_set; int newtonflag; diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 0271c3d108..d78d5ed8ba 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -211,6 +211,21 @@ struct ExecutionSpaceFromDevice { }; #endif +// set host pinned space +#if defined(KOKKOS_ENABLE_CUDA) +typedef Kokkos::CudaHostPinnedSpace LMPPinnedHostType; +#elif defined(KOKKOS_ENABLE_HIP) +typedef Kokkos::Experimental::HIPHostPinnedSpace LMPPinnedHostType; +#endif + +// create simple LMPDeviceSpace typedef for non HIP or CUDA specific +// behaviour +#if defined(KOKKOS_ENABLE_CUDA) +typedef Kokkos::Cuda LMPDeviceSpace; +#elif defined(KOKKOS_ENABLE_HIP) +typedef Kokkos::Experimental::HIP LMPDeviceSpace; +#endif + // Determine memory traits for force array // Do atomic trait when running HALFTHREAD neighbor list style @@ -239,7 +254,7 @@ struct AtomicDup { }; #endif -#if defined(KOKKOS_ENABLE_HIP) +#ifdef KOKKOS_ENABLE_HIP template<> struct AtomicDup { using value = Kokkos::Experimental::ScatterAtomic; diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 4cad8632b4..f9891a8185 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -212,7 +212,7 @@ void NPairKokkos::build(NeighList *list_) data.h_resize() = 0; Kokkos::deep_copy(d_scalars, h_scalars); -#ifdef KOKKOS_ENABLE_CUDA +#ifdef LMP_KOKKOS_GPU #define BINS_PER_BLOCK 2 const int factor = atoms_per_bin<64?2:1; #else @@ -226,7 +226,7 @@ void NPairKokkos::build(NeighList *list_) if (newton_pair) { if (SIZE) { NPairKokkosBuildFunctorSize f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -#ifdef KOKKOS_ENABLE_CUDA +#ifdef LMP_KOKKOS_GPU if (ExecutionSpaceFromDevice::space == Device) { int team_size = atoms_per_bin*factor; int team_size_max = Kokkos::TeamPolicy(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag()); @@ -244,7 +244,7 @@ void NPairKokkos::build(NeighList *list_) #endif } else { NPairKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -#ifdef KOKKOS_ENABLE_CUDA +#ifdef LMP_KOKKOS_GPU if (ExecutionSpaceFromDevice::space == Device) { int team_size = atoms_per_bin*factor; int team_size_max = Kokkos::TeamPolicy(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag()); @@ -264,7 +264,7 @@ void NPairKokkos::build(NeighList *list_) } else { if (SIZE) { NPairKokkosBuildFunctorSize f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -#ifdef KOKKOS_ENABLE_CUDA +#ifdef LMP_KOKKOS_GPU if (ExecutionSpaceFromDevice::space == Device) { int team_size = atoms_per_bin*factor; int team_size_max = Kokkos::TeamPolicy(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag()); @@ -282,7 +282,7 @@ void NPairKokkos::build(NeighList *list_) #endif } else { NPairKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -#ifdef KOKKOS_ENABLE_CUDA +#ifdef LMP_KOKKOS_GPU if (ExecutionSpaceFromDevice::space == Device) { int team_size = atoms_per_bin*factor; int team_size_max = Kokkos::TeamPolicy(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag()); @@ -529,15 +529,33 @@ void NeighborKokkosExecute:: /* ---------------------------------------------------------------------- */ -#ifdef KOKKOS_ENABLE_CUDA -extern __shared__ X_FLOAT sharedmem[]; - -/* ---------------------------------------------------------------------- */ +#ifdef KOKKOS_ENABLE_HIP +#include +#if HIP_VERSION_MAJOR < 3 || (HIP_VERSION_MAJOR == 3 && HIP_VERSION_MINOR < 7) +// ROCm versions < 3.7 are missing __syncthreads_count, so we define a functional +// but (probably) not performant workaround +__device__ __forceinline__ int __syncthreads_count(int predicate) { + __shared__ int test_block[1]; + if (!(threadIdx.x || threadIdx.y || threadIdx.z)) + test_block[0] = 0; + __syncthreads(); + atomicAdd(test_block, predicate); + __threadfence_block(); + return test_block[0]; +} +#endif +#endif +#ifdef LMP_KOKKOS_GPU template template __device__ inline void NeighborKokkosExecute::build_ItemCuda(typename Kokkos::TeamPolicy::member_type dev) const { +#ifdef KOKKOS_ENABLE_HIP + HIP_DYNAMIC_SHARED(X_FLOAT, sharedmem); +#else + extern __shared__ X_FLOAT sharedmem[]; +#endif /* loop over atoms in i's bin, */ const int atoms_per_bin = c_bins.extent(1); @@ -971,11 +989,16 @@ void NeighborKokkosExecute:: /* ---------------------------------------------------------------------- */ -#ifdef KOKKOS_ENABLE_CUDA +#ifdef LMP_KOKKOS_GPU template template __device__ inline void NeighborKokkosExecute::build_ItemSizeCuda(typename Kokkos::TeamPolicy::member_type dev) const { +#ifdef KOKKOS_ENABLE_HIP + HIP_DYNAMIC_SHARED(X_FLOAT, sharedmem); +#else + extern __shared__ X_FLOAT sharedmem[]; +#endif /* loop over atoms in i's bin, */ const int atoms_per_bin = c_bins.extent(1); diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index 9090cd3083..be50fd6275 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -310,7 +310,7 @@ class NeighborKokkosExecute KOKKOS_FUNCTION void build_ItemSize(const int &i) const; -#ifdef KOKKOS_ENABLE_CUDA +#ifdef LMP_KOKKOS_GPU template __device__ inline void build_ItemCuda(typename Kokkos::TeamPolicy::member_type dev) const; @@ -387,7 +387,7 @@ struct NPairKokkosBuildFunctor { void operator() (const int & i) const { c.template build_Item(i); } -#ifdef KOKKOS_ENABLE_CUDA +#ifdef LMP_KOKKOS_GPU __device__ inline void operator() (typename Kokkos::TeamPolicy::member_type dev) const { @@ -445,7 +445,7 @@ struct NPairKokkosBuildFunctorSize { c.template build_ItemSize(i); } -#ifdef KOKKOS_ENABLE_CUDA +#ifdef LMP_KOKKOS_GPU __device__ inline void operator() (typename Kokkos::TeamPolicy::member_type dev) const { c.template build_ItemSizeCuda(dev); diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index f4dc0e867c..f5e284e28a 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -112,10 +112,10 @@ void PairDPDfdtEnergyKokkos::init_style() #endif } -#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__) +#if (defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)) || defined(KOKKOS_ENABLE_HIP) // CUDA specialization of init_style to properly call rand_pool.init() template<> -void PairDPDfdtEnergyKokkos::init_style() +void PairDPDfdtEnergyKokkos::init_style() { PairDPDfdtEnergy::init_style(); @@ -125,10 +125,10 @@ void PairDPDfdtEnergyKokkos::init_style() int irequest = neighbor->nrequest - 1; neighbor->requests[irequest]-> - kokkos_host = std::is_same::value && - !std::is_same::value; + kokkos_host = std::is_same::value && + !std::is_same::value; neighbor->requests[irequest]-> - kokkos_device = std::is_same::value; + kokkos_device = std::is_same::value; if (neighflag == FULL) { neighbor->requests[irequest]->full = 1; diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index f4e3955f3c..9d9a26897f 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -811,22 +811,22 @@ void PPPMKokkos::allocate() // remap takes data from 3d brick to FFT decomposition int collective_flag = 0; // not yet supported in Kokkos version - int cuda_aware_flag = lmp->kokkos->cuda_aware_flag; + int gpu_aware_flag = lmp->kokkos->gpu_aware_flag; int tmp; fft1 = new FFT3dKokkos(lmp,world,nx_pppm,ny_pppm,nz_pppm, nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 0,0,&tmp,collective_flag,cuda_aware_flag); + 0,0,&tmp,collective_flag,gpu_aware_flag); fft2 = new FFT3dKokkos(lmp,world,nx_pppm,ny_pppm,nz_pppm, nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, - 0,0,&tmp,collective_flag,cuda_aware_flag); + 0,0,&tmp,collective_flag,gpu_aware_flag); remap = new RemapKokkos(lmp,world, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,FFT_PRECISION,collective_flag,cuda_aware_flag); + 1,0,0,FFT_PRECISION,collective_flag,gpu_aware_flag); // create ghost grid object for rho and electric field communication // also create 2 bufs for ghost grid cell comm, passed to GridComm methods