Merge branch 'master' into collected-small-changes

This commit is contained in:
Axel Kohlmeyer
2020-09-17 17:01:44 -04:00
10 changed files with 127 additions and 81 deletions

View File

@ -137,24 +137,24 @@ class AtomVecKokkos : public AtomVec {
size_t buffer_size;
void* buffer;
#ifdef KOKKOS_ENABLE_CUDA
#ifdef LMP_KOKKOS_GPU
template<class ViewType>
Kokkos::View<typename ViewType::data_type,
typename ViewType::array_layout,
Kokkos::CudaHostPinnedSpace,
LMPPinnedHostType,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >
create_async_copy(const ViewType& src) {
typedef Kokkos::View<typename ViewType::data_type,
typename ViewType::array_layout,
typename std::conditional<
std::is_same<typename ViewType::execution_space,LMPDeviceType>::value,
Kokkos::CudaHostPinnedSpace,typename ViewType::memory_space>::type,
LMPPinnedHostType,typename ViewType::memory_space>::type,
Kokkos::MemoryTraits<Kokkos::Unmanaged> > mirror_type;
if (buffer_size == 0) {
buffer = Kokkos::kokkos_malloc<Kokkos::CudaHostPinnedSpace>(src.span());
buffer = Kokkos::kokkos_malloc<LMPPinnedHostType>(src.span());
buffer_size = src.span();
} else if (buffer_size < src.span()) {
buffer = Kokkos::kokkos_realloc<Kokkos::CudaHostPinnedSpace>(buffer,src.span());
buffer = Kokkos::kokkos_realloc<LMPPinnedHostType>(buffer,src.span());
buffer_size = src.span();
}
return mirror_type(buffer, src.d_view.layout());
@ -166,13 +166,13 @@ class AtomVecKokkos : public AtomVec {
typename ViewType::array_layout,
typename std::conditional<
std::is_same<typename ViewType::execution_space,LMPDeviceType>::value,
Kokkos::CudaHostPinnedSpace,typename ViewType::memory_space>::type,
LMPPinnedHostType,typename ViewType::memory_space>::type,
Kokkos::MemoryTraits<Kokkos::Unmanaged> > mirror_type;
if (buffer_size == 0) {
buffer = Kokkos::kokkos_malloc<Kokkos::CudaHostPinnedSpace>(src.span()*sizeof(typename ViewType::value_type));
buffer = Kokkos::kokkos_malloc<LMPPinnedHostType>(src.span()*sizeof(typename ViewType::value_type));
buffer_size = src.span();
} else if (buffer_size < src.span()) {
buffer = Kokkos::kokkos_realloc<Kokkos::CudaHostPinnedSpace>(buffer,src.span()*sizeof(typename ViewType::value_type));
buffer = Kokkos::kokkos_realloc<LMPPinnedHostType>(buffer,src.span()*sizeof(typename ViewType::value_type));
buffer_size = src.span();
}
mirror_type tmp_view((typename ViewType::value_type*)buffer, src.d_view.layout());

View File

@ -419,7 +419,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair)
if (sendproc[iswap] != me) {
double* buf_send_pair;
double* buf_recv_pair;
if (lmp->kokkos->cuda_aware_flag) {
if (lmp->kokkos->gpu_aware_flag) {
buf_send_pair = k_buf_send_pair.view<DeviceType>().data();
buf_recv_pair = k_buf_recv_pair.view<DeviceType>().data();
} else {
@ -437,7 +437,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair)
MPI_Send(buf_send_pair,n,MPI_DOUBLE,sendproc[iswap],0,world);
if (recvnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
if (!lmp->kokkos->cuda_aware_flag) {
if (!lmp->kokkos->gpu_aware_flag) {
k_buf_recv_pair.modify<LMPHostType>();
k_buf_recv_pair.sync<DeviceType>();
}

View File

@ -678,7 +678,7 @@ forward_comm_kspace_regular(KSpace *kspace, int nper, int which,
KokkosBaseFFT* kspaceKKBase = dynamic_cast<KokkosBaseFFT*>(kspace);
FFT_SCALAR* buf1;
FFT_SCALAR* buf2;
if (lmp->kokkos->cuda_aware_flag) {
if (lmp->kokkos->gpu_aware_flag) {
buf1 = k_buf1.view<DeviceType>().data();
buf2 = k_buf2.view<DeviceType>().data();
} else {
@ -695,7 +695,7 @@ forward_comm_kspace_regular(KSpace *kspace, int nper, int which,
if (swap[m].sendproc != me) {
if (!lmp->kokkos->cuda_aware_flag) {
if (!lmp->kokkos->gpu_aware_flag) {
k_buf1.modify<DeviceType>();
k_buf1.sync<LMPHostType>();
}
@ -706,7 +706,7 @@ forward_comm_kspace_regular(KSpace *kspace, int nper, int which,
swap[m].sendproc,0,gridcomm);
if (swap[m].nunpack) MPI_Wait(&request,MPI_STATUS_IGNORE);
if (!lmp->kokkos->cuda_aware_flag) {
if (!lmp->kokkos->gpu_aware_flag) {
k_buf2.modify<LMPHostType>();
k_buf2.sync<DeviceType>();
}
@ -731,7 +731,7 @@ forward_comm_kspace_tiled(KSpace *kspace, int nper, int which,
KokkosBaseFFT* kspaceKKBase = dynamic_cast<KokkosBaseFFT*>(kspace);
FFT_SCALAR* buf1;
FFT_SCALAR* buf2;
if (lmp->kokkos->cuda_aware_flag) {
if (lmp->kokkos->gpu_aware_flag) {
buf1 = k_buf1.view<DeviceType>().data();
buf2 = k_buf2.view<DeviceType>().data();
} else {
@ -753,7 +753,7 @@ forward_comm_kspace_tiled(KSpace *kspace, int nper, int which,
kspaceKKBase->pack_forward_grid_kokkos(which,k_buf1,send[m].npack,k_send_packlist,m);
DeviceType().fence();
if (!lmp->kokkos->cuda_aware_flag) {
if (!lmp->kokkos->gpu_aware_flag) {
k_buf1.modify<DeviceType>();
k_buf1.sync<LMPHostType>();
}
@ -773,7 +773,7 @@ forward_comm_kspace_tiled(KSpace *kspace, int nper, int which,
for (i = 0; i < nrecv; i++) {
MPI_Waitany(nrecv,requests,&m,MPI_STATUS_IGNORE);
if (!lmp->kokkos->cuda_aware_flag) {
if (!lmp->kokkos->gpu_aware_flag) {
k_buf2.modify<LMPHostType>();
k_buf2.sync<DeviceType>();
}
@ -814,7 +814,7 @@ reverse_comm_kspace_regular(KSpace *kspace, int nper, int which,
KokkosBaseFFT* kspaceKKBase = dynamic_cast<KokkosBaseFFT*>(kspace);
FFT_SCALAR* buf1;
FFT_SCALAR* buf2;
if (lmp->kokkos->cuda_aware_flag) {
if (lmp->kokkos->gpu_aware_flag) {
buf1 = k_buf1.view<DeviceType>().data();
buf2 = k_buf2.view<DeviceType>().data();
} else {
@ -831,7 +831,7 @@ reverse_comm_kspace_regular(KSpace *kspace, int nper, int which,
if (swap[m].recvproc != me) {
if (!lmp->kokkos->cuda_aware_flag) {
if (!lmp->kokkos->gpu_aware_flag) {
k_buf1.modify<DeviceType>();
k_buf1.sync<LMPHostType>();
}
@ -843,7 +843,7 @@ reverse_comm_kspace_regular(KSpace *kspace, int nper, int which,
if (swap[m].npack) MPI_Wait(&request,MPI_STATUS_IGNORE);
if (!lmp->kokkos->cuda_aware_flag) {
if (!lmp->kokkos->gpu_aware_flag) {
k_buf2.modify<LMPHostType>();
k_buf2.sync<DeviceType>();
}
@ -869,7 +869,7 @@ reverse_comm_kspace_tiled(KSpace *kspace, int nper, int which,
FFT_SCALAR* buf1;
FFT_SCALAR* buf2;
if (lmp->kokkos->cuda_aware_flag) {
if (lmp->kokkos->gpu_aware_flag) {
buf1 = k_buf1.view<DeviceType>().data();
buf2 = k_buf2.view<DeviceType>().data();
} else {
@ -891,7 +891,7 @@ reverse_comm_kspace_tiled(KSpace *kspace, int nper, int which,
kspaceKKBase->pack_reverse_grid_kokkos(which,k_buf1,recv[m].nunpack,k_recv_unpacklist,m);
DeviceType().fence();
if (!lmp->kokkos->cuda_aware_flag) {
if (!lmp->kokkos->gpu_aware_flag) {
k_buf1.modify<DeviceType>();
k_buf1.sync<LMPHostType>();
}
@ -911,7 +911,7 @@ reverse_comm_kspace_tiled(KSpace *kspace, int nper, int which,
for (i = 0; i < nsend; i++) {
MPI_Waitany(nsend,requests,&m,MPI_STATUS_IGNORE);
if (!lmp->kokkos->cuda_aware_flag) {
if (!lmp->kokkos->gpu_aware_flag) {
k_buf2.modify<LMPHostType>();
k_buf2.sync<DeviceType>();
}

View File

@ -24,15 +24,20 @@
#include <csignal>
#include <unistd.h>
#ifdef KOKKOS_ENABLE_CUDA
#ifdef LMP_KOKKOS_GPU
// for detecting CUDA-aware MPI support:
// the variable int have_cuda_aware
// - is 1 if CUDA-aware MPI support is available
// - is 0 if CUDA-aware MPI support is unavailable
// - is -1 if CUDA-aware MPI support is unknown
// for detecting GPU-aware MPI support:
// the variable int have_gpu_aware
// - is 1 if GPU-aware MPI support is available
// - is 0 if GPU-aware MPI support is unavailable
// - is -1 if GPU-aware MPI support is unknown
#define CUDA_AWARE_UNKNOWN static int have_cuda_aware = -1;
#define GPU_AWARE_UNKNOWN static int have_gpu_aware = -1;
// TODO HIP: implement HIP-aware MPI support (UCX) detection
#if defined(KOKKOS_ENABLE_HIP)
GPU_AWARE_UNKNOWN
#elif defined(KOKKOS_ENABLE_CUDA)
// OpenMPI supports detecting CUDA-aware MPI as of version 2.0.0
@ -42,23 +47,25 @@
#include <mpi-ext.h>
#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
static int have_cuda_aware = 1;
static int have_gpu_aware = 1;
#elif defined(MPIX_CUDA_AWARE_SUPPORT) && !MPIX_CUDA_AWARE_SUPPORT
static int have_cuda_aware = 0;
static int have_gpu_aware = 0;
#else
CUDA_AWARE_UNKNOWN
GPU_AWARE_UNKNOWN
#endif // defined(MPIX_CUDA_AWARE_SUPPORT)
#else // old OpenMPI
CUDA_AWARE_UNKNOWN
GPU_AWARE_UNKNOWN
#endif // (OMPI_MAJOR_VERSION >=2)
#else // unknown MPI library
CUDA_AWARE_UNKNOWN
GPU_AWARE_UNKNOWN
#endif // OPEN_MPI
#endif // KOKKOS_ENABLE_CUDA
#endif // LMP_ENABLE_DEVICE
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
@ -185,9 +192,10 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
binsize = 0.0;
#ifdef KOKKOS_ENABLE_CUDA
cuda_aware_flag = 1;
// TODO HIP: implement HIP-aware MPI testing
gpu_aware_flag = 1;
#else
cuda_aware_flag = 0;
gpu_aware_flag = 0;
#endif
neigh_thread = 0;
neigh_thread_set = 0;
@ -211,7 +219,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
}
#ifdef KOKKOS_ENABLE_CUDA
#ifdef LMP_KOKKOS_GPU
// check and warn about CUDA-aware MPI availability when using multiple MPI tasks
// change default only if we can safely detect that CUDA-aware MPI is not available
@ -227,42 +235,42 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
char mpi_version[MPI_MAX_LIBRARY_VERSION_STRING];
MPI_Get_library_version(mpi_version, &len);
if (strstr(&mpi_version[0], "Spectrum") != nullptr) {
cuda_aware_flag = 0;
gpu_aware_flag = 0;
char* str;
if (str = getenv("OMPI_MCA_pml_pami_enable_cuda"))
if ((str = getenv("OMPI_MCA_pml_pami_enable_cuda")))
if((strcmp(str,"1") == 0)) {
have_cuda_aware = 1;
cuda_aware_flag = 1;
have_gpu_aware = 1;
gpu_aware_flag = 1;
}
if (!cuda_aware_flag)
if (!gpu_aware_flag)
if (me == 0)
error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling CUDA-aware MPI");
}
#endif
if (cuda_aware_flag == 1 && have_cuda_aware == 0) {
if (gpu_aware_flag == 1 && have_gpu_aware == 0) {
if (me == 0)
error->warning(FLERR,"Turning off CUDA-aware MPI since it is not detected, "
"use '-pk kokkos cuda/aware on' to override");
cuda_aware_flag = 0;
} else if (have_cuda_aware == -1) { // maybe we are dealing with MPICH, MVAPICH2 or some derivative?
gpu_aware_flag = 0;
} else if (have_gpu_aware == -1) { // maybe we are dealing with MPICH, MVAPICH2 or some derivative?
// MVAPICH2
#if defined(MPICH) && defined(MVAPICH2_VERSION)
char* str;
cuda_aware_flag = 0;
if ((str = getenv("MV2_USE_CUDA")))
gpu_aware_flag = 0;
if ((str = getenv("MV2_ENABLE_CUDA")))
if ((strcmp(str,"1") == 0))
cuda_aware_flag = 1;
gpu_aware_flag = 1;
if (!cuda_aware_flag)
if (!gpu_aware_flag)
if (me == 0)
error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling CUDA-aware MPI");
// pure MPICH or some unsupported MPICH derivative
#elif defined(MPICH) && !defined(MVAPICH2_VERSION)
if (me == 0)
error->warning(FLERR,"Detected MPICH. Disabling CUDA-aware MPI");
cuda_aware_flag = 0;
gpu_aware_flag = 0;
#else
if (me == 0)
error->warning(FLERR,"Kokkos with CUDA assumes CUDA-aware MPI is available,"
@ -270,9 +278,9 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
" '-pk kokkos cuda/aware off' if getting segmentation faults");
#endif
} // if (-1 == have_cuda_aware)
} // if (-1 == have_gpu_aware)
} // nmpi > 0
#endif // KOKKOS_ENABLE_CUDA
#endif // LMP_ENABLE_DEVICE
#ifdef KILL_KOKKOS_ON_SIGSEGV
signal(SIGSEGV, my_signal_handler);
@ -381,8 +389,8 @@ void KokkosLMP::accelerator(int narg, char **arg)
iarg += 2;
} else if (strcmp(arg[iarg],"cuda/aware") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"off") == 0) cuda_aware_flag = 0;
else if (strcmp(arg[iarg+1],"on") == 0) cuda_aware_flag = 1;
if (strcmp(arg[iarg+1],"off") == 0) gpu_aware_flag = 0;
else if (strcmp(arg[iarg+1],"on") == 0) gpu_aware_flag = 1;
else error->all(FLERR,"Illegal package kokkos command");
iarg += 2;
} else if (strcmp(arg[iarg],"neigh/thread") == 0) {
@ -395,14 +403,14 @@ void KokkosLMP::accelerator(int narg, char **arg)
} else error->all(FLERR,"Illegal package kokkos command");
}
#ifdef KOKKOS_ENABLE_CUDA
#ifdef LMP_KOKKOS_GPU
int nmpi = 0;
MPI_Comm_size(world,&nmpi);
// if "cuda/aware off" and "comm device", change to "comm host"
if (!cuda_aware_flag && nmpi > 1) {
if (!gpu_aware_flag && nmpi > 1) {
if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) {
exchange_comm_on_host = 1;
exchange_comm_changed = 1;
@ -419,7 +427,7 @@ void KokkosLMP::accelerator(int narg, char **arg)
// if "cuda/aware on" and comm flags were changed previously, change them back
if (cuda_aware_flag) {
if (gpu_aware_flag) {
if (exchange_comm_changed) {
exchange_comm_on_host = 0;
exchange_comm_changed = 0;

View File

@ -38,7 +38,7 @@ class KokkosLMP : protected Pointers {
int nthreads,ngpus;
int numa;
int auto_sync;
int cuda_aware_flag;
int gpu_aware_flag;
int neigh_thread;
int neigh_thread_set;
int newtonflag;

View File

@ -211,6 +211,21 @@ struct ExecutionSpaceFromDevice<Kokkos::Experimental::HIP> {
};
#endif
// set host pinned space
#if defined(KOKKOS_ENABLE_CUDA)
typedef Kokkos::CudaHostPinnedSpace LMPPinnedHostType;
#elif defined(KOKKOS_ENABLE_HIP)
typedef Kokkos::Experimental::HIPHostPinnedSpace LMPPinnedHostType;
#endif
// create simple LMPDeviceSpace typedef for non HIP or CUDA specific
// behaviour
#if defined(KOKKOS_ENABLE_CUDA)
typedef Kokkos::Cuda LMPDeviceSpace;
#elif defined(KOKKOS_ENABLE_HIP)
typedef Kokkos::Experimental::HIP LMPDeviceSpace;
#endif
// Determine memory traits for force array
// Do atomic trait when running HALFTHREAD neighbor list style
@ -239,7 +254,7 @@ struct AtomicDup<HALFTHREAD,Kokkos::Cuda> {
};
#endif
#if defined(KOKKOS_ENABLE_HIP)
#ifdef KOKKOS_ENABLE_HIP
template<>
struct AtomicDup<HALFTHREAD,Kokkos::Experimental::HIP> {
using value = Kokkos::Experimental::ScatterAtomic;

View File

@ -212,7 +212,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
data.h_resize() = 0;
Kokkos::deep_copy(d_scalars, h_scalars);
#ifdef KOKKOS_ENABLE_CUDA
#ifdef LMP_KOKKOS_GPU
#define BINS_PER_BLOCK 2
const int factor = atoms_per_bin<64?2:1;
#else
@ -226,7 +226,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
if (newton_pair) {
if (SIZE) {
NPairKokkosBuildFunctorSize<DeviceType,TRI?0:HALF_NEIGH,1,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_ENABLE_CUDA
#ifdef LMP_KOKKOS_GPU
if (ExecutionSpaceFromDevice<DeviceType>::space == Device) {
int team_size = atoms_per_bin*factor;
int team_size_max = Kokkos::TeamPolicy<DeviceType>(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag());
@ -244,7 +244,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
#endif
} else {
NPairKokkosBuildFunctor<DeviceType,TRI?0:HALF_NEIGH,1,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_ENABLE_CUDA
#ifdef LMP_KOKKOS_GPU
if (ExecutionSpaceFromDevice<DeviceType>::space == Device) {
int team_size = atoms_per_bin*factor;
int team_size_max = Kokkos::TeamPolicy<DeviceType>(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag());
@ -264,7 +264,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
} else {
if (SIZE) {
NPairKokkosBuildFunctorSize<DeviceType,HALF_NEIGH,0,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_ENABLE_CUDA
#ifdef LMP_KOKKOS_GPU
if (ExecutionSpaceFromDevice<DeviceType>::space == Device) {
int team_size = atoms_per_bin*factor;
int team_size_max = Kokkos::TeamPolicy<DeviceType>(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag());
@ -282,7 +282,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
#endif
} else {
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_ENABLE_CUDA
#ifdef LMP_KOKKOS_GPU
if (ExecutionSpaceFromDevice<DeviceType>::space == Device) {
int team_size = atoms_per_bin*factor;
int team_size_max = Kokkos::TeamPolicy<DeviceType>(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag());
@ -529,15 +529,33 @@ void NeighborKokkosExecute<DeviceType>::
/* ---------------------------------------------------------------------- */
#ifdef KOKKOS_ENABLE_CUDA
extern __shared__ X_FLOAT sharedmem[];
/* ---------------------------------------------------------------------- */
#ifdef KOKKOS_ENABLE_HIP
#include <hip/hip_version.h>
#if HIP_VERSION_MAJOR < 3 || (HIP_VERSION_MAJOR == 3 && HIP_VERSION_MINOR < 7)
// ROCm versions < 3.7 are missing __syncthreads_count, so we define a functional
// but (probably) not performant workaround
__device__ __forceinline__ int __syncthreads_count(int predicate) {
__shared__ int test_block[1];
if (!(threadIdx.x || threadIdx.y || threadIdx.z))
test_block[0] = 0;
__syncthreads();
atomicAdd(test_block, predicate);
__threadfence_block();
return test_block[0];
}
#endif
#endif
#ifdef LMP_KOKKOS_GPU
template<class DeviceType> template<int HalfNeigh,int Newton,int Tri>
__device__ inline
void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const
{
#ifdef KOKKOS_ENABLE_HIP
HIP_DYNAMIC_SHARED(X_FLOAT, sharedmem);
#else
extern __shared__ X_FLOAT sharedmem[];
#endif
/* loop over atoms in i's bin,
*/
const int atoms_per_bin = c_bins.extent(1);
@ -971,11 +989,16 @@ void NeighborKokkosExecute<DeviceType>::
/* ---------------------------------------------------------------------- */
#ifdef KOKKOS_ENABLE_CUDA
#ifdef LMP_KOKKOS_GPU
template<class DeviceType> template<int HalfNeigh,int Newton,int Tri>
__device__ inline
void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const
{
#ifdef KOKKOS_ENABLE_HIP
HIP_DYNAMIC_SHARED(X_FLOAT, sharedmem);
#else
extern __shared__ X_FLOAT sharedmem[];
#endif
/* loop over atoms in i's bin,
*/
const int atoms_per_bin = c_bins.extent(1);

View File

@ -310,7 +310,7 @@ class NeighborKokkosExecute
KOKKOS_FUNCTION
void build_ItemSize(const int &i) const;
#ifdef KOKKOS_ENABLE_CUDA
#ifdef LMP_KOKKOS_GPU
template<int HalfNeigh, int Newton, int Tri>
__device__ inline
void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const;
@ -387,7 +387,7 @@ struct NPairKokkosBuildFunctor {
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON,TRI>(i);
}
#ifdef KOKKOS_ENABLE_CUDA
#ifdef LMP_KOKKOS_GPU
__device__ inline
void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const {
@ -445,7 +445,7 @@ struct NPairKokkosBuildFunctorSize {
c.template build_ItemSize<HALF_NEIGH,GHOST_NEWTON,TRI>(i);
}
#ifdef KOKKOS_ENABLE_CUDA
#ifdef LMP_KOKKOS_GPU
__device__ inline
void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const {
c.template build_ItemSizeCuda<HALF_NEIGH,GHOST_NEWTON,TRI>(dev);

View File

@ -112,10 +112,10 @@ void PairDPDfdtEnergyKokkos<DeviceType>::init_style()
#endif
}
#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)
#if (defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__)) || defined(KOKKOS_ENABLE_HIP)
// CUDA specialization of init_style to properly call rand_pool.init()
template<>
void PairDPDfdtEnergyKokkos<Kokkos::Cuda>::init_style()
void PairDPDfdtEnergyKokkos<LMPDeviceSpace>::init_style()
{
PairDPDfdtEnergy::init_style();
@ -125,10 +125,10 @@ void PairDPDfdtEnergyKokkos<Kokkos::Cuda>::init_style()
int irequest = neighbor->nrequest - 1;
neighbor->requests[irequest]->
kokkos_host = std::is_same<Kokkos::Cuda,LMPHostType>::value &&
!std::is_same<Kokkos::Cuda,LMPDeviceType>::value;
kokkos_host = std::is_same<LMPDeviceSpace,LMPHostType>::value &&
!std::is_same<LMPDeviceSpace,LMPDeviceType>::value;
neighbor->requests[irequest]->
kokkos_device = std::is_same<Kokkos::Cuda,LMPDeviceType>::value;
kokkos_device = std::is_same<LMPDeviceSpace,LMPDeviceType>::value;
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;

View File

@ -811,22 +811,22 @@ void PPPMKokkos<DeviceType>::allocate()
// remap takes data from 3d brick to FFT decomposition
int collective_flag = 0; // not yet supported in Kokkos version
int cuda_aware_flag = lmp->kokkos->cuda_aware_flag;
int gpu_aware_flag = lmp->kokkos->gpu_aware_flag;
int tmp;
fft1 = new FFT3dKokkos<DeviceType>(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
0,0,&tmp,collective_flag,cuda_aware_flag);
0,0,&tmp,collective_flag,gpu_aware_flag);
fft2 = new FFT3dKokkos<DeviceType>(lmp,world,nx_pppm,ny_pppm,nz_pppm,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
0,0,&tmp,collective_flag,cuda_aware_flag);
0,0,&tmp,collective_flag,gpu_aware_flag);
remap = new RemapKokkos<DeviceType>(lmp,world,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
1,0,0,FFT_PRECISION,collective_flag,cuda_aware_flag);
1,0,0,FFT_PRECISION,collective_flag,gpu_aware_flag);
// create ghost grid object for rho and electric field communication
// also create 2 bufs for ghost grid cell comm, passed to GridComm methods