Merge pull request #2997 from stanmoore1/kk_omp_target
Add preliminary support for Kokkos OpenMPTarget backend
This commit is contained in:
@ -1966,7 +1966,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::get_chi_field()
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class FixACKS2ReaxFFKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
template class FixACKS2ReaxFFKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -299,7 +299,7 @@ struct FixACKS2ReaxFFKokkosComputeHFunctor {
|
||||
c.template compute_h_team<NEIGHFLAG>(team, atoms_per_team, vector_length);
|
||||
}
|
||||
|
||||
size_t team_shmem_size(int team_size) const {
|
||||
size_t team_shmem_size(int /*team_size*/) const {
|
||||
size_t shmem_size =
|
||||
Kokkos::View<int *, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(
|
||||
atoms_per_team) + // s_ilist
|
||||
@ -347,7 +347,7 @@ struct FixACKS2ReaxFFKokkosComputeXFunctor {
|
||||
c.template compute_x_team<NEIGHFLAG>(team, atoms_per_team, vector_length);
|
||||
}
|
||||
|
||||
size_t team_shmem_size(int team_size) const {
|
||||
size_t team_shmem_size(int /*team_size*/) const {
|
||||
size_t shmem_size =
|
||||
Kokkos::View<int *, scratch_space, Kokkos::MemoryUnmanaged>::shmem_size(
|
||||
atoms_per_team) + // s_ilist
|
||||
|
||||
@ -30,7 +30,6 @@ FixStyle(qeq/reax/kk/host,FixQEqReaxFFKokkos<LMPHostType>);
|
||||
#include "kokkos_type.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "kokkos_base.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
@ -42,7 +41,7 @@ struct TagFixQEqReaxFFPackForwardComm {};
|
||||
struct TagFixQEqReaxFFUnpackForwardComm {};
|
||||
|
||||
template<class DeviceType>
|
||||
class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
|
||||
class FixQEqReaxFFKokkos : public FixQEqReaxFF {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
@ -44,7 +44,7 @@
|
||||
#define GPU_AWARE_UNKNOWN static int have_gpu_aware = -1;
|
||||
|
||||
// TODO HIP: implement HIP-aware MPI support (UCX) detection
|
||||
#if defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_SYCL)
|
||||
#if defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET)
|
||||
GPU_AWARE_UNKNOWN
|
||||
#elif defined(KOKKOS_ENABLE_CUDA)
|
||||
|
||||
@ -121,7 +121,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
} else if (strcmp(arg[iarg],"g") == 0 ||
|
||||
strcmp(arg[iarg],"gpus") == 0) {
|
||||
#ifndef LMP_KOKKOS_GPU
|
||||
error->all(FLERR,"GPUs are requested but Kokkos has not been compiled for CUDA, HIP, or SYCL");
|
||||
error->all(FLERR,"GPUs are requested but Kokkos has not been compiled using a GPU-enabled backend");
|
||||
#endif
|
||||
if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args");
|
||||
ngpus = atoi(arg[iarg+1]);
|
||||
@ -162,7 +162,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
||||
if (ngpus > 1 && !set_flag)
|
||||
error->all(FLERR,"Could not determine local MPI rank for multiple "
|
||||
"GPUs with Kokkos CUDA, HIP, or SYCL because MPI library not recognized");
|
||||
"GPUs with Kokkos because MPI library not recognized");
|
||||
|
||||
} else if (strcmp(arg[iarg],"t") == 0 ||
|
||||
strcmp(arg[iarg],"threads") == 0) {
|
||||
@ -204,7 +204,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
if (ngpus <= 0)
|
||||
error->all(FLERR,"Kokkos has been compiled for CUDA, HIP, or SYCL but no GPUs are requested");
|
||||
error->all(FLERR,"Kokkos has been compiled with GPU-enabled backend but no GPUs are requested");
|
||||
#endif
|
||||
|
||||
#ifndef KOKKOS_ENABLE_SERIAL
|
||||
@ -311,7 +311,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
error->warning(FLERR,"Detected MPICH. Disabling GPU-aware MPI");
|
||||
#else
|
||||
if (me == 0)
|
||||
error->warning(FLERR,"Kokkos with CUDA, HIP, or SYCL assumes CUDA-aware MPI is available,"
|
||||
error->warning(FLERR,"Kokkos with GPU-enabled backend assumes GPU-aware MPI is available,"
|
||||
" but cannot determine if this is the case\n try"
|
||||
" '-pk kokkos gpu/aware off' if getting segmentation faults");
|
||||
|
||||
|
||||
@ -87,7 +87,7 @@ E: Invalid Kokkos command-line args
|
||||
|
||||
Self-explanatory. See Section 2.7 of the manual for details.
|
||||
|
||||
E: Could not determine local MPI rank for multiple GPUs with Kokkos CUDA
|
||||
E: Could not determine local MPI rank for multiple GPUs with Kokkos
|
||||
because MPI library not recognized
|
||||
|
||||
The local MPI rank was not found in one of four supported environment variables.
|
||||
@ -96,13 +96,13 @@ E: Invalid number of threads requested for Kokkos: must be 1 or greater
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: GPUs are requested but Kokkos has not been compiled for CUDA
|
||||
E: GPUs are requested but Kokkos has not been compiled using a GPU-enabled backend
|
||||
|
||||
Recompile Kokkos with CUDA support to use GPUs.
|
||||
Recompile Kokkos with a GPU-enabled backend to use GPUs.
|
||||
|
||||
E: Kokkos has been compiled for CUDA, HIP, or SYCL but no GPUs are requested
|
||||
E: Kokkos has been compiled with GPU-enabled backend but no GPUs are requested
|
||||
|
||||
One or more GPUs must be used when Kokkos is compiled for CUDA/HIP/SYCL.
|
||||
One or more GPUs must be used when Kokkos is compiled for CUDA/HIP/SYCL/OpenMPTarget.
|
||||
|
||||
W: Kokkos package already initalized, cannot reinitialize with different parameters
|
||||
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_DualView.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
#include <Kokkos_Timer.hpp>
|
||||
#include <Kokkos_Vectorization.hpp>
|
||||
#include <Kokkos_ScatterView.hpp>
|
||||
#include <Kokkos_UnorderedMap.hpp>
|
||||
@ -34,7 +34,7 @@ constexpr int HALF = 4;
|
||||
#define ISFINITE(x) std::isfinite(x)
|
||||
#endif
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_SYCL)
|
||||
#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) || defined(KOKKOS_ENABLE_SYCL) || defined(KOKKOS_ENABLE_OPENMPTARGET)
|
||||
#define LMP_KOKKOS_GPU
|
||||
#endif
|
||||
|
||||
@ -223,6 +223,11 @@ template<>
|
||||
struct ExecutionSpaceFromDevice<Kokkos::Experimental::SYCL> {
|
||||
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Device;
|
||||
};
|
||||
#elif defined(KOKKOS_ENABLE_OPENMPTARGET)
|
||||
template<>
|
||||
struct ExecutionSpaceFromDevice<Kokkos::Experimental::OpenMPTarget> {
|
||||
static const LAMMPS_NS::ExecutionSpace space = LAMMPS_NS::Device;
|
||||
};
|
||||
#endif
|
||||
|
||||
// set host pinned space
|
||||
@ -232,6 +237,8 @@ typedef Kokkos::CudaHostPinnedSpace LMPPinnedHostType;
|
||||
typedef Kokkos::Experimental::HIPHostPinnedSpace LMPPinnedHostType;
|
||||
#elif defined(KOKKOS_ENABLE_SYCL)
|
||||
typedef Kokkos::Experimental::SYCLSharedUSMSpace LMPPinnedHostType;
|
||||
#elif defined(KOKKOS_ENABLE_OPENMPTARGET)
|
||||
typedef Kokkos::Serial LMPPinnedHostType;
|
||||
#endif
|
||||
|
||||
// create simple LMPDeviceSpace typedef for non CUDA-, HIP-, SYCL-, or OpenMPTarget-specific
|
||||
@ -242,6 +249,8 @@ typedef Kokkos::Cuda LMPDeviceSpace;
|
||||
typedef Kokkos::Experimental::HIP LMPDeviceSpace;
|
||||
#elif defined(KOKKOS_ENABLE_SYCL)
|
||||
typedef Kokkos::Experimental::SYCL LMPDeviceSpace;
|
||||
#elif defined(KOKKOS_ENABLE_OPENMPTARGET)
|
||||
typedef Kokkos::Experimental::OpenMPTarget LMPDeviceSpace;
|
||||
#endif
|
||||
|
||||
|
||||
@ -280,6 +289,11 @@ template<>
|
||||
struct AtomicDup<HALFTHREAD,Kokkos::Experimental::SYCL> {
|
||||
using value = Kokkos::Experimental::ScatterAtomic;
|
||||
};
|
||||
#elif defined(KOKKOS_ENABLE_OPENMPTARGET)
|
||||
template<>
|
||||
struct AtomicDup<HALFTHREAD,Kokkos::Experimental::OpenMPTarget> {
|
||||
using value = Kokkos::Experimental::ScatterAtomic;
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef LMP_KOKKOS_USE_ATOMICS
|
||||
|
||||
@ -226,7 +226,8 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
|
||||
data.h_resize() = 0;
|
||||
|
||||
Kokkos::deep_copy(d_scalars, h_scalars);
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
|
||||
#define BINS_PER_BLOCK 2
|
||||
const int factor = atoms_per_bin<64?2:1;
|
||||
#else
|
||||
@ -605,14 +606,15 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
|
||||
other_x[MY_II + 3 * atoms_per_bin] = itype;
|
||||
}
|
||||
other_id[MY_II] = i;
|
||||
#ifndef KOKKOS_ENABLE_SYCL
|
||||
#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
|
||||
int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0);
|
||||
|
||||
if (test) return;
|
||||
#else
|
||||
#elif defined(KOKKOS_ENABLE_SYCL)
|
||||
int not_done = (i >= 0 && i <= nlocal);
|
||||
dev.team_reduce(Kokkos::Max<int>(not_done));
|
||||
if(not_done == 0) return;
|
||||
#elif defined(KOKKOS_ENABLE_OPENMPTARGET)
|
||||
dev.team_barrier();
|
||||
#endif
|
||||
|
||||
if (i >= 0 && i < nlocal) {
|
||||
@ -1055,14 +1057,15 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
|
||||
other_x[MY_II + 4 * atoms_per_bin] = radi;
|
||||
}
|
||||
other_id[MY_II] = i;
|
||||
#ifndef KOKKOS_ENABLE_SYCL
|
||||
#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
|
||||
int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0);
|
||||
|
||||
if (test) return;
|
||||
#else
|
||||
#elif defined(KOKKOS_ENABLE_SYCL)
|
||||
int not_done = (i >= 0 && i <= nlocal);
|
||||
dev.team_reduce(Kokkos::Max<int>(not_done));
|
||||
if(not_done == 0) return;
|
||||
#elif defined(KOKKOS_ENABLE_OPENMPTARGET)
|
||||
dev.team_barrier();
|
||||
#endif
|
||||
|
||||
if (i >= 0 && i < nlocal) {
|
||||
|
||||
@ -283,7 +283,7 @@ void VerletKokkos::run(int n)
|
||||
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
//static double time = 0.0;
|
||||
//Kokkos::Impl::Timer ktimer;
|
||||
//Kokkos::Timer ktimer;
|
||||
|
||||
timer->init_timeout();
|
||||
for (int i = 0; i < n; i++) {
|
||||
@ -445,7 +445,7 @@ void VerletKokkos::run(int n)
|
||||
if (pair_compute_flag) {
|
||||
atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
|
||||
atomKK->sync(force->pair->execution_space,~(~force->pair->datamask_read|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));
|
||||
Kokkos::Impl::Timer ktimer;
|
||||
Kokkos::Timer ktimer;
|
||||
force->pair->compute(eflag,vflag);
|
||||
atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
|
||||
atomKK->modified(force->pair->execution_space,~(~force->pair->datamask_modify|(F_MASK | ENERGY_MASK | VIRIAL_MASK)));
|
||||
|
||||
Reference in New Issue
Block a user