diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index aae8592b7a..3ead5a2e34 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -28,8 +28,8 @@ namespace LAMMPS_NS { /* ---------------------------------------------------------------------- */ -template -NPairKokkos::NPairKokkos(LAMMPS *lmp) : NPair(lmp) { +template +NPairKokkos::NPairKokkos(LAMMPS *lmp) : NPair(lmp) { last_stencil_old = -1; @@ -49,8 +49,8 @@ NPairKokkos::NPairKokkos(LAMMPS *lmp) : NP copy needed info from Neighbor class to this build class ------------------------------------------------------------------------- */ -template -void NPairKokkos::copy_neighbor_info() +template +void NPairKokkos::copy_neighbor_info() { NPair::copy_neighbor_info(); @@ -58,7 +58,6 @@ void NPairKokkos::copy_neighbor_info() // general params - newton_pair = force->newton_pair; k_cutneighsq = neighborKK->k_cutneighsq; // overwrite per-type Neighbor cutoffs with custom value set by requestor @@ -93,8 +92,8 @@ void NPairKokkos::copy_neighbor_info() copy per-atom and per-bin vectors from NBin class to this build class ------------------------------------------------------------------------- */ -template -void NPairKokkos::copy_bin_info() +template +void NPairKokkos::copy_bin_info() { NPair::copy_bin_info(); @@ -110,8 +109,8 @@ void NPairKokkos::copy_bin_info() copy needed info from NStencil class to this build class ------------------------------------------------------------------------- */ -template -void NPairKokkos::copy_stencil_info() +template +void NPairKokkos::copy_stencil_info() { NPair::copy_stencil_info(); nstencil = ns->nstencil; @@ -145,8 +144,8 @@ void NPairKokkos::copy_stencil_info() /* ---------------------------------------------------------------------- */ -template -void NPairKokkos::build(NeighList *list_) +template +void NPairKokkos::build(NeighList *list_) { NeighListKokkos* list = (NeighListKokkos*) list_; const int nlocal = includegroup?atom->nfirst:atom->nlocal; @@ -244,7 +243,7 @@ void NPairKokkos::build(NeighList *list_) #endif if (GHOST) { - NPairKokkosBuildFunctorGhost f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); + NPairKokkosBuildFunctorGhost f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); #ifdef LMP_KOKKOS_GPU if (ExecutionSpaceFromDevice::space == Device) { int team_size = atoms_per_bin*factor; @@ -262,82 +261,42 @@ void NPairKokkos::build(NeighList *list_) Kokkos::parallel_for(nall, f); #endif } else { - if (newton_pair) { - if (SIZE) { - NPairKokkosBuildFunctorSize f(data,atoms_per_bin * 6 * sizeof(X_FLOAT) * factor); + if (SIZE) { + NPairKokkosBuildFunctorSize f(data,atoms_per_bin * 6 * sizeof(X_FLOAT) * factor); #ifdef LMP_KOKKOS_GPU - if (ExecutionSpaceFromDevice::space == Device) { - int team_size = atoms_per_bin*factor; - int team_size_max = Kokkos::TeamPolicy(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag()); - if (team_size <= team_size_max) { - Kokkos::TeamPolicy config((mbins+factor-1)/factor,team_size); - Kokkos::parallel_for(config, f); - } else { // fall back to flat method - f.sharedsize = 0; - Kokkos::parallel_for(nall, f); - } - } else + if (ExecutionSpaceFromDevice::space == Device) { + int team_size = atoms_per_bin*factor; + int team_size_max = Kokkos::TeamPolicy(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag()); + if (team_size <= team_size_max) { + Kokkos::TeamPolicy config((mbins+factor-1)/factor,team_size); + Kokkos::parallel_for(config, f); + } else { // fall back to flat method + f.sharedsize = 0; Kokkos::parallel_for(nall, f); -#else + } + } else Kokkos::parallel_for(nall, f); -#endif - } else { - NPairKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -#ifdef LMP_KOKKOS_GPU - if (ExecutionSpaceFromDevice::space == Device) { - int team_size = atoms_per_bin*factor; - int team_size_max = Kokkos::TeamPolicy(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag()); - if (team_size <= team_size_max) { - Kokkos::TeamPolicy config((mbins+factor-1)/factor,team_size); - Kokkos::parallel_for(config, f); - } else { // fall back to flat method - f.sharedsize = 0; - Kokkos::parallel_for(nall, f); - } - } else - Kokkos::parallel_for(nall, f); #else - Kokkos::parallel_for(nall, f); + Kokkos::parallel_for(nall, f); #endif - } } else { - if (SIZE) { - NPairKokkosBuildFunctorSize f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); + NPairKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); #ifdef LMP_KOKKOS_GPU - if (ExecutionSpaceFromDevice::space == Device) { - int team_size = atoms_per_bin*factor; - int team_size_max = Kokkos::TeamPolicy(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag()); - if (team_size <= team_size_max) { - Kokkos::TeamPolicy config((mbins+factor-1)/factor,team_size); - Kokkos::parallel_for(config, f); - } else { // fall back to flat method - f.sharedsize = 0; - Kokkos::parallel_for(nall, f); - } - } else + if (ExecutionSpaceFromDevice::space == Device) { + int team_size = atoms_per_bin*factor; + int team_size_max = Kokkos::TeamPolicy(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag()); + if (team_size <= team_size_max) { + Kokkos::TeamPolicy config((mbins+factor-1)/factor,team_size); + Kokkos::parallel_for(config, f); + } else { // fall back to flat method + f.sharedsize = 0; Kokkos::parallel_for(nall, f); -#else + } + } else Kokkos::parallel_for(nall, f); -#endif - } else { - NPairKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); -#ifdef LMP_KOKKOS_GPU - if (ExecutionSpaceFromDevice::space == Device) { - int team_size = atoms_per_bin*factor; - int team_size_max = Kokkos::TeamPolicy(team_size,Kokkos::AUTO).team_size_max(f,Kokkos::ParallelForTag()); - if (team_size <= team_size_max) { - Kokkos::TeamPolicy config((mbins+factor-1)/factor,team_size); - Kokkos::parallel_for(config, f); - } else { // fall back to flat method - f.sharedsize = 0; - Kokkos::parallel_for(nall, f); - } - } else - Kokkos::parallel_for(nall, f); #else - Kokkos::parallel_for(nall, f); + Kokkos::parallel_for(nall, f); #endif - } } } Kokkos::deep_copy(h_scalars, d_scalars); @@ -1402,20 +1361,30 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP } namespace LAMMPS_NS { -template class NPairKokkos; -template class NPairKokkos; -template class NPairKokkos; -template class NPairKokkos; -template class NPairKokkos; -template class NPairKokkos; -template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; #ifdef LMP_KOKKOS_GPU -template class NPairKokkos; -template class NPairKokkos; -template class NPairKokkos; -template class NPairKokkos; -template class NPairKokkos; -template class NPairKokkos; -template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; +template class NPairKokkos; #endif } diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index ce63c30c4b..78d735f829 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -13,75 +13,125 @@ #ifdef NPAIR_CLASS // clang-format off -typedef NPairKokkos NPairKokkosFullBinHost; +typedef NPairKokkos NPairKokkosFullBinHost; NPairStyle(full/bin/kk/host, NPairKokkosFullBinHost, - NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI); + NP_BIN | NP_KOKKOS_HOST | NP_FULL | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI); -typedef NPairKokkos NPairKokkosFullBinDevice; +typedef NPairKokkos NPairKokkosFullBinDevice; NPairStyle(full/bin/kk/device, NPairKokkosFullBinDevice, - NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI); + NP_BIN | NP_KOKKOS_DEVICE | NP_FULL | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI); -typedef NPairKokkos NPairKokkosFullBinGhostHost; +typedef NPairKokkos NPairKokkosFullBinGhostHost; NPairStyle(full/bin/ghost/kk/host, NPairKokkosFullBinGhostHost, - NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI); + NP_BIN | NP_KOKKOS_HOST | NP_FULL | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI); -typedef NPairKokkos NPairKokkosFullBinGhostDevice; +typedef NPairKokkos NPairKokkosFullBinGhostDevice; NPairStyle(full/bin/ghost/kk/device, NPairKokkosFullBinGhostDevice, - NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI); + NP_BIN | NP_KOKKOS_DEVICE | NP_FULL | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI); -typedef NPairKokkos NPairKokkosHalfBinHost; -NPairStyle(half/bin/kk/host, - NPairKokkosHalfBinHost, - NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO); +typedef NPairKokkos NPairKokkosHalfBinNewtonHost; +NPairStyle(half/bin/newton/kk/host, + NPairKokkosHalfBinNewtonHost, + NP_BIN | NP_KOKKOS_HOST | NP_HALF | NP_NEWTON | NP_ORTHO); -typedef NPairKokkos NPairKokkosHalfBinDevice; -NPairStyle(half/bin/kk/device, - NPairKokkosHalfBinDevice, - NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO); +typedef NPairKokkos NPairKokkosHalfBinNewtoffHost; +NPairStyle(half/bin/newtoff/kk/host, + NPairKokkosHalfBinNewtoffHost, + NP_BIN | NP_KOKKOS_HOST | NP_HALF | NP_NEWTOFF | NP_ORTHO); -typedef NPairKokkos NPairKokkosHalfBinHostTri; -NPairStyle(half/bin/kk/host, - NPairKokkosHalfBinHostTri, - NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_TRI); +typedef NPairKokkos NPairKokkosHalfBinNewtonDevice; +NPairStyle(half/bin/newton/kk/device, + NPairKokkosHalfBinNewtonDevice, + NP_KOKKOS_DEVICE | NP_HALF | NP_BIN | NP_NEWTON | NP_ORTHO); -typedef NPairKokkos NPairKokkosHalfBinDeviceTri; -NPairStyle(half/bin/kk/device, - NPairKokkosHalfBinDeviceTri, - NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_TRI); +typedef NPairKokkos NPairKokkosHalfBinNewtoffDevice; +NPairStyle(half/bin/newtoff/kk/device, + NPairKokkosHalfBinNewtoffDevice, + NP_KOKKOS_DEVICE | NP_HALF | NP_BIN | NP_NEWTOFF | NP_ORTHO); -typedef NPairKokkos NPairKokkosHalfBinGhostHost; -NPairStyle(half/bin/ghost/kk/host, - NPairKokkosHalfBinGhostHost, - NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI); +typedef NPairKokkos NPairKokkosHalfBinNewtonTriHost; +NPairStyle(half/bin/newton/kk/host, + NPairKokkosHalfBinNewtonTriHost, + NP_BIN | NP_KOKKOS_HOST | NP_HALF | NP_NEWTON | NP_TRI); -typedef NPairKokkos NPairKokkosHalfBinGhostDevice; -NPairStyle(half/bin/ghost/kk/device, - NPairKokkosHalfBinGhostDevice, - NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI); +typedef NPairKokkos NPairKokkosHalfBinNewtoffTriHost; +NPairStyle(half/bin/newtoff/kk/host, + NPairKokkosHalfBinNewtoffTriHost, + NP_BIN | NP_KOKKOS_HOST | NP_HALF | NP_NEWTOFF | NP_TRI); -typedef NPairKokkos NPairKokkosHalfSizeBinHost; -NPairStyle(half/size/bin/kk/host, - NPairKokkosHalfSizeBinHost, - NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO); +typedef NPairKokkos NPairKokkosHalfBinNewtonTriDevice; +NPairStyle(half/bin/newton/kk/device, + NPairKokkosHalfBinNewtonTriDevice, + NP_KOKKOS_DEVICE | NP_HALF | NP_BIN | NP_NEWTON | NP_TRI); -typedef NPairKokkos NPairKokkosHalfSizeBinDevice; -NPairStyle(half/size/bin/kk/device, - NPairKokkosHalfSizeBinDevice, - NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO); +typedef NPairKokkos NPairKokkosHalfBinNewtoffTriDevice; +NPairStyle(half/bin/newtoff/kk/device, + NPairKokkosHalfBinNewtoffTriDevice, + NP_KOKKOS_DEVICE | NP_HALF | NP_BIN | NP_NEWTOFF | NP_TRI); -typedef NPairKokkos NPairKokkosHalfSizeBinHostTri; -NPairStyle(half/size/bin/kk/host, - NPairKokkosHalfSizeBinHostTri, - NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_TRI); +typedef NPairKokkos NPairKokkosHalfBinNewtonGhostHost; +NPairStyle(half/bin/newton/ghost/kk/host, + NPairKokkosHalfBinNewtonGhostHost, + NP_BIN | NP_KOKKOS_HOST | NP_HALF | NP_NEWTON | NP_GHOST | NP_ORTHO | NP_TRI); -typedef NPairKokkos NPairKokkosHalfSizeBinDeviceTri; -NPairStyle(half/size/bin/kk/device, - NPairKokkosHalfSizeBinDeviceTri, - NP_HALF | NP_SIZE | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_TRI); +typedef NPairKokkos NPairKokkosHalfBinNewtoffGhostHost; +NPairStyle(half/bin/newtoff/ghost/kk/host, + NPairKokkosHalfBinNewtoffGhostHost, + NP_BIN | NP_KOKKOS_HOST | NP_HALF | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI); + +typedef NPairKokkos NPairKokkosHalfBinNewtonGhostDevice; +NPairStyle(half/bin/newton/ghost/kk/device, + NPairKokkosHalfBinNewtonGhostDevice, + NP_KOKKOS_DEVICE | NP_HALF | NP_BIN | NP_NEWTON | NP_GHOST | NP_ORTHO | NP_TRI); + +typedef NPairKokkos NPairKokkosHalfBinNewtoffGhostDevice; +NPairStyle(half/bin/newtoff/ghost/kk/device, + NPairKokkosHalfBinNewtoffGhostDevice, + NP_KOKKOS_DEVICE | NP_HALF | NP_BIN | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI); + +typedef NPairKokkos NPairKokkosHalfBinNewtonSizeHost; +NPairStyle(half/bin/newton/size/kk/host, + NPairKokkosHalfBinNewtonSizeHost, + NP_BIN | NP_KOKKOS_HOST | NP_HALF | NP_NEWTON | NP_SIZE | NP_ORTHO); + +typedef NPairKokkos NPairKokkosHalfBinNewtoffSizeHost; +NPairStyle(half/bin/newtoff/size/kk/host, + NPairKokkosHalfBinNewtoffSizeHost, + NP_BIN | NP_KOKKOS_HOST | NP_HALF | NP_NEWTOFF | NP_SIZE | NP_ORTHO); + +typedef NPairKokkos NPairKokkosHalfBinNewtonSizeDevice; +NPairStyle(half/bin/newton/size/kk/device, + NPairKokkosHalfBinNewtonSizeDevice, + NP_KOKKOS_DEVICE | NP_HALF | NP_BIN | NP_NEWTON | NP_SIZE | NP_ORTHO); + +typedef NPairKokkos NPairKokkosHalfBinNewtoffSizeDevice; +NPairStyle(half/bin/newtoff/size/kk/device, + NPairKokkosHalfBinNewtoffSizeDevice, + NP_KOKKOS_DEVICE | NP_HALF | NP_BIN | NP_NEWTOFF | NP_SIZE | NP_ORTHO); + +typedef NPairKokkos NPairKokkosHalfBinNewtonSizeTriHost; +NPairStyle(half/bin/newton/size/kk/host, + NPairKokkosHalfBinNewtonSizeTriHost, + NP_BIN | NP_KOKKOS_HOST | NP_HALF | NP_NEWTON | NP_SIZE | NP_TRI); + +typedef NPairKokkos NPairKokkosHalfBinNewtoffSizeTriHost; +NPairStyle(half/bin/newtoff/size/kk/host, + NPairKokkosHalfBinNewtoffSizeTriHost, + NP_BIN | NP_KOKKOS_HOST | NP_HALF | NP_NEWTOFF | NP_SIZE | NP_TRI); + +typedef NPairKokkos NPairKokkosHalfBinNewtonSizeTriDevice; +NPairStyle(half/bin/newton/size/kk/device, + NPairKokkosHalfBinNewtonSizeTriDevice, + NP_KOKKOS_DEVICE | NP_HALF | NP_BIN | NP_NEWTON | NP_SIZE | NP_TRI); + +typedef NPairKokkos NPairKokkosHalfBinNewtoffSizeTriDevice; +NPairStyle(half/bin/newtoff/size/kk/device, + NPairKokkosHalfBinNewtoffSizeTriDevice, + NP_KOKKOS_DEVICE | NP_HALF | NP_BIN | NP_NEWTOFF | NP_SIZE | NP_TRI); // clang-format on #else @@ -94,7 +144,7 @@ NPairStyle(half/size/bin/kk/device, namespace LAMMPS_NS { -template +template class NPairKokkos : public NPair { typedef ArrayTypes AT; @@ -379,7 +429,7 @@ class NeighborKokkosExecute }; -template +template struct NPairKokkosBuildFunctor { typedef DeviceType device_type; @@ -392,19 +442,19 @@ struct NPairKokkosBuildFunctor { KOKKOS_INLINE_FUNCTION void operator() (const int & i) const { - c.template build_Item(i); + c.template build_Item(i); } #ifdef LMP_KOKKOS_GPU LAMMPS_DEVICE_FUNCTION inline void operator() (typename Kokkos::TeamPolicy::member_type dev) const { - c.template build_ItemGPU(dev, sharedsize); + c.template build_ItemGPU(dev, sharedsize); } size_t team_shmem_size(const int team_size) const { (void) team_size; return sharedsize; } #endif }; -template -struct NPairKokkosBuildFunctor { +template +struct NPairKokkosBuildFunctor { typedef LMPHostType device_type; const NeighborKokkosExecute c; @@ -416,7 +466,7 @@ struct NPairKokkosBuildFunctor { KOKKOS_INLINE_FUNCTION void operator() (const int & i) const { - c.template build_Item(i); + c.template build_Item(i); } void operator() (typename Kokkos::TeamPolicy::member_type /*dev*/) const {} // Should error out @@ -466,7 +516,7 @@ struct NPairKokkosBuildFunctorGhost { void operator() (typename Kokkos::TeamPolicy::member_type /*dev*/) const {} // Should error out }; -template +template struct NPairKokkosBuildFunctorSize { typedef DeviceType device_type; @@ -478,20 +528,20 @@ struct NPairKokkosBuildFunctorSize { KOKKOS_INLINE_FUNCTION void operator() (const int & i) const { - c.template build_ItemSize(i); + c.template build_ItemSize(i); } #ifdef LMP_KOKKOS_GPU LAMMPS_DEVICE_FUNCTION inline void operator() (typename Kokkos::TeamPolicy::member_type dev) const { - c.template build_ItemSizeGPU(dev, sharedsize); + c.template build_ItemSizeGPU(dev, sharedsize); } size_t team_shmem_size(const int team_size) const { (void) team_size; return sharedsize; } #endif }; -template -struct NPairKokkosBuildFunctorSize { +template +struct NPairKokkosBuildFunctorSize { typedef LMPHostType device_type; const NeighborKokkosExecute c; @@ -502,7 +552,7 @@ struct NPairKokkosBuildFunctorSize { KOKKOS_INLINE_FUNCTION void operator() (const int & i) const { - c.template build_ItemSize(i); + c.template build_ItemSize(i); } void operator() (typename Kokkos::TeamPolicy::member_type /*dev*/) const {} // Should error out