diff --git a/src/KOKKOS/npair_ssa_kokkos.cpp b/src/KOKKOS/npair_ssa_kokkos.cpp index 2b33256599..ba4bc9171c 100644 --- a/src/KOKKOS/npair_ssa_kokkos.cpp +++ b/src/KOKKOS/npair_ssa_kokkos.cpp @@ -333,7 +333,10 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu } // count how many ghosts might have neighbors, and increase the work plan storage for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) { - nl_size += k_gbincount.h_view(workPhase + 1); + int len = k_gbincount.h_view(workPhase + 1); + ssa_gitemLoc(workPhase,0) = nl_size; // record where workItem starts in ilist + ssa_gitemLen(workPhase,0) = len; + nl_size += len; } list->grow(nl_size); // Make special larger SSA neighbor list @@ -415,22 +418,19 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu Kokkos::deep_copy(data.resize, data.h_resize); Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs); -#ifdef NOTYET - NPairSSAKokkosBuildFunctor f(data,atoms_per_bin*5*sizeof(X_FLOAT)); - Kokkos::parallel_for(nall, f); -#endif // loop over bins with local atoms, storing half of the neighbors -#ifdef USE_LAMBDA_BUILD Kokkos::parallel_for(ssa_phaseCt, LAMMPS_LAMBDA (const int workPhase) { data.build_locals_onePhase(firstTry, comm->me, workPhase); }); -#else - NPairSSAKokkosBuildFunctor f(data, firstTry, comm->me); - Kokkos::parallel_for(ssa_phaseCt, f); -#endif data.neigh_list.inum = ssa_itemLoc(ssa_phaseCt-1,ssa_phaseLen(ssa_phaseCt-1)-1) + ssa_itemLen(ssa_phaseCt-1,ssa_phaseLen(ssa_phaseCt-1)-1); - data.build_ghosts(); + + // loop over AIR ghost atoms, storing their local neighbors + Kokkos::parallel_for(ssa_gphaseCt, LAMMPS_LAMBDA (const int workPhase) { + data.build_ghosts_onePhase(workPhase); + }); + data.neigh_list.gnum = ssa_gitemLoc(ssa_gphaseCt-1,ssa_gphaseLen(ssa_gphaseCt-1)-1) + + ssa_gitemLen(ssa_gphaseCt-1,ssa_gphaseLen(ssa_gphaseCt-1)-1) - data.neigh_list.inum; firstTry = false; DeviceType::fence(); @@ -606,34 +606,27 @@ fprintf(stdout, "Phas%03d phase %3d used %6d inums, workItems = %3d, skipped = % template -void NPairSSAKokkosExecute::build_ghosts() +void NPairSSAKokkosExecute::build_ghosts_onePhase(int workPhase) const { - int n = 0; + const typename ArrayTypes::t_int_1d_const_um stencil = d_stencil; int which = 0; - int inum = neigh_list.inum; - int gnum = 0; - // loop over AIR ghost atoms, storing their local neighbors // since these are ghosts, must check if stencil bin is out of bounds - for (int workPhase = 0; workPhase < ssa_gphaseCt; workPhase++) { int airnum = workPhase + 1; //FIXME for now, there is only 1 workItem for each ghost AIR int workItem; for (workItem = 0; workItem < 1; ++workItem) { - d_ssa_gitemLoc(workPhase, workItem) = inum + gnum; // record where workItem starts in ilist + int gNdx = d_ssa_gitemLoc(workPhase, workItem); // record where workItem starts in ilist for (int il = 0; il < c_gbincount(airnum); ++il) { const int i = c_gbins(airnum, il); - n = 0; + int n = 0; - const AtomNeighbors neighbors_i = neigh_list.get_neighbors(inum + gnum); + const AtomNeighbors neighbors_i = neigh_list.get_neighbors(gNdx); const X_FLOAT xtmp = x(i, 0); const X_FLOAT ytmp = x(i, 1); const X_FLOAT ztmp = x(i, 2); const int itype = type(i); - const typename ArrayTypes::t_int_1d_const_um stencil - = d_stencil; - int loc[3]; const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2), &(loc[0])); @@ -686,8 +679,8 @@ void NPairSSAKokkosExecute::build_ghosts() } if (n > 0) { - neigh_list.d_numneigh(inum + gnum) = n; - neigh_list.d_ilist(inum + (gnum++)) = i; + neigh_list.d_numneigh(gNdx) = n; + neigh_list.d_ilist(gNdx++) = i; if(n > neigh_list.maxneighs) { resize() = 1; if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n); @@ -695,12 +688,10 @@ void NPairSSAKokkosExecute::build_ghosts() } } // record where workItem ends in ilist - d_ssa_gitemLen(workPhase,workItem) = inum + gnum - d_ssa_gitemLoc(workPhase,workItem); + d_ssa_gitemLen(workPhase,workItem) = gNdx - d_ssa_gitemLoc(workPhase,workItem); // if (d_ssa_gitemLen(workPhase,workItem) > 0) workItem++; } d_ssa_gphaseLen(workPhase) = workItem; - } - neigh_list.gnum = gnum; } } diff --git a/src/KOKKOS/npair_ssa_kokkos.h b/src/KOKKOS/npair_ssa_kokkos.h index 62c4135cc7..98046feba8 100644 --- a/src/KOKKOS/npair_ssa_kokkos.h +++ b/src/KOKKOS/npair_ssa_kokkos.h @@ -275,7 +275,7 @@ class NPairSSAKokkosExecute bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2]; bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2]; - resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize"); + resize = typename AT::t_int_scalar("NPairSSAKokkosExecute::resize"); #ifndef KOKKOS_USE_CUDA_UVM h_resize = Kokkos::create_mirror_view(resize); #else @@ -283,7 +283,7 @@ class NPairSSAKokkosExecute #endif h_resize() = 1; new_maxneighs = typename AT:: - t_int_scalar("NeighborKokkosFunctor::new_maxneighs"); + t_int_scalar("NPairSSAKokkosExecute::new_maxneighs"); #ifndef KOKKOS_USE_CUDA_UVM h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs); #else @@ -297,7 +297,8 @@ class NPairSSAKokkosExecute KOKKOS_FUNCTION void build_locals_onePhase(const bool firstTry, int me, int workPhase) const; - void build_ghosts(); + KOKKOS_FUNCTION + void build_ghosts_onePhase(int workPhase) const; KOKKOS_INLINE_FUNCTION int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const @@ -351,24 +352,6 @@ class NPairSSAKokkosExecute }; -template -struct NPairSSAKokkosBuildFunctor { - typedef DeviceType device_type; - - const NPairSSAKokkosExecute c; - const bool firstTry; - const int me; - - NPairSSAKokkosBuildFunctor(const NPairSSAKokkosExecute &_c, - const bool _firstTry, const int _me):c(_c), - firstTry(_firstTry), me(_me) {}; - - KOKKOS_INLINE_FUNCTION - void operator() (const int & i) const { - c.build_locals_onePhase(firstTry, me, i); - } -}; - } #endif