From ee2b9f28cb2bbbda831163fd9daada80ddac3b5f Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 3 Mar 2022 08:09:25 -0700 Subject: [PATCH] Switch to vector parallelism for half list in QEq --- src/KOKKOS/fix_qeq_reaxff_kokkos.cpp | 92 +++++++++++++++------------- src/KOKKOS/fix_qeq_reaxff_kokkos.h | 2 +- 2 files changed, 49 insertions(+), 45 deletions(-) diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index bd5baa81f5..8634ebc56a 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -858,6 +858,19 @@ void FixQEqReaxFFKokkos::sparse_matvec_kokkos(typename AT::t_ffloat2 Kokkos::parallel_for(Kokkos::RangePolicy(0,nn),*this); + int teamsize; + int vectorsize; + int leaguesize; + if (execution_space == Host) { + teamsize = 1; + vectorsize = 1; + leaguesize = nn; + } else { + teamsize = FixQEqReaxFFKokkos::spmv_teamsize; + vectorsize = FixQEqReaxFFKokkos::vectorsize; + leaguesize = (nn + teamsize - 1) / (teamsize); + } + if (neighflag != FULL) { Kokkos::parallel_for(Kokkos::RangePolicy(nn,NN),*this); @@ -865,28 +878,14 @@ void FixQEqReaxFFKokkos::sparse_matvec_kokkos(typename AT::t_ffloat2 dup_o.reset_except(d_o); if (neighflag == HALF) - Kokkos::parallel_for(Kokkos::RangePolicy >(0,nn),*this); + Kokkos::parallel_for(Kokkos::TeamPolicy>(leaguesize, teamsize, vectorsize), *this); else if (neighflag == HALFTHREAD) - Kokkos::parallel_for(Kokkos::RangePolicy >(0,nn),*this); + Kokkos::parallel_for(Kokkos::TeamPolicy>(leaguesize, teamsize, vectorsize), *this); if (need_dup) Kokkos::Experimental::contribute(d_o, dup_o); - } else { // FULL - int teamsize; - int vectorsize; - int leaguesize; - if (execution_space == Host) { - teamsize = 1; - vectorsize = 1; - leaguesize = nn; - } else { - teamsize = FixQEqReaxFFKokkos::spmv_teamsize; - vectorsize = FixQEqReaxFFKokkos::vectorsize; - leaguesize = (nn + teamsize - 1) / (teamsize); - } - + } else // FULL Kokkos::parallel_for(Kokkos::TeamPolicy (leaguesize, teamsize, vectorsize), *this); - } } /* ---------------------------------------------------------------------- */ @@ -925,34 +924,39 @@ void FixQEqReaxFFKokkos::operator()(TagQEqZeroQGhosts, const int &i) template template KOKKOS_INLINE_FUNCTION -void FixQEqReaxFFKokkos::operator()(TagQEqSparseMatvec2_Half, const int &ii) const -{ - // The q array is duplicated for OpenMP, atomic for CUDA, and neither for Serial - auto v_o = ScatterViewHelper,decltype(dup_o),decltype(ndup_o)>::get(dup_o,ndup_o); - auto a_o = v_o.template access>(); - - const int i = d_ilist[ii]; - if (mask[i] & groupbit) { - F_FLOAT2 tmp; - const auto d_xx_i0 = d_xx(i,0); - const auto d_xx_i1 = d_xx(i,1); - - for (int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) { - const int j = d_jlist(jj); - const auto d_val_jj = d_val(jj); - if (!(converged & 1)) { - tmp.v[0] += d_val_jj * d_xx(j,0); - a_o(j,0) += d_val_jj * d_xx_i0; - } - if (!(converged & 2)) { - tmp.v[1] += d_val_jj * d_xx(j,1); - a_o(j,1) += d_val_jj * d_xx_i1; - } +void FixQEqReaxFFKokkos::operator()(TagQEqSparseMatvec2_Half, const typename Kokkos::TeamPolicy>::member_type &team) const +{ + int k = team.league_rank() * team.team_size() + team.team_rank(); + if (k < nn) { + // The q array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + auto v_o = ScatterViewHelper,decltype(dup_o),decltype(ndup_o)>::get(dup_o,ndup_o); + auto a_o = v_o.template access>(); + + const int i = d_ilist[k]; + if (mask[i] & groupbit) { + F_FLOAT2 tmp; + const double d_xx_i0 = d_xx(i,0); + const double d_xx_i1 = d_xx(i,1); + + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT2& tmp) { + const int j = d_jlist(jj); + const auto d_val_jj = d_val(jj); + if (!(converged & 1)) { + tmp.v[0] += d_val_jj * d_xx(j,0); + a_o(j,0) += d_val_jj * d_xx_i0; + } + if (!(converged & 2)) { + tmp.v[1] += d_val_jj * d_xx(j,1); + a_o(j,1) += d_val_jj * d_xx_i1; + } + }, tmp); + Kokkos::single(Kokkos::PerThread(team), [&] () { + if (!(converged & 1)) + a_o(i,0) += tmp.v[0]; + if (!(converged & 2)) + a_o(i,1) += tmp.v[1]; + }); } - if (!(converged & 1)) - a_o(i,0) += tmp.v[0]; - if (!(converged & 2)) - a_o(i,1) += tmp.v[1]; } } diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h index 5c2509129a..bd38522811 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h @@ -92,7 +92,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { template KOKKOS_INLINE_FUNCTION - void operator()(TagQEqSparseMatvec2_Half, const int&) const; + void operator()(TagQEqSparseMatvec2_Half, const typename Kokkos::TeamPolicy>::member_type &team) const; typedef typename Kokkos::TeamPolicy::member_type membertype_vec; KOKKOS_INLINE_FUNCTION