Switch to vector parallelism for half list in QEq
This commit is contained in:
@@ -858,6 +858,19 @@ void FixQEqReaxFFKokkos<DeviceType>::sparse_matvec_kokkos(typename AT::t_ffloat2
|
|||||||
|
|
||||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagQEqSparseMatvec1>(0,nn),*this);
|
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagQEqSparseMatvec1>(0,nn),*this);
|
||||||
|
|
||||||
|
int teamsize;
|
||||||
|
int vectorsize;
|
||||||
|
int leaguesize;
|
||||||
|
if (execution_space == Host) {
|
||||||
|
teamsize = 1;
|
||||||
|
vectorsize = 1;
|
||||||
|
leaguesize = nn;
|
||||||
|
} else {
|
||||||
|
teamsize = FixQEqReaxFFKokkos<DeviceType>::spmv_teamsize;
|
||||||
|
vectorsize = FixQEqReaxFFKokkos<DeviceType>::vectorsize;
|
||||||
|
leaguesize = (nn + teamsize - 1) / (teamsize);
|
||||||
|
}
|
||||||
|
|
||||||
if (neighflag != FULL) {
|
if (neighflag != FULL) {
|
||||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagQEqZeroQGhosts>(nn,NN),*this);
|
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagQEqZeroQGhosts>(nn,NN),*this);
|
||||||
|
|
||||||
@@ -865,28 +878,14 @@ void FixQEqReaxFFKokkos<DeviceType>::sparse_matvec_kokkos(typename AT::t_ffloat2
|
|||||||
dup_o.reset_except(d_o);
|
dup_o.reset_except(d_o);
|
||||||
|
|
||||||
if (neighflag == HALF)
|
if (neighflag == HALF)
|
||||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagQEqSparseMatvec2_Half<HALF> >(0,nn),*this);
|
Kokkos::parallel_for(Kokkos::TeamPolicy<DeviceType, TagQEqSparseMatvec2_Half<HALF>>(leaguesize, teamsize, vectorsize), *this);
|
||||||
else if (neighflag == HALFTHREAD)
|
else if (neighflag == HALFTHREAD)
|
||||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagQEqSparseMatvec2_Half<HALFTHREAD> >(0,nn),*this);
|
Kokkos::parallel_for(Kokkos::TeamPolicy<DeviceType, TagQEqSparseMatvec2_Half<HALFTHREAD>>(leaguesize, teamsize, vectorsize), *this);
|
||||||
|
|
||||||
if (need_dup)
|
if (need_dup)
|
||||||
Kokkos::Experimental::contribute(d_o, dup_o);
|
Kokkos::Experimental::contribute(d_o, dup_o);
|
||||||
} else { // FULL
|
} else // FULL
|
||||||
int teamsize;
|
|
||||||
int vectorsize;
|
|
||||||
int leaguesize;
|
|
||||||
if (execution_space == Host) {
|
|
||||||
teamsize = 1;
|
|
||||||
vectorsize = 1;
|
|
||||||
leaguesize = nn;
|
|
||||||
} else {
|
|
||||||
teamsize = FixQEqReaxFFKokkos<DeviceType>::spmv_teamsize;
|
|
||||||
vectorsize = FixQEqReaxFFKokkos<DeviceType>::vectorsize;
|
|
||||||
leaguesize = (nn + teamsize - 1) / (teamsize);
|
|
||||||
}
|
|
||||||
|
|
||||||
Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagQEqSparseMatvec2_Full>(leaguesize, teamsize, vectorsize), *this);
|
Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagQEqSparseMatvec2_Full>(leaguesize, teamsize, vectorsize), *this);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
@@ -925,34 +924,39 @@ void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqZeroQGhosts, const int &i)
|
|||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
template<int NEIGHFLAG>
|
template<int NEIGHFLAG>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqSparseMatvec2_Half<NEIGHFLAG>, const int &ii) const
|
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqSparseMatvec2_Half<NEIGHFLAG>, const typename Kokkos::TeamPolicy<DeviceType, TagQEqSparseMatvec2_Half<NEIGHFLAG>>::member_type &team) const
|
||||||
{
|
{
|
||||||
// The q array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
int k = team.league_rank() * team.team_size() + team.team_rank();
|
||||||
auto v_o = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_o),decltype(ndup_o)>::get(dup_o,ndup_o);
|
if (k < nn) {
|
||||||
auto a_o = v_o.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
// The q array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||||
|
auto v_o = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_o),decltype(ndup_o)>::get(dup_o,ndup_o);
|
||||||
const int i = d_ilist[ii];
|
auto a_o = v_o.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||||
if (mask[i] & groupbit) {
|
|
||||||
F_FLOAT2 tmp;
|
const int i = d_ilist[k];
|
||||||
const auto d_xx_i0 = d_xx(i,0);
|
if (mask[i] & groupbit) {
|
||||||
const auto d_xx_i1 = d_xx(i,1);
|
F_FLOAT2 tmp;
|
||||||
|
const double d_xx_i0 = d_xx(i,0);
|
||||||
for (int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
|
const double d_xx_i1 = d_xx(i,1);
|
||||||
const int j = d_jlist(jj);
|
|
||||||
const auto d_val_jj = d_val(jj);
|
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT2& tmp) {
|
||||||
if (!(converged & 1)) {
|
const int j = d_jlist(jj);
|
||||||
tmp.v[0] += d_val_jj * d_xx(j,0);
|
const auto d_val_jj = d_val(jj);
|
||||||
a_o(j,0) += d_val_jj * d_xx_i0;
|
if (!(converged & 1)) {
|
||||||
}
|
tmp.v[0] += d_val_jj * d_xx(j,0);
|
||||||
if (!(converged & 2)) {
|
a_o(j,0) += d_val_jj * d_xx_i0;
|
||||||
tmp.v[1] += d_val_jj * d_xx(j,1);
|
}
|
||||||
a_o(j,1) += d_val_jj * d_xx_i1;
|
if (!(converged & 2)) {
|
||||||
}
|
tmp.v[1] += d_val_jj * d_xx(j,1);
|
||||||
|
a_o(j,1) += d_val_jj * d_xx_i1;
|
||||||
|
}
|
||||||
|
}, tmp);
|
||||||
|
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
||||||
|
if (!(converged & 1))
|
||||||
|
a_o(i,0) += tmp.v[0];
|
||||||
|
if (!(converged & 2))
|
||||||
|
a_o(i,1) += tmp.v[1];
|
||||||
|
});
|
||||||
}
|
}
|
||||||
if (!(converged & 1))
|
|
||||||
a_o(i,0) += tmp.v[0];
|
|
||||||
if (!(converged & 2))
|
|
||||||
a_o(i,1) += tmp.v[1];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -92,7 +92,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
|
|||||||
|
|
||||||
template<int NEIGHFLAG>
|
template<int NEIGHFLAG>
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
void operator()(TagQEqSparseMatvec2_Half<NEIGHFLAG>, const int&) const;
|
void operator()(TagQEqSparseMatvec2_Half<NEIGHFLAG>, const typename Kokkos::TeamPolicy<DeviceType, TagQEqSparseMatvec2_Half<NEIGHFLAG>>::member_type &team) const;
|
||||||
|
|
||||||
typedef typename Kokkos::TeamPolicy<DeviceType, TagQEqSparseMatvec2_Full>::member_type membertype_vec;
|
typedef typename Kokkos::TeamPolicy<DeviceType, TagQEqSparseMatvec2_Full>::member_type membertype_vec;
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
|
|||||||
Reference in New Issue
Block a user