This commit is contained in:
Stan Gerald Moore
2022-02-21 13:11:37 -07:00
parent e10ebf5236
commit 4a2dc4f14a
4 changed files with 22 additions and 27 deletions

View File

@@ -21,7 +21,7 @@
- Reduced math overhead: enabled specialized calls (e.g., cbrt for a
cube root instead of pow) and use power/exponential laws to reduce the
number of exponentials evaluated, etc.
- Fused the CG solve for "s" and "t" vectors
- Fused the CG solve for "S" and "T" matrices
- Improved the SpMV algorithm by using vector instead of team level
parallelism on GPUs
------------------------------------------------------------------------- */
@@ -384,7 +384,7 @@ void FixQEqReaxFFKokkos<DeviceType>::allocate_array()
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqZero, const int &ii) const
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqZero, const int &ii) const
{
const int i = d_ilist[ii];
const int itype = type(i);
@@ -707,7 +707,7 @@ double FixQEqReaxFFKokkos<DeviceType>::calculate_H_k(const F_FLOAT &r, const F_F
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqInitMatvec, const int &ii) const
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqInitMatvec, const int &ii) const
{
const int i = d_ilist[ii];
const int itype = type(i);
@@ -896,7 +896,7 @@ void FixQEqReaxFFKokkos<DeviceType>::sparse_matvec_kokkos(typename AT::t_ffloat2
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqSparseMatvec1, const int &ii) const
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqSparseMatvec1, const int &ii) const
{
const int i = d_ilist[ii];
const int itype = type(i);
@@ -913,7 +913,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqSparseMatvec1, const int
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqZeroQGhosts, const int &i) const
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqZeroQGhosts, const int &i) const
{
if (mask[i] & groupbit) {
if (!(converged & 1))
@@ -928,7 +928,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqZeroQGhosts, const int &i
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqSparseMatvec2_Half<NEIGHFLAG>, const int &ii) const
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqSparseMatvec2_Half<NEIGHFLAG>, const int &ii) const
{
// The q array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
auto v_o = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_o),decltype(ndup_o)>::get(dup_o,ndup_o);
@@ -963,7 +963,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqSparseMatvec2_Half<NEIGHF
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqSparseMatvec2_Full, const membertype_vec &team) const
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqSparseMatvec2_Full, const membertype_vec &team) const
{
int k = team.league_rank () * team.team_size () + team.team_rank ();
if (k < nn) {
@@ -992,7 +992,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqSparseMatvec2_Full, const
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqNorm1, const int &ii, F_FLOAT2& out) const
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqNorm1, const int &ii, F_FLOAT2& out) const
{
const int i = d_ilist[ii];
if (mask[i] & groupbit) {
@@ -1015,7 +1015,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqNorm1, const int &ii, F_F
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqDot1, const int &ii, F_FLOAT2& out) const
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqDot1, const int &ii, F_FLOAT2& out) const
{
const int i = d_ilist[ii];
if (mask[i] & groupbit) {
@@ -1030,7 +1030,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqDot1, const int &ii, F_FL
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqDot2, const int &ii, F_FLOAT2& out) const
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqDot2, const int &ii, F_FLOAT2& out) const
{
const int i = d_ilist[ii];
if (mask[i] & groupbit) {
@@ -1045,7 +1045,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqDot2, const int &ii, F_FL
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqDot3, const int &ii, F_FLOAT2& out) const
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqDot3, const int &ii, F_FLOAT2& out) const
{
const int i = d_ilist[ii];
if (mask[i] & groupbit) {
@@ -1071,7 +1071,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqDot3, const int &ii, F_FL
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqSum1, const int &ii) const
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqSum1, const int &ii) const
{
const int i = d_ilist[ii];
if (mask[i] & groupbit) {
@@ -1086,7 +1086,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqSum1, const int &ii) cons
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqSum2, const int &ii, F_FLOAT2& out) const
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqSum2, const int &ii, F_FLOAT2& out) const
{
const int i = d_ilist[ii];
if (mask[i] & groupbit) {
@@ -1099,7 +1099,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqSum2, const int &ii, F_FL
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::operator() (TagQEqCalculateQ, const int &ii) const
void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqCalculateQ, const int &ii) const
{
const int i = d_ilist[ii];
if (mask[i] & groupbit) {

View File

@@ -3417,8 +3417,6 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxComputeTorsionBlocking<
for (int d = 0; d < 3; d++) a_f(i,d) += fitmp[d];
}
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION

View File

@@ -333,7 +333,6 @@ void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeShortNeigh,
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
@@ -341,8 +340,8 @@ void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeHalf<NEIGHFL
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
const auto v_f = ScatterViewHelper<typename NeedDup<NEIGHFLAG,DeviceType>::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
const auto a_f = v_f.template access<typename AtomicDup<NEIGHFLAG,DeviceType>::value>();
const auto v_f = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
const auto a_f = v_f.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
const int i = d_ilist[ii];
if (i >= nlocal) return;
@@ -401,7 +400,7 @@ void PairTersoffKokkos<DeviceType>::operator()(TagPairTersoffComputeHalf<NEIGHFL
F_FLOAT fa, dfa, bij, prefactor;
ters_fa_k_and_ters_dfa(itype,jtype,jtype,rij,fa,dfa);
ters_bij_k_and_ters_dbij(itype,jtype,jtype, bo_ij, bij, prefactor);
ters_bij_k_and_ters_dbij(itype,jtype,jtype,bo_ij,bij,prefactor);
const F_FLOAT fatt = -0.5*bij * dfa / rij;
prefactor = 0.5*fa * prefactor;
@@ -862,9 +861,6 @@ void PairTersoffKokkos<DeviceType>::ters_fc_k_and_ters_dfc(const int &i, const i
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
double PairTersoffKokkos<DeviceType>::bondorder(const int &i, const int &j, const int &k,
@@ -935,7 +931,6 @@ void PairTersoffKokkos<DeviceType>::
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
double PairTersoffKokkos<DeviceType>::ters_fa_k(const int &i, const int &j,
@@ -960,7 +955,6 @@ double PairTersoffKokkos<DeviceType>::ters_dfa(const int &i, const int &j,
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairTersoffKokkos<DeviceType>::ters_fa_k_and_ters_dfa(const int &i, const int &j,
@@ -978,6 +972,7 @@ void PairTersoffKokkos<DeviceType>::ters_fa_k_and_ters_dfa(const int &i, const i
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
@@ -1057,6 +1052,8 @@ void PairTersoffKokkos<DeviceType>::ters_bij_k_and_ters_dbij(const int &i, const
prefactor = -0.5 * pow(1.0+tmp_n, -1.0-(1.0/(2.0*prm_ijk_pn)))*tmp_n / bo;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairTersoffKokkos<DeviceType>::ters_dthb(