Fix integer overflow for large ReaxFF systems with KOKKOS package

This commit is contained in:
Stan Moore
2024-06-26 15:14:55 -04:00
parent acc28e01c3
commit 7da7e69ccb
7 changed files with 128 additions and 95 deletions

View File

@ -53,7 +53,8 @@ FixACKS2ReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK | TAG_MASK;
datamask_modify = Q_MASK | X_MASK;
nmax = m_cap = 0;
nmax = 0;
m_cap_big = 0;
allocated_flag = 0;
nprev = 4;
@ -66,7 +67,7 @@ FixACKS2ReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
buf = new double[2*nprev];
prev_last_rows_rank = 0;
d_mfill_offset = typename AT::t_int_scalar("acks2/kk:mfill_offset");
d_mfill_offset = typename AT::t_bigint_scalar("acks2/kk:mfill_offset");
}
/* ---------------------------------------------------------------------- */
@ -418,10 +419,10 @@ void FixACKS2ReaxFFKokkos<DeviceType>::pre_force(int /*vflag*/)
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixACKS2ReaxFFKokkos<DeviceType>::num_neigh_item(int ii, int &maxneigh) const
void FixACKS2ReaxFFKokkos<DeviceType>::num_neigh_item(int ii, bigint &totneigh) const
{
const int i = d_ilist[ii];
maxneigh += d_numneigh[i];
totneigh += d_numneigh[i];
}
/* ---------------------------------------------------------------------- */
@ -433,39 +434,39 @@ void FixACKS2ReaxFFKokkos<DeviceType>::allocate_matrix()
// determine the total space for the H matrix
m_cap = 0;
m_cap_big = 0;
// limit scope of functor to allow deallocation of views
{
FixACKS2ReaxFFKokkosNumNeighFunctor<DeviceType> neigh_functor(this);
Kokkos::parallel_reduce(nn,neigh_functor,m_cap);
Kokkos::parallel_reduce(nn,neigh_functor,m_cap_big);
}
// deallocate first to reduce memory overhead
d_firstnbr = typename AT::t_int_1d();
d_firstnbr = typename AT::t_bigint_1d();
d_numnbrs = typename AT::t_int_1d();
d_jlist = typename AT::t_int_1d();
d_val = typename AT::t_ffloat_1d();
d_firstnbr_X = typename AT::t_int_1d();
d_firstnbr_X = typename AT::t_bigint_1d();
d_numnbrs_X = typename AT::t_int_1d();
d_jlist_X = typename AT::t_int_1d();
d_val_X = typename AT::t_ffloat_1d();
// H matrix
d_firstnbr = typename AT::t_int_1d("acks2/kk:firstnbr",nmax);
d_firstnbr = typename AT::t_bigint_1d("acks2/kk:firstnbr",nmax);
d_numnbrs = typename AT::t_int_1d("acks2/kk:numnbrs",nmax);
d_jlist = typename AT::t_int_1d("acks2/kk:jlist",m_cap);
d_val = typename AT::t_ffloat_1d("acks2/kk:val",m_cap);
d_jlist = typename AT::t_int_1d("acks2/kk:jlist",m_cap_big);
d_val = typename AT::t_ffloat_1d("acks2/kk:val",m_cap_big);
// X matrix
d_firstnbr_X = typename AT::t_int_1d("acks2/kk:firstnbr_X",nmax);
d_firstnbr_X = typename AT::t_bigint_1d("acks2/kk:firstnbr_X",nmax);
d_numnbrs_X = typename AT::t_int_1d("acks2/kk:numnbrs_X",nmax);
d_jlist_X = typename AT::t_int_1d("acks2/kk:jlist_X",m_cap);
d_val_X = typename AT::t_ffloat_1d("acks2/kk:val_X",m_cap);
d_jlist_X = typename AT::t_int_1d("acks2/kk:jlist_X",m_cap_big);
d_val_X = typename AT::t_ffloat_1d("acks2/kk:val_X",m_cap_big);
}
/* ---------------------------------------------------------------------- */
@ -566,7 +567,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::operator() (TagACKS2Zero, const int &ii)
template<class DeviceType>
template <int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void FixACKS2ReaxFFKokkos<DeviceType>::compute_h_item(int ii, int &m_fill, const bool &final) const
void FixACKS2ReaxFFKokkos<DeviceType>::compute_h_item(int ii, bigint &m_fill, const bool &final) const
{
const int i = d_ilist[ii];
int j,jj,jtype;
@ -619,7 +620,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_h_item(int ii, int &m_fill, const
m_fill++;
}
if (final)
d_numnbrs[i] = m_fill - d_firstnbr[i];
d_numnbrs[i] = int(m_fill - d_firstnbr[i]);
}
}
@ -698,9 +699,9 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_h_team(
// calculate the global memory offset at which the H matrix values
// computed by the current team will be stored in d_val
int team_firstnbr_idx = 0;
bigint team_firstnbr_idx = 0;
Kokkos::single(Kokkos::PerTeam(team),
[=](int &val) {
[=](bigint &val) {
int totalnbrs = s_firstnbr[lastatom - firstatom - 1] +
s_numnbrs[lastatom - firstatom - 1];
val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs);
@ -726,7 +727,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_h_team(
int jnum = s_numnbrs[idx];
// calculate the write-offset for atom-i's first neighbor
int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx];
bigint atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx];
Kokkos::single(Kokkos::PerThread(team),
[&]() { d_firstnbr[i] = atomi_firstnbr_idx; });
@ -739,7 +740,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_h_team(
// are processed in batches and the batch size is vector_length
for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) {
int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH;
bigint atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH;
// count the # of neighbor atoms with non-zero electrostatic
// interaction coefficients with atom-i in the current batch
@ -782,7 +783,8 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_h_team(
valid = false;
if (x(j, 2) == ztmp && x(j, 1) < ytmp)
valid = false;
if (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)
if (x(j, 2) == ztmp && x(j, 1) == ytmp &&
x(j, 0) < xtmp)
valid = false;
}
}
@ -851,7 +853,7 @@ double FixACKS2ReaxFFKokkos<DeviceType>::calculate_H_k(const F_FLOAT &r, const F
taper = taper * r + d_tap[0];
denom = r * r * r + shld;
denom = pow(denom,1.0/3.0);
denom = cbrt(denom);
return taper * EV_TO_KCAL_PER_MOL / denom;
}
@ -861,7 +863,7 @@ double FixACKS2ReaxFFKokkos<DeviceType>::calculate_H_k(const F_FLOAT &r, const F
template<class DeviceType>
template <int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void FixACKS2ReaxFFKokkos<DeviceType>::compute_x_item(int ii, int &m_fill, const bool &final) const
void FixACKS2ReaxFFKokkos<DeviceType>::compute_x_item(int ii, bigint &m_fill, const bool &final) const
{
// The X_diag array is duplicated for OpenMP, atomic for GPU, and neither for Serial
auto v_X_diag = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_X_diag),decltype(ndup_X_diag)>::get(dup_X_diag,ndup_X_diag);
@ -927,7 +929,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_x_item(int ii, int &m_fill, const
}
if (final) {
a_X_diag[i] += tmp;
d_numnbrs_X[i] = m_fill - d_firstnbr_X[i];
d_numnbrs_X[i] = int(m_fill - d_firstnbr_X[i]);
}
}
}
@ -1005,9 +1007,9 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_x_team(
// calculate the global memory offset at which the X matrix values
// computed by the current team will be stored in d_val_X
int team_firstnbr_idx = 0;
bigint team_firstnbr_idx = 0;
Kokkos::single(Kokkos::PerTeam(team),
[=](int &val) {
[=](bigint &val) {
int totalnbrs = s_firstnbr[lastatom - firstatom - 1] +
s_numnbrs[lastatom - firstatom - 1];
val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs);
@ -1033,7 +1035,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_x_team(
int jnum = s_numnbrs[idx];
// calculate the write-offset for atom-i's first neighbor
int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx];
bigint atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx];
Kokkos::single(Kokkos::PerThread(team),
[&]() { d_firstnbr_X[i] = atomi_firstnbr_idx; });
@ -1046,7 +1048,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_x_team(
// are processed in batches and the batch size is vector_length
for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) {
int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH;
bigint atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH;
// count the # of neighbor atoms with non-zero electrostatic
// interaction coefficients with atom-i in the current batch
@ -1464,7 +1466,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::operator() (TagACKS2SparseMatvec3_Half<NE
F_FLOAT tmp = 0.0;
// H Matrix
for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
for (bigint jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
const int j = d_jlist(jj);
tmp += d_val(jj) * d_xx[j];
a_bb[j] += d_val(jj) * d_xx[i];
@ -1473,7 +1475,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::operator() (TagACKS2SparseMatvec3_Half<NE
// X Matrix
tmp = 0.0;
for(int jj = d_firstnbr_X[i]; jj < d_firstnbr_X[i] + d_numnbrs_X[i]; jj++) {
for (bigint jj = d_firstnbr_X[i]; jj < d_firstnbr_X[i] + d_numnbrs_X[i]; jj++) {
const int j = d_jlist_X(jj);
tmp += d_val_X(jj) * d_xx[NN + j];
a_bb[NN + j] += d_val_X(jj) * d_xx[NN + i];
@ -1505,13 +1507,13 @@ void FixACKS2ReaxFFKokkos<DeviceType>::operator() (TagACKS2SparseMatvec3_Full, c
F_FLOAT sum;
F_FLOAT sum2;
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT &sum) {
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const bigint &jj, F_FLOAT &sum) {
const int j = d_jlist(jj);
sum += d_val(jj) * d_xx[j];
}, sum);
team.team_barrier();
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr_X[i], d_firstnbr_X[i] + d_numnbrs_X[i]), [&] (const int &jj, F_FLOAT &sum2) {
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr_X[i], d_firstnbr_X[i] + d_numnbrs_X[i]), [&] (const bigint &jj, F_FLOAT &sum2) {
const int j = d_jlist_X(jj);
sum2 += d_val_X(jj) * d_xx[NN + j];
}, sum2);
@ -1865,8 +1867,8 @@ double FixACKS2ReaxFFKokkos<DeviceType>::memory_usage()
bytes += nmax*4 * sizeof(double); // storage
bytes += size*11 * sizeof(double); // storage
bytes += n_cap*4 * sizeof(int); // matrix...
bytes += m_cap*2 * sizeof(int);
bytes += m_cap*2 * sizeof(double);
bytes += m_cap_big*2 * sizeof(int);
bytes += m_cap_big*2 * sizeof(double);
return bytes;
}

View File

@ -74,7 +74,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
DAT::tdual_ffloat_1d get_s() {return k_s;}
KOKKOS_INLINE_FUNCTION
void num_neigh_item(int, int&) const;
void num_neigh_item(int, bigint&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagACKS2Zero, const int&) const;
@ -84,7 +84,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void compute_h_item(int, int &, const bool &) const;
void compute_h_item(int, bigint &, const bool &) const;
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
@ -92,7 +92,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void compute_x_item(int, int &, const bool &) const;
void compute_x_item(int, bigint &, const bool &) const;
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
@ -173,8 +173,9 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
int allocated_flag, last_allocate;
int need_dup,prev_last_rows_rank;
double* buf;
bigint m_cap_big;
typename AT::t_int_scalar d_mfill_offset;
typename AT::t_bigint_scalar d_mfill_offset;
typedef Kokkos::DualView<int***,DeviceType> tdual_int_1d;
Kokkos::DualView<params_acks2*,Kokkos::LayoutRight,DeviceType> k_params;
@ -197,12 +198,12 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
DAT::tdual_ffloat_2d k_bcut;
typename AT::t_ffloat_2d d_bcut;
typename AT::t_int_1d d_firstnbr;
typename AT::t_bigint_1d d_firstnbr;
typename AT::t_int_1d d_numnbrs;
typename AT::t_int_1d d_jlist;
typename AT::t_ffloat_1d d_val;
typename AT::t_int_1d d_firstnbr_X;
typename AT::t_bigint_1d d_firstnbr_X;
typename AT::t_int_1d d_numnbrs_X;
typename AT::t_int_1d d_jlist_X;
typename AT::t_ffloat_1d d_val_X;
@ -264,21 +265,21 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
template <class DeviceType>
struct FixACKS2ReaxFFKokkosNumNeighFunctor {
typedef DeviceType device_type;
typedef int value_type;
typedef bigint value_type;
FixACKS2ReaxFFKokkos<DeviceType> c;
FixACKS2ReaxFFKokkosNumNeighFunctor(FixACKS2ReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {
c.cleanup_copy();
};
KOKKOS_INLINE_FUNCTION
void operator()(const int ii, int &maxneigh) const {
c.num_neigh_item(ii, maxneigh);
void operator()(const int ii, bigint &totneigh) const {
c.num_neigh_item(ii, totneigh);
}
};
template <class DeviceType, int NEIGHFLAG>
struct FixACKS2ReaxFFKokkosComputeHFunctor {
int atoms_per_team, vector_length;
typedef int value_type;
typedef bigint value_type;
typedef Kokkos::ScratchMemorySpace<DeviceType> scratch_space;
FixACKS2ReaxFFKokkos<DeviceType> c;
@ -293,7 +294,7 @@ struct FixACKS2ReaxFFKokkosComputeHFunctor {
};
KOKKOS_INLINE_FUNCTION
void operator()(const int ii, int &m_fill, const bool &final) const {
void operator()(const int ii, bigint &m_fill, const bool &final) const {
c.template compute_h_item<NEIGHFLAG>(ii,m_fill,final);
}
@ -325,7 +326,7 @@ struct FixACKS2ReaxFFKokkosComputeHFunctor {
template <class DeviceType, int NEIGHFLAG>
struct FixACKS2ReaxFFKokkosComputeXFunctor {
int atoms_per_team, vector_length;
typedef int value_type;
typedef bigint value_type;
typedef Kokkos::ScratchMemorySpace<DeviceType> scratch_space;
FixACKS2ReaxFFKokkos<DeviceType> c;
@ -340,7 +341,7 @@ struct FixACKS2ReaxFFKokkosComputeXFunctor {
};
KOKKOS_INLINE_FUNCTION
void operator()(const int ii, int &m_fill, const bool &final) const {
void operator()(const int ii, bigint &m_fill, const bool &final) const {
c.template compute_x_item<NEIGHFLAG>(ii,m_fill,final);
}

View File

@ -62,7 +62,8 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
datamask_read = X_MASK | V_MASK | F_MASK | Q_MASK | MASK_MASK | TYPE_MASK | TAG_MASK;
datamask_modify = X_MASK;
nmax = m_cap = 0;
nmax = 0;
m_cap_big = 0;
allocated_flag = 0;
nprev = 4;
maxexchange = nprev*2;
@ -71,7 +72,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
memory->destroy(t_hist);
grow_arrays(atom->nmax);
d_mfill_offset = typename AT::t_int_scalar("qeq/kk:mfill_offset");
d_mfill_offset = typename AT::t_bigint_scalar("qeq/kk:mfill_offset");
converged = 0;
}
@ -301,10 +302,10 @@ void FixQEqReaxFFKokkos<DeviceType>::pre_force(int /*vflag*/)
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::num_neigh_item(int ii, int &maxneigh) const
void FixQEqReaxFFKokkos<DeviceType>::num_neigh_item(int ii, bigint &totneigh) const
{
const int i = d_ilist[ii];
maxneigh += d_numneigh[i];
totneigh += d_numneigh[i];
}
/* ---------------------------------------------------------------------- */
@ -316,25 +317,25 @@ void FixQEqReaxFFKokkos<DeviceType>::allocate_matrix()
// determine the total space for the H matrix
m_cap = 0;
m_cap_big = 0;
// limit scope of functor to allow deallocation of views
{
FixQEqReaxFFKokkosNumNeighFunctor<DeviceType> neigh_functor(this);
Kokkos::parallel_reduce(nn,neigh_functor,m_cap);
Kokkos::parallel_reduce(nn,neigh_functor,m_cap_big);
}
// deallocate first to reduce memory overhead
d_firstnbr = typename AT::t_int_1d();
d_firstnbr = typename AT::t_bigint_1d();
d_numnbrs = typename AT::t_int_1d();
d_jlist = typename AT::t_int_1d();
d_val = typename AT::t_ffloat_1d();
d_firstnbr = typename AT::t_int_1d("qeq/kk:firstnbr",nmax);
d_firstnbr = typename AT::t_bigint_1d("qeq/kk:firstnbr",nmax);
d_numnbrs = typename AT::t_int_1d("qeq/kk:numnbrs",nmax);
d_jlist = typename AT::t_int_1d("qeq/kk:jlist",m_cap);
d_val = typename AT::t_ffloat_1d("qeq/kk:val",m_cap);
d_jlist = typename AT::t_int_1d("qeq/kk:jlist",m_cap_big);
d_val = typename AT::t_ffloat_1d("qeq/kk:val",m_cap_big);
}
/* ---------------------------------------------------------------------- */
@ -405,7 +406,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqZero, const int &ii) const
template<class DeviceType>
template <int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void FixQEqReaxFFKokkos<DeviceType>::compute_h_item(int ii, int &m_fill, const bool &final) const
void FixQEqReaxFFKokkos<DeviceType>::compute_h_item(int ii, bigint &m_fill, const bool &final) const
{
const int i = d_ilist[ii];
int j,jj,jtype;
@ -458,7 +459,7 @@ void FixQEqReaxFFKokkos<DeviceType>::compute_h_item(int ii, int &m_fill, const b
m_fill++;
}
if (final)
d_numnbrs[i] = m_fill - d_firstnbr[i];
d_numnbrs[i] = int(m_fill - d_firstnbr[i]);
}
}
@ -537,9 +538,9 @@ void FixQEqReaxFFKokkos<DeviceType>::compute_h_team(
// calculate the global memory offset at which the H matrix values
// computed by the current team will be stored in d_val
int team_firstnbr_idx = 0;
bigint team_firstnbr_idx = 0;
Kokkos::single(Kokkos::PerTeam(team),
[=](int &val) {
[=](bigint &val) {
int totalnbrs = s_firstnbr[lastatom - firstatom - 1] +
s_numnbrs[lastatom - firstatom - 1];
val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs);
@ -565,7 +566,7 @@ void FixQEqReaxFFKokkos<DeviceType>::compute_h_team(
int jnum = s_numnbrs[idx]; // removed "const" to work around GCC 7 bug
// calculate the write-offset for atom-i's first neighbor
int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx];
bigint atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx];
Kokkos::single(Kokkos::PerThread(team),
[&]() { d_firstnbr[i] = atomi_firstnbr_idx; });
@ -578,7 +579,7 @@ void FixQEqReaxFFKokkos<DeviceType>::compute_h_team(
// are processed in batches and the batch size is vector_length
for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) {
int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH;
bigint atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH;
// count the # of neighbor atoms with non-zero electrostatic
// interaction coefficients with atom-i in the current batch
@ -935,7 +936,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqSparseMatvec2_Half<NEIGHFL
const double d_xx_i0 = d_xx(i,0);
const double d_xx_i1 = d_xx(i,1);
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT2& doi) {
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const bigint &jj, F_FLOAT2& doi) {
const int j = d_jlist(jj);
const auto d_val_jj = d_val(jj);
if (!(converged & 1)) {
@ -968,7 +969,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqSparseMatvec2_Full, const
const int i = d_ilist[k];
if (mask[i] & groupbit) {
F_FLOAT2 doitmp;
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT2& doi) {
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const bigint &jj, F_FLOAT2& doi) {
const int j = d_jlist(jj);
const auto d_val_jj = d_val(jj);
if (!(converged & 1))
@ -1286,8 +1287,8 @@ double FixQEqReaxFFKokkos<DeviceType>::memory_usage()
bytes = atom->nmax*nprev*2 * sizeof(F_FLOAT); // s_hist & t_hist
bytes += (double)atom->nmax*8 * sizeof(F_FLOAT); // storage
bytes += (double)n_cap*2 * sizeof(int); // matrix...
bytes += (double)m_cap * sizeof(int);
bytes += (double)m_cap * sizeof(F_FLOAT);
bytes += (double)m_cap_big * sizeof(int);
bytes += (double)m_cap_big * sizeof(F_FLOAT);
return bytes;
}

View File

@ -70,7 +70,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
void pre_force(int) override;
KOKKOS_INLINE_FUNCTION
void num_neigh_item(int, int&) const;
void num_neigh_item(int, bigint&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagQEqZero, const int&) const;
@ -80,7 +80,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void compute_h_item(int, int &, const bool &) const;
void compute_h_item(int, bigint &, const bool &) const;
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
@ -201,8 +201,9 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
int allocated_flag, last_allocate;
int need_dup;
int converged;
bigint m_cap_big;
typename AT::t_int_scalar d_mfill_offset;
typename AT::t_bigint_scalar d_mfill_offset;
typedef Kokkos::DualView<int***,DeviceType> tdual_int_1d;
Kokkos::DualView<params_qeq*,Kokkos::LayoutRight,DeviceType> k_params;
@ -227,7 +228,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
DAT::tdual_ffloat_1d k_tap;
typename AT::t_ffloat_1d d_tap;
typename AT::t_int_1d d_firstnbr;
typename AT::t_bigint_1d d_firstnbr;
typename AT::t_int_1d d_numnbrs;
typename AT::t_int_1d d_jlist;
typename AT::t_ffloat_1d d_val;
@ -290,21 +291,21 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
template <class DeviceType>
struct FixQEqReaxFFKokkosNumNeighFunctor {
typedef DeviceType device_type;
typedef int value_type;
typedef bigint value_type;
FixQEqReaxFFKokkos<DeviceType> c;
FixQEqReaxFFKokkosNumNeighFunctor(FixQEqReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {
c.cleanup_copy();
};
KOKKOS_INLINE_FUNCTION
void operator()(const int ii, int &maxneigh) const {
c.num_neigh_item(ii, maxneigh);
void operator()(const int ii, bigint &totneigh) const {
c.num_neigh_item(ii, totneigh);
}
};
template <class DeviceType, int NEIGHFLAG>
struct FixQEqReaxFFKokkosComputeHFunctor {
int atoms_per_team, vector_length;
typedef int value_type;
typedef bigint value_type;
typedef Kokkos::ScratchMemorySpace<DeviceType> scratch_space;
FixQEqReaxFFKokkos<DeviceType> c;
@ -319,7 +320,7 @@ struct FixQEqReaxFFKokkosComputeHFunctor {
};
KOKKOS_INLINE_FUNCTION
void operator()(const int ii, int &m_fill, const bool &final) const {
void operator()(const int ii, bigint &m_fill, const bool &final) const {
c.template compute_h_item<NEIGHFLAG>(ii,m_fill,final);
}

View File

@ -638,33 +638,32 @@ void KokkosLMP::accelerator(int narg, char **arg)
called by Finish
------------------------------------------------------------------------- */
int KokkosLMP::neigh_count(int m)
bigint KokkosLMP::neigh_count(int m)
{
int inum = 0;
int nneigh = 0;
ArrayTypes<LMPHostType>::t_int_1d h_ilist;
ArrayTypes<LMPHostType>::t_int_1d h_numneigh;
bigint nneigh = 0;
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
if (nk->lists[m]->execution_space == Host) {
NeighListKokkos<LMPHostType>* nlistKK = (NeighListKokkos<LMPHostType>*) nk->lists[m];
inum = nlistKK->inum;
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
int inum = nlistKK->inum;
auto d_ilist = nlistKK->d_ilist;
auto d_numneigh = nlistKK->d_numneigh;
Kokkos::parallel_reduce(Kokkos::RangePolicy<LMPHostType>(0,inum), LAMMPS_LAMBDA(int ii, bigint &nneigh) {
const int i = d_ilist[ii];
nneigh += d_numneigh[i];
},nneigh);
} else if (nk->lists[m]->execution_space == Device) {
NeighListKokkos<LMPDeviceType>* nlistKK = (NeighListKokkos<LMPDeviceType>*) nk->lists[m];
inum = nlistKK->inum;
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
int inum = nlistKK->inum;
auto d_ilist = nlistKK->d_ilist;
auto d_numneigh = nlistKK->d_numneigh;
Kokkos::parallel_reduce(Kokkos::RangePolicy<LMPDeviceType>(0,inum), LAMMPS_LAMBDA(int ii, bigint &nneigh) {
const int i = d_ilist[ii];
nneigh += d_numneigh[i];
},nneigh);
}
for (int i = 0; i < inum; i++) nneigh += h_numneigh[h_ilist[i]];
return nneigh;
}

View File

@ -64,7 +64,7 @@ class KokkosLMP : protected Pointers {
static void initialize(const Kokkos::InitializationSettings&, Error *);
static void finalize();
void accelerator(int, char **);
int neigh_count(int);
bigint neigh_count(int);
template<class DeviceType>
int need_dup(int qeq_flag = 0)

View File

@ -641,6 +641,14 @@ typedef tdual_int_scalar::t_dev_const t_int_scalar_const;
typedef tdual_int_scalar::t_dev_um t_int_scalar_um;
typedef tdual_int_scalar::t_dev_const_um t_int_scalar_const_um;
typedef Kokkos::
DualView<LAMMPS_NS::bigint, LMPDeviceType::array_layout, LMPDeviceType> tdual_bigint_scalar;
typedef tdual_bigint_scalar::t_dev t_bigint_scalar;
typedef tdual_bigint_scalar::t_dev_const t_bigint_scalar_const;
typedef tdual_bigint_scalar::t_dev_um t_bigint_scalar_um;
typedef tdual_bigint_scalar::t_dev_const_um t_bigint_scalar_const_um;
typedef tdual_bigint_scalar::t_dev_const_randomread t_bigint_scalar_randomread;
typedef Kokkos::
DualView<LAMMPS_NS::tagint, LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_scalar;
typedef tdual_tagint_scalar::t_dev t_tagint_scalar;
@ -666,6 +674,14 @@ typedef tdual_int_1d::t_dev_um t_int_1d_um;
typedef tdual_int_1d::t_dev_const_um t_int_1d_const_um;
typedef tdual_int_1d::t_dev_const_randomread t_int_1d_randomread;
typedef Kokkos::
DualView<LAMMPS_NS::bigint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_bigint_1d;
typedef tdual_bigint_1d::t_dev t_bigint_1d;
typedef tdual_bigint_1d::t_dev_const t_bigint_1d_const;
typedef tdual_bigint_1d::t_dev_um t_bigint_1d_um;
typedef tdual_bigint_1d::t_dev_const_um t_bigint_1d_const_um;
typedef tdual_bigint_1d::t_dev_const_randomread t_bigint_1d_randomread;
typedef Kokkos::
DualView<int*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_int_1d_3;
typedef tdual_int_1d_3::t_dev t_int_1d_3;
@ -974,6 +990,12 @@ typedef tdual_int_scalar::t_host_const t_int_scalar_const;
typedef tdual_int_scalar::t_host_um t_int_scalar_um;
typedef tdual_int_scalar::t_host_const_um t_int_scalar_const_um;
typedef Kokkos::DualView<LAMMPS_NS::bigint, LMPDeviceType::array_layout, LMPDeviceType> tdual_bigint_scalar;
typedef tdual_bigint_scalar::t_host t_bigint_scalar;
typedef tdual_bigint_scalar::t_host_const t_bigint_scalar_const;
typedef tdual_bigint_scalar::t_host_um t_bigint_scalar_um;
typedef tdual_bigint_scalar::t_host_const_um t_bigint_scalar_const_um;
typedef Kokkos::DualView<LAMMPS_NS::tagint, LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_scalar;
typedef tdual_tagint_scalar::t_host t_tagint_scalar;
typedef tdual_tagint_scalar::t_host_const t_tagint_scalar_const;
@ -994,6 +1016,13 @@ typedef tdual_int_1d::t_host_um t_int_1d_um;
typedef tdual_int_1d::t_host_const_um t_int_1d_const_um;
typedef tdual_int_1d::t_host_const_randomread t_int_1d_randomread;
typedef Kokkos::DualView<LAMMPS_NS::bigint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_bigint_1d;
typedef tdual_bigint_1d::t_host t_bigint_1d;
typedef tdual_bigint_1d::t_host_const t_bigint_1d_const;
typedef tdual_bigint_1d::t_host_um t_bigint_1d_um;
typedef tdual_bigint_1d::t_host_const_um t_bigint_1d_const_um;
typedef tdual_bigint_1d::t_host_const_randomread t_bigint_1d_randomread;
typedef Kokkos::DualView<int*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_int_1d_3;
typedef tdual_int_1d_3::t_host t_int_1d_3;
typedef tdual_int_1d_3::t_host_const t_int_1d_3_const;