Fix integer overflow for large ReaxFF systems with KOKKOS package
This commit is contained in:
@ -53,7 +53,8 @@ FixACKS2ReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK | TAG_MASK;
|
||||
datamask_modify = Q_MASK | X_MASK;
|
||||
|
||||
nmax = m_cap = 0;
|
||||
nmax = 0;
|
||||
m_cap_big = 0;
|
||||
allocated_flag = 0;
|
||||
nprev = 4;
|
||||
|
||||
@ -66,7 +67,7 @@ FixACKS2ReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
buf = new double[2*nprev];
|
||||
prev_last_rows_rank = 0;
|
||||
|
||||
d_mfill_offset = typename AT::t_int_scalar("acks2/kk:mfill_offset");
|
||||
d_mfill_offset = typename AT::t_bigint_scalar("acks2/kk:mfill_offset");
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -418,10 +419,10 @@ void FixACKS2ReaxFFKokkos<DeviceType>::pre_force(int /*vflag*/)
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixACKS2ReaxFFKokkos<DeviceType>::num_neigh_item(int ii, int &maxneigh) const
|
||||
void FixACKS2ReaxFFKokkos<DeviceType>::num_neigh_item(int ii, bigint &totneigh) const
|
||||
{
|
||||
const int i = d_ilist[ii];
|
||||
maxneigh += d_numneigh[i];
|
||||
totneigh += d_numneigh[i];
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -433,39 +434,39 @@ void FixACKS2ReaxFFKokkos<DeviceType>::allocate_matrix()
|
||||
|
||||
// determine the total space for the H matrix
|
||||
|
||||
m_cap = 0;
|
||||
m_cap_big = 0;
|
||||
|
||||
// limit scope of functor to allow deallocation of views
|
||||
{
|
||||
FixACKS2ReaxFFKokkosNumNeighFunctor<DeviceType> neigh_functor(this);
|
||||
Kokkos::parallel_reduce(nn,neigh_functor,m_cap);
|
||||
Kokkos::parallel_reduce(nn,neigh_functor,m_cap_big);
|
||||
}
|
||||
|
||||
// deallocate first to reduce memory overhead
|
||||
|
||||
d_firstnbr = typename AT::t_int_1d();
|
||||
d_firstnbr = typename AT::t_bigint_1d();
|
||||
d_numnbrs = typename AT::t_int_1d();
|
||||
d_jlist = typename AT::t_int_1d();
|
||||
d_val = typename AT::t_ffloat_1d();
|
||||
|
||||
d_firstnbr_X = typename AT::t_int_1d();
|
||||
d_firstnbr_X = typename AT::t_bigint_1d();
|
||||
d_numnbrs_X = typename AT::t_int_1d();
|
||||
d_jlist_X = typename AT::t_int_1d();
|
||||
d_val_X = typename AT::t_ffloat_1d();
|
||||
|
||||
// H matrix
|
||||
|
||||
d_firstnbr = typename AT::t_int_1d("acks2/kk:firstnbr",nmax);
|
||||
d_firstnbr = typename AT::t_bigint_1d("acks2/kk:firstnbr",nmax);
|
||||
d_numnbrs = typename AT::t_int_1d("acks2/kk:numnbrs",nmax);
|
||||
d_jlist = typename AT::t_int_1d("acks2/kk:jlist",m_cap);
|
||||
d_val = typename AT::t_ffloat_1d("acks2/kk:val",m_cap);
|
||||
d_jlist = typename AT::t_int_1d("acks2/kk:jlist",m_cap_big);
|
||||
d_val = typename AT::t_ffloat_1d("acks2/kk:val",m_cap_big);
|
||||
|
||||
// X matrix
|
||||
|
||||
d_firstnbr_X = typename AT::t_int_1d("acks2/kk:firstnbr_X",nmax);
|
||||
d_firstnbr_X = typename AT::t_bigint_1d("acks2/kk:firstnbr_X",nmax);
|
||||
d_numnbrs_X = typename AT::t_int_1d("acks2/kk:numnbrs_X",nmax);
|
||||
d_jlist_X = typename AT::t_int_1d("acks2/kk:jlist_X",m_cap);
|
||||
d_val_X = typename AT::t_ffloat_1d("acks2/kk:val_X",m_cap);
|
||||
d_jlist_X = typename AT::t_int_1d("acks2/kk:jlist_X",m_cap_big);
|
||||
d_val_X = typename AT::t_ffloat_1d("acks2/kk:val_X",m_cap_big);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -566,7 +567,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::operator() (TagACKS2Zero, const int &ii)
|
||||
template<class DeviceType>
|
||||
template <int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixACKS2ReaxFFKokkos<DeviceType>::compute_h_item(int ii, int &m_fill, const bool &final) const
|
||||
void FixACKS2ReaxFFKokkos<DeviceType>::compute_h_item(int ii, bigint &m_fill, const bool &final) const
|
||||
{
|
||||
const int i = d_ilist[ii];
|
||||
int j,jj,jtype;
|
||||
@ -619,7 +620,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_h_item(int ii, int &m_fill, const
|
||||
m_fill++;
|
||||
}
|
||||
if (final)
|
||||
d_numnbrs[i] = m_fill - d_firstnbr[i];
|
||||
d_numnbrs[i] = int(m_fill - d_firstnbr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -698,9 +699,9 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_h_team(
|
||||
|
||||
// calculate the global memory offset from where the H matrix values to be
|
||||
// calculated by the current team will be stored in d_val
|
||||
int team_firstnbr_idx = 0;
|
||||
bigint team_firstnbr_idx = 0;
|
||||
Kokkos::single(Kokkos::PerTeam(team),
|
||||
[=](int &val) {
|
||||
[=](bigint &val) {
|
||||
int totalnbrs = s_firstnbr[lastatom - firstatom - 1] +
|
||||
s_numnbrs[lastatom - firstatom - 1];
|
||||
val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs);
|
||||
@ -726,7 +727,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_h_team(
|
||||
int jnum = s_numnbrs[idx];
|
||||
|
||||
// calculate the write-offset for atom-i's first neighbor
|
||||
int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx];
|
||||
bigint atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx];
|
||||
Kokkos::single(Kokkos::PerThread(team),
|
||||
[&]() { d_firstnbr[i] = atomi_firstnbr_idx; });
|
||||
|
||||
@ -739,7 +740,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_h_team(
|
||||
// are processed in batches and the batch size is vector_length
|
||||
for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) {
|
||||
|
||||
int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH;
|
||||
bigint atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH;
|
||||
|
||||
// count the # of neighbor atoms with non-zero electrostatic
|
||||
// interaction coefficients with atom-i in the current batch
|
||||
@ -782,7 +783,8 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_h_team(
|
||||
valid = false;
|
||||
if (x(j, 2) == ztmp && x(j, 1) < ytmp)
|
||||
valid = false;
|
||||
if (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)
|
||||
if (x(j, 2) == ztmp && x(j, 1) == ytmp &&
|
||||
x(j, 0) < xtmp)
|
||||
valid = false;
|
||||
}
|
||||
}
|
||||
@ -851,7 +853,7 @@ double FixACKS2ReaxFFKokkos<DeviceType>::calculate_H_k(const F_FLOAT &r, const F
|
||||
taper = taper * r + d_tap[0];
|
||||
|
||||
denom = r * r * r + shld;
|
||||
denom = pow(denom,1.0/3.0);
|
||||
denom = cbrt(denom);
|
||||
|
||||
return taper * EV_TO_KCAL_PER_MOL / denom;
|
||||
}
|
||||
@ -861,7 +863,7 @@ double FixACKS2ReaxFFKokkos<DeviceType>::calculate_H_k(const F_FLOAT &r, const F
|
||||
template<class DeviceType>
|
||||
template <int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixACKS2ReaxFFKokkos<DeviceType>::compute_x_item(int ii, int &m_fill, const bool &final) const
|
||||
void FixACKS2ReaxFFKokkos<DeviceType>::compute_x_item(int ii, bigint &m_fill, const bool &final) const
|
||||
{
|
||||
// The X_diag array is duplicated for OpenMP, atomic for GPU, and neither for Serial
|
||||
auto v_X_diag = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_X_diag),decltype(ndup_X_diag)>::get(dup_X_diag,ndup_X_diag);
|
||||
@ -927,7 +929,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_x_item(int ii, int &m_fill, const
|
||||
}
|
||||
if (final) {
|
||||
a_X_diag[i] += tmp;
|
||||
d_numnbrs_X[i] = m_fill - d_firstnbr_X[i];
|
||||
d_numnbrs_X[i] = int(m_fill - d_firstnbr_X[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1005,9 +1007,9 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_x_team(
|
||||
|
||||
// calculate the global memory offset from where the X matrix values to be
|
||||
// calculated by the current team will be stored in d_val_X
|
||||
int team_firstnbr_idx = 0;
|
||||
bigint team_firstnbr_idx = 0;
|
||||
Kokkos::single(Kokkos::PerTeam(team),
|
||||
[=](int &val) {
|
||||
[=](bigint &val) {
|
||||
int totalnbrs = s_firstnbr[lastatom - firstatom - 1] +
|
||||
s_numnbrs[lastatom - firstatom - 1];
|
||||
val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs);
|
||||
@ -1033,7 +1035,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_x_team(
|
||||
int jnum = s_numnbrs[idx];
|
||||
|
||||
// calculate the write-offset for atom-i's first neighbor
|
||||
int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx];
|
||||
bigint atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx];
|
||||
Kokkos::single(Kokkos::PerThread(team),
|
||||
[&]() { d_firstnbr_X[i] = atomi_firstnbr_idx; });
|
||||
|
||||
@ -1046,7 +1048,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::compute_x_team(
|
||||
// are processed in batches and the batch size is vector_length
|
||||
for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) {
|
||||
|
||||
int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH;
|
||||
bigint atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH;
|
||||
|
||||
// count the # of neighbor atoms with non-zero electrostatic
|
||||
// interaction coefficients with atom-i in the current batch
|
||||
@ -1464,7 +1466,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::operator() (TagACKS2SparseMatvec3_Half<NE
|
||||
F_FLOAT tmp = 0.0;
|
||||
|
||||
// H Matrix
|
||||
for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
|
||||
for (bigint jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
|
||||
const int j = d_jlist(jj);
|
||||
tmp += d_val(jj) * d_xx[j];
|
||||
a_bb[j] += d_val(jj) * d_xx[i];
|
||||
@ -1473,7 +1475,7 @@ void FixACKS2ReaxFFKokkos<DeviceType>::operator() (TagACKS2SparseMatvec3_Half<NE
|
||||
|
||||
// X Matrix
|
||||
tmp = 0.0;
|
||||
for(int jj = d_firstnbr_X[i]; jj < d_firstnbr_X[i] + d_numnbrs_X[i]; jj++) {
|
||||
for (bigint jj = d_firstnbr_X[i]; jj < d_firstnbr_X[i] + d_numnbrs_X[i]; jj++) {
|
||||
const int j = d_jlist_X(jj);
|
||||
tmp += d_val_X(jj) * d_xx[NN + j];
|
||||
a_bb[NN + j] += d_val_X(jj) * d_xx[NN + i];
|
||||
@ -1505,13 +1507,13 @@ void FixACKS2ReaxFFKokkos<DeviceType>::operator() (TagACKS2SparseMatvec3_Full, c
|
||||
F_FLOAT sum;
|
||||
F_FLOAT sum2;
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT &sum) {
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const bigint &jj, F_FLOAT &sum) {
|
||||
const int j = d_jlist(jj);
|
||||
sum += d_val(jj) * d_xx[j];
|
||||
}, sum);
|
||||
team.team_barrier();
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr_X[i], d_firstnbr_X[i] + d_numnbrs_X[i]), [&] (const int &jj, F_FLOAT &sum2) {
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr_X[i], d_firstnbr_X[i] + d_numnbrs_X[i]), [&] (const bigint &jj, F_FLOAT &sum2) {
|
||||
const int j = d_jlist_X(jj);
|
||||
sum2 += d_val_X(jj) * d_xx[NN + j];
|
||||
}, sum2);
|
||||
@ -1865,8 +1867,8 @@ double FixACKS2ReaxFFKokkos<DeviceType>::memory_usage()
|
||||
bytes += nmax*4 * sizeof(double); // storage
|
||||
bytes += size*11 * sizeof(double); // storage
|
||||
bytes += n_cap*4 * sizeof(int); // matrix...
|
||||
bytes += m_cap*2 * sizeof(int);
|
||||
bytes += m_cap*2 * sizeof(double);
|
||||
bytes += m_cap_big*2 * sizeof(int);
|
||||
bytes += m_cap_big*2 * sizeof(double);
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
@ -74,7 +74,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
|
||||
DAT::tdual_ffloat_1d get_s() {return k_s;}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void num_neigh_item(int, int&) const;
|
||||
void num_neigh_item(int, bigint&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagACKS2Zero, const int&) const;
|
||||
@ -84,7 +84,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
|
||||
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_h_item(int, int &, const bool &) const;
|
||||
void compute_h_item(int, bigint &, const bool &) const;
|
||||
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -92,7 +92,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
|
||||
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_x_item(int, int &, const bool &) const;
|
||||
void compute_x_item(int, bigint &, const bool &) const;
|
||||
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -173,8 +173,9 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
|
||||
int allocated_flag, last_allocate;
|
||||
int need_dup,prev_last_rows_rank;
|
||||
double* buf;
|
||||
bigint m_cap_big;
|
||||
|
||||
typename AT::t_int_scalar d_mfill_offset;
|
||||
typename AT::t_bigint_scalar d_mfill_offset;
|
||||
|
||||
typedef Kokkos::DualView<int***,DeviceType> tdual_int_1d;
|
||||
Kokkos::DualView<params_acks2*,Kokkos::LayoutRight,DeviceType> k_params;
|
||||
@ -197,12 +198,12 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
|
||||
DAT::tdual_ffloat_2d k_bcut;
|
||||
typename AT::t_ffloat_2d d_bcut;
|
||||
|
||||
typename AT::t_int_1d d_firstnbr;
|
||||
typename AT::t_bigint_1d d_firstnbr;
|
||||
typename AT::t_int_1d d_numnbrs;
|
||||
typename AT::t_int_1d d_jlist;
|
||||
typename AT::t_ffloat_1d d_val;
|
||||
|
||||
typename AT::t_int_1d d_firstnbr_X;
|
||||
typename AT::t_bigint_1d d_firstnbr_X;
|
||||
typename AT::t_int_1d d_numnbrs_X;
|
||||
typename AT::t_int_1d d_jlist_X;
|
||||
typename AT::t_ffloat_1d d_val_X;
|
||||
@ -264,21 +265,21 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
|
||||
template <class DeviceType>
|
||||
struct FixACKS2ReaxFFKokkosNumNeighFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef int value_type;
|
||||
typedef bigint value_type;
|
||||
FixACKS2ReaxFFKokkos<DeviceType> c;
|
||||
FixACKS2ReaxFFKokkosNumNeighFunctor(FixACKS2ReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
c.cleanup_copy();
|
||||
};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int ii, int &maxneigh) const {
|
||||
c.num_neigh_item(ii, maxneigh);
|
||||
void operator()(const int ii, bigint &totneigh) const {
|
||||
c.num_neigh_item(ii, totneigh);
|
||||
}
|
||||
};
|
||||
|
||||
template <class DeviceType, int NEIGHFLAG>
|
||||
struct FixACKS2ReaxFFKokkosComputeHFunctor {
|
||||
int atoms_per_team, vector_length;
|
||||
typedef int value_type;
|
||||
typedef bigint value_type;
|
||||
typedef Kokkos::ScratchMemorySpace<DeviceType> scratch_space;
|
||||
FixACKS2ReaxFFKokkos<DeviceType> c;
|
||||
|
||||
@ -293,7 +294,7 @@ struct FixACKS2ReaxFFKokkosComputeHFunctor {
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int ii, int &m_fill, const bool &final) const {
|
||||
void operator()(const int ii, bigint &m_fill, const bool &final) const {
|
||||
c.template compute_h_item<NEIGHFLAG>(ii,m_fill,final);
|
||||
}
|
||||
|
||||
@ -325,7 +326,7 @@ struct FixACKS2ReaxFFKokkosComputeHFunctor {
|
||||
template <class DeviceType, int NEIGHFLAG>
|
||||
struct FixACKS2ReaxFFKokkosComputeXFunctor {
|
||||
int atoms_per_team, vector_length;
|
||||
typedef int value_type;
|
||||
typedef bigint value_type;
|
||||
typedef Kokkos::ScratchMemorySpace<DeviceType> scratch_space;
|
||||
FixACKS2ReaxFFKokkos<DeviceType> c;
|
||||
|
||||
@ -340,7 +341,7 @@ struct FixACKS2ReaxFFKokkosComputeXFunctor {
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int ii, int &m_fill, const bool &final) const {
|
||||
void operator()(const int ii, bigint &m_fill, const bool &final) const {
|
||||
c.template compute_x_item<NEIGHFLAG>(ii,m_fill,final);
|
||||
}
|
||||
|
||||
|
||||
@ -62,7 +62,8 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
datamask_read = X_MASK | V_MASK | F_MASK | Q_MASK | MASK_MASK | TYPE_MASK | TAG_MASK;
|
||||
datamask_modify = X_MASK;
|
||||
|
||||
nmax = m_cap = 0;
|
||||
nmax = 0;
|
||||
m_cap_big = 0;
|
||||
allocated_flag = 0;
|
||||
nprev = 4;
|
||||
maxexchange = nprev*2;
|
||||
@ -71,7 +72,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
memory->destroy(t_hist);
|
||||
grow_arrays(atom->nmax);
|
||||
|
||||
d_mfill_offset = typename AT::t_int_scalar("qeq/kk:mfill_offset");
|
||||
d_mfill_offset = typename AT::t_bigint_scalar("qeq/kk:mfill_offset");
|
||||
|
||||
converged = 0;
|
||||
}
|
||||
@ -301,10 +302,10 @@ void FixQEqReaxFFKokkos<DeviceType>::pre_force(int /*vflag*/)
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxFFKokkos<DeviceType>::num_neigh_item(int ii, int &maxneigh) const
|
||||
void FixQEqReaxFFKokkos<DeviceType>::num_neigh_item(int ii, bigint &totneigh) const
|
||||
{
|
||||
const int i = d_ilist[ii];
|
||||
maxneigh += d_numneigh[i];
|
||||
totneigh += d_numneigh[i];
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -316,25 +317,25 @@ void FixQEqReaxFFKokkos<DeviceType>::allocate_matrix()
|
||||
|
||||
// determine the total space for the H matrix
|
||||
|
||||
m_cap = 0;
|
||||
m_cap_big = 0;
|
||||
|
||||
// limit scope of functor to allow deallocation of views
|
||||
{
|
||||
FixQEqReaxFFKokkosNumNeighFunctor<DeviceType> neigh_functor(this);
|
||||
Kokkos::parallel_reduce(nn,neigh_functor,m_cap);
|
||||
Kokkos::parallel_reduce(nn,neigh_functor,m_cap_big);
|
||||
}
|
||||
|
||||
// deallocate first to reduce memory overhead
|
||||
|
||||
d_firstnbr = typename AT::t_int_1d();
|
||||
d_firstnbr = typename AT::t_bigint_1d();
|
||||
d_numnbrs = typename AT::t_int_1d();
|
||||
d_jlist = typename AT::t_int_1d();
|
||||
d_val = typename AT::t_ffloat_1d();
|
||||
|
||||
d_firstnbr = typename AT::t_int_1d("qeq/kk:firstnbr",nmax);
|
||||
d_firstnbr = typename AT::t_bigint_1d("qeq/kk:firstnbr",nmax);
|
||||
d_numnbrs = typename AT::t_int_1d("qeq/kk:numnbrs",nmax);
|
||||
d_jlist = typename AT::t_int_1d("qeq/kk:jlist",m_cap);
|
||||
d_val = typename AT::t_ffloat_1d("qeq/kk:val",m_cap);
|
||||
d_jlist = typename AT::t_int_1d("qeq/kk:jlist",m_cap_big);
|
||||
d_val = typename AT::t_ffloat_1d("qeq/kk:val",m_cap_big);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -405,7 +406,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqZero, const int &ii) const
|
||||
template<class DeviceType>
|
||||
template <int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxFFKokkos<DeviceType>::compute_h_item(int ii, int &m_fill, const bool &final) const
|
||||
void FixQEqReaxFFKokkos<DeviceType>::compute_h_item(int ii, bigint &m_fill, const bool &final) const
|
||||
{
|
||||
const int i = d_ilist[ii];
|
||||
int j,jj,jtype;
|
||||
@ -458,7 +459,7 @@ void FixQEqReaxFFKokkos<DeviceType>::compute_h_item(int ii, int &m_fill, const b
|
||||
m_fill++;
|
||||
}
|
||||
if (final)
|
||||
d_numnbrs[i] = m_fill - d_firstnbr[i];
|
||||
d_numnbrs[i] = int(m_fill - d_firstnbr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -537,9 +538,9 @@ void FixQEqReaxFFKokkos<DeviceType>::compute_h_team(
|
||||
|
||||
// calculate the global memory offset from where the H matrix values to be
|
||||
// calculated by the current team will be stored in d_val
|
||||
int team_firstnbr_idx = 0;
|
||||
bigint team_firstnbr_idx = 0;
|
||||
Kokkos::single(Kokkos::PerTeam(team),
|
||||
[=](int &val) {
|
||||
[=](bigint &val) {
|
||||
int totalnbrs = s_firstnbr[lastatom - firstatom - 1] +
|
||||
s_numnbrs[lastatom - firstatom - 1];
|
||||
val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs);
|
||||
@ -565,7 +566,7 @@ void FixQEqReaxFFKokkos<DeviceType>::compute_h_team(
|
||||
int jnum = s_numnbrs[idx]; // removed "const" to work around GCC 7 bug
|
||||
|
||||
// calculate the write-offset for atom-i's first neighbor
|
||||
int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx];
|
||||
bigint atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx];
|
||||
Kokkos::single(Kokkos::PerThread(team),
|
||||
[&]() { d_firstnbr[i] = atomi_firstnbr_idx; });
|
||||
|
||||
@ -578,7 +579,7 @@ void FixQEqReaxFFKokkos<DeviceType>::compute_h_team(
|
||||
// are processed in batches and the batch size is vector_length
|
||||
for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) {
|
||||
|
||||
int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH;
|
||||
bigint atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH;
|
||||
|
||||
// count the # of neighbor atoms with non-zero electrostatic
|
||||
// interaction coefficients with atom-i in the current batch
|
||||
@ -935,7 +936,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqSparseMatvec2_Half<NEIGHFL
|
||||
const double d_xx_i0 = d_xx(i,0);
|
||||
const double d_xx_i1 = d_xx(i,1);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT2& doi) {
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const bigint &jj, F_FLOAT2& doi) {
|
||||
const int j = d_jlist(jj);
|
||||
const auto d_val_jj = d_val(jj);
|
||||
if (!(converged & 1)) {
|
||||
@ -968,7 +969,7 @@ void FixQEqReaxFFKokkos<DeviceType>::operator()(TagQEqSparseMatvec2_Full, const
|
||||
const int i = d_ilist[k];
|
||||
if (mask[i] & groupbit) {
|
||||
F_FLOAT2 doitmp;
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT2& doi) {
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const bigint &jj, F_FLOAT2& doi) {
|
||||
const int j = d_jlist(jj);
|
||||
const auto d_val_jj = d_val(jj);
|
||||
if (!(converged & 1))
|
||||
@ -1286,8 +1287,8 @@ double FixQEqReaxFFKokkos<DeviceType>::memory_usage()
|
||||
bytes = atom->nmax*nprev*2 * sizeof(F_FLOAT); // s_hist & t_hist
|
||||
bytes += (double)atom->nmax*8 * sizeof(F_FLOAT); // storage
|
||||
bytes += (double)n_cap*2 * sizeof(int); // matrix...
|
||||
bytes += (double)m_cap * sizeof(int);
|
||||
bytes += (double)m_cap * sizeof(F_FLOAT);
|
||||
bytes += (double)m_cap_big * sizeof(int);
|
||||
bytes += (double)m_cap_big * sizeof(F_FLOAT);
|
||||
|
||||
return bytes;
|
||||
}
|
||||
|
||||
@ -70,7 +70,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
|
||||
void pre_force(int) override;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void num_neigh_item(int, int&) const;
|
||||
void num_neigh_item(int, bigint&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagQEqZero, const int&) const;
|
||||
@ -80,7 +80,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
|
||||
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_h_item(int, int &, const bool &) const;
|
||||
void compute_h_item(int, bigint &, const bool &) const;
|
||||
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -201,8 +201,9 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
|
||||
int allocated_flag, last_allocate;
|
||||
int need_dup;
|
||||
int converged;
|
||||
bigint m_cap_big;
|
||||
|
||||
typename AT::t_int_scalar d_mfill_offset;
|
||||
typename AT::t_bigint_scalar d_mfill_offset;
|
||||
|
||||
typedef Kokkos::DualView<int***,DeviceType> tdual_int_1d;
|
||||
Kokkos::DualView<params_qeq*,Kokkos::LayoutRight,DeviceType> k_params;
|
||||
@ -227,7 +228,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
|
||||
DAT::tdual_ffloat_1d k_tap;
|
||||
typename AT::t_ffloat_1d d_tap;
|
||||
|
||||
typename AT::t_int_1d d_firstnbr;
|
||||
typename AT::t_bigint_1d d_firstnbr;
|
||||
typename AT::t_int_1d d_numnbrs;
|
||||
typename AT::t_int_1d d_jlist;
|
||||
typename AT::t_ffloat_1d d_val;
|
||||
@ -290,21 +291,21 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase {
|
||||
template <class DeviceType>
|
||||
struct FixQEqReaxFFKokkosNumNeighFunctor {
|
||||
typedef DeviceType device_type;
|
||||
typedef int value_type;
|
||||
typedef bigint value_type;
|
||||
FixQEqReaxFFKokkos<DeviceType> c;
|
||||
FixQEqReaxFFKokkosNumNeighFunctor(FixQEqReaxFFKokkos<DeviceType>* c_ptr):c(*c_ptr) {
|
||||
c.cleanup_copy();
|
||||
};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int ii, int &maxneigh) const {
|
||||
c.num_neigh_item(ii, maxneigh);
|
||||
void operator()(const int ii, bigint &totneigh) const {
|
||||
c.num_neigh_item(ii, totneigh);
|
||||
}
|
||||
};
|
||||
|
||||
template <class DeviceType, int NEIGHFLAG>
|
||||
struct FixQEqReaxFFKokkosComputeHFunctor {
|
||||
int atoms_per_team, vector_length;
|
||||
typedef int value_type;
|
||||
typedef bigint value_type;
|
||||
typedef Kokkos::ScratchMemorySpace<DeviceType> scratch_space;
|
||||
FixQEqReaxFFKokkos<DeviceType> c;
|
||||
|
||||
@ -319,7 +320,7 @@ struct FixQEqReaxFFKokkosComputeHFunctor {
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int ii, int &m_fill, const bool &final) const {
|
||||
void operator()(const int ii, bigint &m_fill, const bool &final) const {
|
||||
c.template compute_h_item<NEIGHFLAG>(ii,m_fill,final);
|
||||
}
|
||||
|
||||
|
||||
@ -638,33 +638,32 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
called by Finish
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int KokkosLMP::neigh_count(int m)
|
||||
bigint KokkosLMP::neigh_count(int m)
|
||||
{
|
||||
int inum = 0;
|
||||
int nneigh = 0;
|
||||
|
||||
ArrayTypes<LMPHostType>::t_int_1d h_ilist;
|
||||
ArrayTypes<LMPHostType>::t_int_1d h_numneigh;
|
||||
bigint nneigh = 0;
|
||||
|
||||
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
|
||||
if (nk->lists[m]->execution_space == Host) {
|
||||
NeighListKokkos<LMPHostType>* nlistKK = (NeighListKokkos<LMPHostType>*) nk->lists[m];
|
||||
inum = nlistKK->inum;
|
||||
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
|
||||
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
|
||||
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
|
||||
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
|
||||
int inum = nlistKK->inum;
|
||||
auto d_ilist = nlistKK->d_ilist;
|
||||
auto d_numneigh = nlistKK->d_numneigh;
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<LMPHostType>(0,inum), LAMMPS_LAMBDA(int ii, bigint &nneigh) {
|
||||
const int i = d_ilist[ii];
|
||||
nneigh += d_numneigh[i];
|
||||
},nneigh);
|
||||
|
||||
} else if (nk->lists[m]->execution_space == Device) {
|
||||
NeighListKokkos<LMPDeviceType>* nlistKK = (NeighListKokkos<LMPDeviceType>*) nk->lists[m];
|
||||
inum = nlistKK->inum;
|
||||
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
|
||||
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
|
||||
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
|
||||
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
|
||||
int inum = nlistKK->inum;
|
||||
auto d_ilist = nlistKK->d_ilist;
|
||||
auto d_numneigh = nlistKK->d_numneigh;
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<LMPDeviceType>(0,inum), LAMMPS_LAMBDA(int ii, bigint &nneigh) {
|
||||
const int i = d_ilist[ii];
|
||||
nneigh += d_numneigh[i];
|
||||
},nneigh);
|
||||
}
|
||||
|
||||
for (int i = 0; i < inum; i++) nneigh += h_numneigh[h_ilist[i]];
|
||||
|
||||
return nneigh;
|
||||
}
|
||||
|
||||
|
||||
@ -64,7 +64,7 @@ class KokkosLMP : protected Pointers {
|
||||
static void initialize(const Kokkos::InitializationSettings&, Error *);
|
||||
static void finalize();
|
||||
void accelerator(int, char **);
|
||||
int neigh_count(int);
|
||||
bigint neigh_count(int);
|
||||
|
||||
template<class DeviceType>
|
||||
int need_dup(int qeq_flag = 0)
|
||||
|
||||
@ -641,6 +641,14 @@ typedef tdual_int_scalar::t_dev_const t_int_scalar_const;
|
||||
typedef tdual_int_scalar::t_dev_um t_int_scalar_um;
|
||||
typedef tdual_int_scalar::t_dev_const_um t_int_scalar_const_um;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<LAMMPS_NS::bigint, LMPDeviceType::array_layout, LMPDeviceType> tdual_bigint_scalar;
|
||||
typedef tdual_bigint_scalar::t_dev t_bigint_scalar;
|
||||
typedef tdual_bigint_scalar::t_dev_const t_bigint_scalar_const;
|
||||
typedef tdual_bigint_scalar::t_dev_um t_bigint_scalar_um;
|
||||
typedef tdual_bigint_scalar::t_dev_const_um t_bigint_scalar_const_um;
|
||||
typedef tdual_bigint_scalar::t_dev_const_randomread t_bigint_scalar_randomread;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<LAMMPS_NS::tagint, LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_scalar;
|
||||
typedef tdual_tagint_scalar::t_dev t_tagint_scalar;
|
||||
@ -666,6 +674,14 @@ typedef tdual_int_1d::t_dev_um t_int_1d_um;
|
||||
typedef tdual_int_1d::t_dev_const_um t_int_1d_const_um;
|
||||
typedef tdual_int_1d::t_dev_const_randomread t_int_1d_randomread;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<LAMMPS_NS::bigint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_bigint_1d;
|
||||
typedef tdual_bigint_1d::t_dev t_bigint_1d;
|
||||
typedef tdual_bigint_1d::t_dev_const t_bigint_1d_const;
|
||||
typedef tdual_bigint_1d::t_dev_um t_bigint_1d_um;
|
||||
typedef tdual_bigint_1d::t_dev_const_um t_bigint_1d_const_um;
|
||||
typedef tdual_bigint_1d::t_dev_const_randomread t_bigint_1d_randomread;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<int*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_int_1d_3;
|
||||
typedef tdual_int_1d_3::t_dev t_int_1d_3;
|
||||
@ -974,6 +990,12 @@ typedef tdual_int_scalar::t_host_const t_int_scalar_const;
|
||||
typedef tdual_int_scalar::t_host_um t_int_scalar_um;
|
||||
typedef tdual_int_scalar::t_host_const_um t_int_scalar_const_um;
|
||||
|
||||
typedef Kokkos::DualView<LAMMPS_NS::bigint, LMPDeviceType::array_layout, LMPDeviceType> tdual_bigint_scalar;
|
||||
typedef tdual_bigint_scalar::t_host t_bigint_scalar;
|
||||
typedef tdual_bigint_scalar::t_host_const t_bigint_scalar_const;
|
||||
typedef tdual_bigint_scalar::t_host_um t_bigint_scalar_um;
|
||||
typedef tdual_bigint_scalar::t_host_const_um t_bigint_scalar_const_um;
|
||||
|
||||
typedef Kokkos::DualView<LAMMPS_NS::tagint, LMPDeviceType::array_layout, LMPDeviceType> tdual_tagint_scalar;
|
||||
typedef tdual_tagint_scalar::t_host t_tagint_scalar;
|
||||
typedef tdual_tagint_scalar::t_host_const t_tagint_scalar_const;
|
||||
@ -994,6 +1016,13 @@ typedef tdual_int_1d::t_host_um t_int_1d_um;
|
||||
typedef tdual_int_1d::t_host_const_um t_int_1d_const_um;
|
||||
typedef tdual_int_1d::t_host_const_randomread t_int_1d_randomread;
|
||||
|
||||
typedef Kokkos::DualView<LAMMPS_NS::bigint*, LMPDeviceType::array_layout, LMPDeviceType> tdual_bigint_1d;
|
||||
typedef tdual_bigint_1d::t_host t_bigint_1d;
|
||||
typedef tdual_bigint_1d::t_host_const t_bigint_1d_const;
|
||||
typedef tdual_bigint_1d::t_host_um t_bigint_1d_um;
|
||||
typedef tdual_bigint_1d::t_host_const_um t_bigint_1d_const_um;
|
||||
typedef tdual_bigint_1d::t_host_const_randomread t_bigint_1d_randomread;
|
||||
|
||||
typedef Kokkos::DualView<int*[3], Kokkos::LayoutRight, LMPDeviceType> tdual_int_1d_3;
|
||||
typedef tdual_int_1d_3::t_host t_int_1d_3;
|
||||
typedef tdual_int_1d_3::t_host_const t_int_1d_3_const;
|
||||
|
||||
Reference in New Issue
Block a user