diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp index 308df20c0e..c0b263d736 100644 --- a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp @@ -53,7 +53,8 @@ FixACKS2ReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK | TAG_MASK; datamask_modify = Q_MASK | X_MASK; - nmax = m_cap = 0; + nmax = 0; + m_cap_big = 0; allocated_flag = 0; nprev = 4; @@ -66,7 +67,7 @@ FixACKS2ReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : buf = new double[2*nprev]; prev_last_rows_rank = 0; - d_mfill_offset = typename AT::t_int_scalar("acks2/kk:mfill_offset"); + d_mfill_offset = typename AT::t_bigint_scalar("acks2/kk:mfill_offset"); } /* ---------------------------------------------------------------------- */ @@ -418,10 +419,10 @@ void FixACKS2ReaxFFKokkos::pre_force(int /*vflag*/) template KOKKOS_INLINE_FUNCTION -void FixACKS2ReaxFFKokkos::num_neigh_item(int ii, int &maxneigh) const +void FixACKS2ReaxFFKokkos::num_neigh_item(int ii, bigint &totneigh) const { const int i = d_ilist[ii]; - maxneigh += d_numneigh[i]; + totneigh += d_numneigh[i]; } /* ---------------------------------------------------------------------- */ @@ -433,39 +434,39 @@ void FixACKS2ReaxFFKokkos::allocate_matrix() // determine the total space for the H matrix - m_cap = 0; + m_cap_big = 0; // limit scope of functor to allow deallocation of views { FixACKS2ReaxFFKokkosNumNeighFunctor neigh_functor(this); - Kokkos::parallel_reduce(nn,neigh_functor,m_cap); + Kokkos::parallel_reduce(nn,neigh_functor,m_cap_big); } // deallocate first to reduce memory overhead - d_firstnbr = typename AT::t_int_1d(); + d_firstnbr = typename AT::t_bigint_1d(); d_numnbrs = typename AT::t_int_1d(); d_jlist = typename AT::t_int_1d(); d_val = typename AT::t_ffloat_1d(); - d_firstnbr_X = typename AT::t_int_1d(); + d_firstnbr_X = typename AT::t_bigint_1d(); d_numnbrs_X = typename AT::t_int_1d(); d_jlist_X = typename AT::t_int_1d(); d_val_X = typename AT::t_ffloat_1d(); // H matrix - d_firstnbr = typename AT::t_int_1d("acks2/kk:firstnbr",nmax); + d_firstnbr = typename AT::t_bigint_1d("acks2/kk:firstnbr",nmax); d_numnbrs = typename AT::t_int_1d("acks2/kk:numnbrs",nmax); - d_jlist = typename AT::t_int_1d("acks2/kk:jlist",m_cap); - d_val = typename AT::t_ffloat_1d("acks2/kk:val",m_cap); + d_jlist = typename AT::t_int_1d("acks2/kk:jlist",m_cap_big); + d_val = typename AT::t_ffloat_1d("acks2/kk:val",m_cap_big); // X matrix - d_firstnbr_X = typename AT::t_int_1d("acks2/kk:firstnbr_X",nmax); + d_firstnbr_X = typename AT::t_bigint_1d("acks2/kk:firstnbr_X",nmax); d_numnbrs_X = typename AT::t_int_1d("acks2/kk:numnbrs_X",nmax); - d_jlist_X = typename AT::t_int_1d("acks2/kk:jlist_X",m_cap); - d_val_X = typename AT::t_ffloat_1d("acks2/kk:val_X",m_cap); + d_jlist_X = typename AT::t_int_1d("acks2/kk:jlist_X",m_cap_big); + d_val_X = typename AT::t_ffloat_1d("acks2/kk:val_X",m_cap_big); } /* ---------------------------------------------------------------------- */ @@ -566,7 +567,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2Zero, const int &ii) template template KOKKOS_INLINE_FUNCTION -void FixACKS2ReaxFFKokkos::compute_h_item(int ii, int &m_fill, const bool &final) const +void FixACKS2ReaxFFKokkos::compute_h_item(int ii, bigint &m_fill, const bool &final) const { const int i = d_ilist[ii]; int j,jj,jtype; @@ -619,7 +620,7 @@ void FixACKS2ReaxFFKokkos::compute_h_item(int ii, int &m_fill, const m_fill++; } if (final) - d_numnbrs[i] = m_fill - d_firstnbr[i]; + d_numnbrs[i] = int(m_fill - d_firstnbr[i]); } } @@ -698,9 +699,9 @@ void FixACKS2ReaxFFKokkos::compute_h_team( // calculate the global memory offset from where the H matrix values to be // calculated by the current team will be stored in d_val - int team_firstnbr_idx = 0; + bigint team_firstnbr_idx = 0; Kokkos::single(Kokkos::PerTeam(team), - [=](int &val) { + [=](bigint &val) { int totalnbrs = s_firstnbr[lastatom - firstatom - 1] + s_numnbrs[lastatom - firstatom - 1]; val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs); @@ -726,7 +727,7 @@ void FixACKS2ReaxFFKokkos::compute_h_team( int jnum = s_numnbrs[idx]; // calculate the write-offset for atom-i's first neighbor - int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; + bigint atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; Kokkos::single(Kokkos::PerThread(team), [&]() { d_firstnbr[i] = atomi_firstnbr_idx; }); @@ -739,7 +740,7 @@ void FixACKS2ReaxFFKokkos::compute_h_team( // are processed in batches and the batch size is vector_length for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) { - int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; + bigint atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; // count the # of neighbor atoms with non-zero electrostatic // interaction coefficients with atom-i in the current batch @@ -782,7 +783,8 @@ void FixACKS2ReaxFFKokkos::compute_h_team( valid = false; if (x(j, 2) == ztmp && x(j, 1) < ytmp) valid = false; - if (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp) + if (x(j, 2) == ztmp && x(j, 1) == ytmp && + x(j, 0) < xtmp) valid = false; } } @@ -851,7 +853,7 @@ double FixACKS2ReaxFFKokkos::calculate_H_k(const F_FLOAT &r, const F taper = taper * r + d_tap[0]; denom = r * r * r + shld; - denom = pow(denom,1.0/3.0); + denom = cbrt(denom); return taper * EV_TO_KCAL_PER_MOL / denom; } @@ -861,7 +863,7 @@ double FixACKS2ReaxFFKokkos::calculate_H_k(const F_FLOAT &r, const F template template KOKKOS_INLINE_FUNCTION -void FixACKS2ReaxFFKokkos::compute_x_item(int ii, int &m_fill, const bool &final) const +void FixACKS2ReaxFFKokkos::compute_x_item(int ii, bigint &m_fill, const bool &final) const { // The X_diag array is duplicated for OpenMP, atomic for GPU, and neither for Serial auto v_X_diag = ScatterViewHelper,decltype(dup_X_diag),decltype(ndup_X_diag)>::get(dup_X_diag,ndup_X_diag); @@ -927,7 +929,7 @@ void FixACKS2ReaxFFKokkos::compute_x_item(int ii, int &m_fill, const } if (final) { a_X_diag[i] += tmp; - d_numnbrs_X[i] = m_fill - d_firstnbr_X[i]; + d_numnbrs_X[i] = int(m_fill - d_firstnbr_X[i]); } } } @@ -1005,9 +1007,9 @@ void FixACKS2ReaxFFKokkos::compute_x_team( // calculate the global memory offset from where the H matrix values to be // calculated by the current team will be stored in d_val_X - int team_firstnbr_idx = 0; + bigint team_firstnbr_idx = 0; Kokkos::single(Kokkos::PerTeam(team), - [=](int &val) { + [=](bigint &val) { int totalnbrs = s_firstnbr[lastatom - firstatom - 1] + s_numnbrs[lastatom - firstatom - 1]; val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs); @@ -1033,7 +1035,7 @@ void FixACKS2ReaxFFKokkos::compute_x_team( int jnum = s_numnbrs[idx]; // calculate the write-offset for atom-i's first neighbor - int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; + bigint atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; Kokkos::single(Kokkos::PerThread(team), [&]() { d_firstnbr_X[i] = atomi_firstnbr_idx; }); @@ -1046,7 +1048,7 @@ void FixACKS2ReaxFFKokkos::compute_x_team( // are processed in batches and the batch size is vector_length for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) { - int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; + bigint atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; // count the # of neighbor atoms with non-zero electrostatic // interaction coefficients with atom-i in the current batch @@ -1464,7 +1466,7 @@ void FixACKS2ReaxFFKokkos::operator() (TagACKS2SparseMatvec3_Half::operator() (TagACKS2SparseMatvec3_Half::operator() (TagACKS2SparseMatvec3_Full, c F_FLOAT sum; F_FLOAT sum2; - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT &sum) { + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const bigint &jj, F_FLOAT &sum) { const int j = d_jlist(jj); sum += d_val(jj) * d_xx[j]; }, sum); team.team_barrier(); - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr_X[i], d_firstnbr_X[i] + d_numnbrs_X[i]), [&] (const int &jj, F_FLOAT &sum2) { + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr_X[i], d_firstnbr_X[i] + d_numnbrs_X[i]), [&] (const bigint &jj, F_FLOAT &sum2) { const int j = d_jlist_X(jj); sum2 += d_val_X(jj) * d_xx[NN + j]; }, sum2); @@ -1865,8 +1867,8 @@ double FixACKS2ReaxFFKokkos::memory_usage() bytes += nmax*4 * sizeof(double); // storage bytes += size*11 * sizeof(double); // storage bytes += n_cap*4 * sizeof(int); // matrix... - bytes += m_cap*2 * sizeof(int); - bytes += m_cap*2 * sizeof(double); + bytes += m_cap_big*2 * sizeof(int); + bytes += m_cap_big*2 * sizeof(double); return bytes; } diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.h b/src/KOKKOS/fix_acks2_reaxff_kokkos.h index cb16b4cd24..6adca39d17 100644 --- a/src/KOKKOS/fix_acks2_reaxff_kokkos.h +++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.h @@ -74,7 +74,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase { DAT::tdual_ffloat_1d get_s() {return k_s;} KOKKOS_INLINE_FUNCTION - void num_neigh_item(int, int&) const; + void num_neigh_item(int, bigint&) const; KOKKOS_INLINE_FUNCTION void operator()(TagACKS2Zero, const int&) const; @@ -84,7 +84,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase { template KOKKOS_INLINE_FUNCTION - void compute_h_item(int, int &, const bool &) const; + void compute_h_item(int, bigint &, const bool &) const; template KOKKOS_INLINE_FUNCTION @@ -92,7 +92,7 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase { template KOKKOS_INLINE_FUNCTION - void compute_x_item(int, int &, const bool &) const; + void compute_x_item(int, bigint &, const bool &) const; template KOKKOS_INLINE_FUNCTION @@ -173,8 +173,9 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase { int allocated_flag, last_allocate; int need_dup,prev_last_rows_rank; double* buf; + bigint m_cap_big; - typename AT::t_int_scalar d_mfill_offset; + typename AT::t_bigint_scalar d_mfill_offset; typedef Kokkos::DualView tdual_int_1d; Kokkos::DualView k_params; @@ -197,12 +198,12 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase { DAT::tdual_ffloat_2d k_bcut; typename AT::t_ffloat_2d d_bcut; - typename AT::t_int_1d d_firstnbr; + typename AT::t_bigint_1d d_firstnbr; typename AT::t_int_1d d_numnbrs; typename AT::t_int_1d d_jlist; typename AT::t_ffloat_1d d_val; - typename AT::t_int_1d d_firstnbr_X; + typename AT::t_bigint_1d d_firstnbr_X; typename AT::t_int_1d d_numnbrs_X; typename AT::t_int_1d d_jlist_X; typename AT::t_ffloat_1d d_val_X; @@ -264,21 +265,21 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase { template struct FixACKS2ReaxFFKokkosNumNeighFunctor { typedef DeviceType device_type; - typedef int value_type; + typedef bigint value_type; FixACKS2ReaxFFKokkos c; FixACKS2ReaxFFKokkosNumNeighFunctor(FixACKS2ReaxFFKokkos* c_ptr):c(*c_ptr) { c.cleanup_copy(); }; KOKKOS_INLINE_FUNCTION - void operator()(const int ii, int &maxneigh) const { - c.num_neigh_item(ii, maxneigh); + void operator()(const int ii, bigint &totneigh) const { + c.num_neigh_item(ii, totneigh); } }; template struct FixACKS2ReaxFFKokkosComputeHFunctor { int atoms_per_team, vector_length; - typedef int value_type; + typedef bigint value_type; typedef Kokkos::ScratchMemorySpace scratch_space; FixACKS2ReaxFFKokkos c; @@ -293,7 +294,7 @@ struct FixACKS2ReaxFFKokkosComputeHFunctor { }; KOKKOS_INLINE_FUNCTION - void operator()(const int ii, int &m_fill, const bool &final) const { + void operator()(const int ii, bigint &m_fill, const bool &final) const { c.template compute_h_item(ii,m_fill,final); } @@ -325,7 +326,7 @@ struct FixACKS2ReaxFFKokkosComputeHFunctor { template struct FixACKS2ReaxFFKokkosComputeXFunctor { int atoms_per_team, vector_length; - typedef int value_type; + typedef bigint value_type; typedef Kokkos::ScratchMemorySpace scratch_space; FixACKS2ReaxFFKokkos c; @@ -340,7 +341,7 @@ struct FixACKS2ReaxFFKokkosComputeXFunctor { }; KOKKOS_INLINE_FUNCTION - void operator()(const int ii, int &m_fill, const bool &final) const { + void operator()(const int ii, bigint &m_fill, const bool &final) const { c.template compute_x_item(ii,m_fill,final); } diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index deb41944bc..f93f6cb70e 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -62,7 +62,8 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : datamask_read = X_MASK | V_MASK | F_MASK | Q_MASK | MASK_MASK | TYPE_MASK | TAG_MASK; datamask_modify = X_MASK; - nmax = m_cap = 0; + nmax = 0; + m_cap_big = 0; allocated_flag = 0; nprev = 4; maxexchange = nprev*2; @@ -71,7 +72,7 @@ FixQEqReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : memory->destroy(t_hist); grow_arrays(atom->nmax); - d_mfill_offset = typename AT::t_int_scalar("qeq/kk:mfill_offset"); + d_mfill_offset = typename AT::t_bigint_scalar("qeq/kk:mfill_offset"); converged = 0; } @@ -301,10 +302,10 @@ void FixQEqReaxFFKokkos::pre_force(int /*vflag*/) template KOKKOS_INLINE_FUNCTION -void FixQEqReaxFFKokkos::num_neigh_item(int ii, int &maxneigh) const +void FixQEqReaxFFKokkos::num_neigh_item(int ii, bigint &totneigh) const { const int i = d_ilist[ii]; - maxneigh += d_numneigh[i]; + totneigh += d_numneigh[i]; } /* ---------------------------------------------------------------------- */ @@ -316,25 +317,25 @@ void FixQEqReaxFFKokkos::allocate_matrix() // determine the total space for the H matrix - m_cap = 0; + m_cap_big = 0; // limit scope of functor to allow deallocation of views { FixQEqReaxFFKokkosNumNeighFunctor neigh_functor(this); - Kokkos::parallel_reduce(nn,neigh_functor,m_cap); + Kokkos::parallel_reduce(nn,neigh_functor,m_cap_big); } // deallocate first to reduce memory overhead - d_firstnbr = typename AT::t_int_1d(); + d_firstnbr = typename AT::t_bigint_1d(); d_numnbrs = typename AT::t_int_1d(); d_jlist = typename AT::t_int_1d(); d_val = typename AT::t_ffloat_1d(); - d_firstnbr = typename AT::t_int_1d("qeq/kk:firstnbr",nmax); + d_firstnbr = typename AT::t_bigint_1d("qeq/kk:firstnbr",nmax); d_numnbrs = typename AT::t_int_1d("qeq/kk:numnbrs",nmax); - d_jlist = typename AT::t_int_1d("qeq/kk:jlist",m_cap); - d_val = typename AT::t_ffloat_1d("qeq/kk:val",m_cap); + d_jlist = typename AT::t_int_1d("qeq/kk:jlist",m_cap_big); + d_val = typename AT::t_ffloat_1d("qeq/kk:val",m_cap_big); } /* ---------------------------------------------------------------------- */ @@ -405,7 +406,7 @@ void FixQEqReaxFFKokkos::operator()(TagQEqZero, const int &ii) const template template KOKKOS_INLINE_FUNCTION -void FixQEqReaxFFKokkos::compute_h_item(int ii, int &m_fill, const bool &final) const +void FixQEqReaxFFKokkos::compute_h_item(int ii, bigint &m_fill, const bool &final) const { const int i = d_ilist[ii]; int j,jj,jtype; @@ -458,7 +459,7 @@ void FixQEqReaxFFKokkos::compute_h_item(int ii, int &m_fill, const b m_fill++; } if (final) - d_numnbrs[i] = m_fill - d_firstnbr[i]; + d_numnbrs[i] = int(m_fill - d_firstnbr[i]); } } @@ -537,9 +538,9 @@ void FixQEqReaxFFKokkos::compute_h_team( // calculate the global memory offset from where the H matrix values to be // calculated by the current team will be stored in d_val - int team_firstnbr_idx = 0; + bigint team_firstnbr_idx = 0; Kokkos::single(Kokkos::PerTeam(team), - [=](int &val) { + [=](bigint &val) { int totalnbrs = s_firstnbr[lastatom - firstatom - 1] + s_numnbrs[lastatom - firstatom - 1]; val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs); @@ -565,7 +566,7 @@ void FixQEqReaxFFKokkos::compute_h_team( int jnum = s_numnbrs[idx]; // removed "const" to work around GCC 7 bug // calculate the write-offset for atom-i's first neighbor - int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; + bigint atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; Kokkos::single(Kokkos::PerThread(team), [&]() { d_firstnbr[i] = atomi_firstnbr_idx; }); @@ -578,7 +579,7 @@ void FixQEqReaxFFKokkos::compute_h_team( // are processed in batches and the batch size is vector_length for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) { - int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; + bigint atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; // count the # of neighbor atoms with non-zero electrostatic // interaction coefficients with atom-i in the current batch @@ -935,7 +936,7 @@ void FixQEqReaxFFKokkos::operator()(TagQEqSparseMatvec2_Half::operator()(TagQEqSparseMatvec2_Full, const const int i = d_ilist[k]; if (mask[i] & groupbit) { F_FLOAT2 doitmp; - Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT2& doi) { + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const bigint &jj, F_FLOAT2& doi) { const int j = d_jlist(jj); const auto d_val_jj = d_val(jj); if (!(converged & 1)) @@ -1286,8 +1287,8 @@ double FixQEqReaxFFKokkos::memory_usage() bytes = atom->nmax*nprev*2 * sizeof(F_FLOAT); // s_hist & t_hist bytes += (double)atom->nmax*8 * sizeof(F_FLOAT); // storage bytes += (double)n_cap*2 * sizeof(int); // matrix... - bytes += (double)m_cap * sizeof(int); - bytes += (double)m_cap * sizeof(F_FLOAT); + bytes += (double)m_cap_big * sizeof(int); + bytes += (double)m_cap_big * sizeof(F_FLOAT); return bytes; } diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h index 92026b209d..0733a518a2 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h @@ -70,7 +70,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { void pre_force(int) override; KOKKOS_INLINE_FUNCTION - void num_neigh_item(int, int&) const; + void num_neigh_item(int, bigint&) const; KOKKOS_INLINE_FUNCTION void operator()(TagQEqZero, const int&) const; @@ -80,7 +80,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { template KOKKOS_INLINE_FUNCTION - void compute_h_item(int, int &, const bool &) const; + void compute_h_item(int, bigint &, const bool &) const; template KOKKOS_INLINE_FUNCTION @@ -201,8 +201,9 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { int allocated_flag, last_allocate; int need_dup; int converged; + bigint m_cap_big; - typename AT::t_int_scalar d_mfill_offset; + typename AT::t_bigint_scalar d_mfill_offset; typedef Kokkos::DualView tdual_int_1d; Kokkos::DualView k_params; @@ -227,7 +228,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { DAT::tdual_ffloat_1d k_tap; typename AT::t_ffloat_1d d_tap; - typename AT::t_int_1d d_firstnbr; + typename AT::t_bigint_1d d_firstnbr; typename AT::t_int_1d d_numnbrs; typename AT::t_int_1d d_jlist; typename AT::t_ffloat_1d d_val; @@ -290,21 +291,21 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { template struct FixQEqReaxFFKokkosNumNeighFunctor { typedef DeviceType device_type; - typedef int value_type; + typedef bigint value_type; FixQEqReaxFFKokkos c; FixQEqReaxFFKokkosNumNeighFunctor(FixQEqReaxFFKokkos* c_ptr):c(*c_ptr) { c.cleanup_copy(); }; KOKKOS_INLINE_FUNCTION - void operator()(const int ii, int &maxneigh) const { - c.num_neigh_item(ii, maxneigh); + void operator()(const int ii, bigint &totneigh) const { + c.num_neigh_item(ii, totneigh); } }; template struct FixQEqReaxFFKokkosComputeHFunctor { int atoms_per_team, vector_length; - typedef int value_type; + typedef bigint value_type; typedef Kokkos::ScratchMemorySpace scratch_space; FixQEqReaxFFKokkos c; @@ -319,7 +320,7 @@ struct FixQEqReaxFFKokkosComputeHFunctor { }; KOKKOS_INLINE_FUNCTION - void operator()(const int ii, int &m_fill, const bool &final) const { + void operator()(const int ii, bigint &m_fill, const bool &final) const { c.template compute_h_item(ii,m_fill,final); } diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 58b9436af6..7dba47f889 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -638,33 +638,32 @@ void KokkosLMP::accelerator(int narg, char **arg) called by Finish ------------------------------------------------------------------------- */ -int KokkosLMP::neigh_count(int m) +bigint KokkosLMP::neigh_count(int m) { - int inum = 0; - int nneigh = 0; - - ArrayTypes::t_int_1d h_ilist; - ArrayTypes::t_int_1d h_numneigh; + bigint nneigh = 0; NeighborKokkos *nk = (NeighborKokkos *) neighbor; if (nk->lists[m]->execution_space == Host) { NeighListKokkos* nlistKK = (NeighListKokkos*) nk->lists[m]; - inum = nlistKK->inum; - h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist); - h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh); - Kokkos::deep_copy(h_ilist,nlistKK->d_ilist); - Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh); + int inum = nlistKK->inum; + auto d_ilist = nlistKK->d_ilist; + auto d_numneigh = nlistKK->d_numneigh; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,inum), LAMMPS_LAMBDA(int ii, bigint &nneigh) { + const int i = d_ilist[ii]; + nneigh += d_numneigh[i]; + },nneigh); + } else if (nk->lists[m]->execution_space == Device) { NeighListKokkos* nlistKK = (NeighListKokkos*) nk->lists[m]; - inum = nlistKK->inum; - h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist); - h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh); - Kokkos::deep_copy(h_ilist,nlistKK->d_ilist); - Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh); + int inum = nlistKK->inum; + auto d_ilist = nlistKK->d_ilist; + auto d_numneigh = nlistKK->d_numneigh; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,inum), LAMMPS_LAMBDA(int ii, bigint &nneigh) { + const int i = d_ilist[ii]; + nneigh += d_numneigh[i]; + },nneigh); } - for (int i = 0; i < inum; i++) nneigh += h_numneigh[h_ilist[i]]; - return nneigh; } diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 748aff7f83..419de62dec 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -64,7 +64,7 @@ class KokkosLMP : protected Pointers { static void initialize(const Kokkos::InitializationSettings&, Error *); static void finalize(); void accelerator(int, char **); - int neigh_count(int); + bigint neigh_count(int); template int need_dup(int qeq_flag = 0) diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 7f0eb5c105..e4c1bdda6f 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -641,6 +641,14 @@ typedef tdual_int_scalar::t_dev_const t_int_scalar_const; typedef tdual_int_scalar::t_dev_um t_int_scalar_um; typedef tdual_int_scalar::t_dev_const_um t_int_scalar_const_um; +typedef Kokkos:: + DualView tdual_bigint_scalar; +typedef tdual_bigint_scalar::t_dev t_bigint_scalar; +typedef tdual_bigint_scalar::t_dev_const t_bigint_scalar_const; +typedef tdual_bigint_scalar::t_dev_um t_bigint_scalar_um; +typedef tdual_bigint_scalar::t_dev_const_um t_bigint_scalar_const_um; +typedef tdual_bigint_scalar::t_dev_const_randomread t_bigint_scalar_randomread; + typedef Kokkos:: DualView tdual_tagint_scalar; typedef tdual_tagint_scalar::t_dev t_tagint_scalar; @@ -666,6 +674,14 @@ typedef tdual_int_1d::t_dev_um t_int_1d_um; typedef tdual_int_1d::t_dev_const_um t_int_1d_const_um; typedef tdual_int_1d::t_dev_const_randomread t_int_1d_randomread; +typedef Kokkos:: + DualView tdual_bigint_1d; +typedef tdual_bigint_1d::t_dev t_bigint_1d; +typedef tdual_bigint_1d::t_dev_const t_bigint_1d_const; +typedef tdual_bigint_1d::t_dev_um t_bigint_1d_um; +typedef tdual_bigint_1d::t_dev_const_um t_bigint_1d_const_um; +typedef tdual_bigint_1d::t_dev_const_randomread t_bigint_1d_randomread; + typedef Kokkos:: DualView tdual_int_1d_3; typedef tdual_int_1d_3::t_dev t_int_1d_3; @@ -974,6 +990,12 @@ typedef tdual_int_scalar::t_host_const t_int_scalar_const; typedef tdual_int_scalar::t_host_um t_int_scalar_um; typedef tdual_int_scalar::t_host_const_um t_int_scalar_const_um; +typedef Kokkos::DualView tdual_bigint_scalar; +typedef tdual_bigint_scalar::t_host t_bigint_scalar; +typedef tdual_bigint_scalar::t_host_const t_bigint_scalar_const; +typedef tdual_bigint_scalar::t_host_um t_bigint_scalar_um; +typedef tdual_bigint_scalar::t_host_const_um t_bigint_scalar_const_um; + typedef Kokkos::DualView tdual_tagint_scalar; typedef tdual_tagint_scalar::t_host t_tagint_scalar; typedef tdual_tagint_scalar::t_host_const t_tagint_scalar_const; @@ -994,6 +1016,13 @@ typedef tdual_int_1d::t_host_um t_int_1d_um; typedef tdual_int_1d::t_host_const_um t_int_1d_const_um; typedef tdual_int_1d::t_host_const_randomread t_int_1d_randomread; +typedef Kokkos::DualView tdual_bigint_1d; +typedef tdual_bigint_1d::t_host t_bigint_1d; +typedef tdual_bigint_1d::t_host_const t_bigint_1d_const; +typedef tdual_bigint_1d::t_host_um t_bigint_1d_um; +typedef tdual_bigint_1d::t_host_const_um t_bigint_1d_const_um; +typedef tdual_bigint_1d::t_host_const_randomread t_bigint_1d_randomread; + typedef Kokkos::DualView tdual_int_1d_3; typedef tdual_int_1d_3::t_host t_int_1d_3; typedef tdual_int_1d_3::t_host_const t_int_1d_3_const;