Remove use_count check, reduce memory use in ReaxFF

This commit is contained in:
Stan Gerald Moore
2022-05-19 09:09:57 -06:00
parent 06ad119844
commit d8d6884def
3 changed files with 29 additions and 57 deletions

View File

@ -284,7 +284,6 @@ template <typename TYPE>
// Replace `data` with a newly constructed TYPE that takes one extent (n1).
// `name` is wrapped in a std::string and passed through Kokkos::NoInit as the
// first constructor argument (presumably a label that also requests
// uninitialized contents -- confirm against the NoInit definition).
static void realloc_kokkos(TYPE &data, const char *name, int n1)
{
// Assign a default-constructed TYPE first, so `data` no longer refers to its
// previous contents when the replacement is constructed below.
data = TYPE();
// Checks use_count() on the object that was just default-assigned above.
// NOTE(review): the commit title ("Remove use_count check") suggests this is
// the line being deleted; diff markers are not visible in this view.
assert(data.use_count() == 0);
data = TYPE(Kokkos::NoInit(std::string(name)),n1);
}
@ -292,7 +291,6 @@ template <typename TYPE>
// Replace `data` with a newly constructed TYPE that takes two extents (n1, n2).
// `name` is wrapped in a std::string and passed through Kokkos::NoInit as the
// first constructor argument (presumably a label that also requests
// uninitialized contents -- confirm against the NoInit definition).
static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2)
{
// Assign a default-constructed TYPE first, so `data` no longer refers to its
// previous contents when the replacement is constructed below.
data = TYPE();
// Checks use_count() on the object that was just default-assigned above.
// NOTE(review): the commit title ("Remove use_count check") suggests this is
// the line being deleted; diff markers are not visible in this view.
assert(data.use_count() == 0);
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2);
}
@ -300,7 +298,6 @@ template <typename TYPE>
// Replace `data` with a newly constructed TYPE that takes three extents
// (n1, n2, n3). `name` is wrapped in a std::string and passed through
// Kokkos::NoInit as the first constructor argument (presumably a label that
// also requests uninitialized contents -- confirm against the NoInit definition).
static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3)
{
// Assign a default-constructed TYPE first, so `data` no longer refers to its
// previous contents when the replacement is constructed below.
data = TYPE();
// Checks use_count() on the object that was just default-assigned above.
// NOTE(review): the commit title ("Remove use_count check") suggests this is
// the line being deleted; diff markers are not visible in this view.
assert(data.use_count() == 0);
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3);
}
@ -308,7 +305,6 @@ template <typename TYPE>
// Replace `data` with a newly constructed TYPE that takes four extents
// (n1 .. n4). `name` is wrapped in a std::string and passed through
// Kokkos::NoInit as the first constructor argument (presumably a label that
// also requests uninitialized contents -- confirm against the NoInit definition).
static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4)
{
// Assign a default-constructed TYPE first, so `data` no longer refers to its
// previous contents when the replacement is constructed below.
data = TYPE();
// Checks use_count() on the object that was just default-assigned above.
// NOTE(review): the commit title ("Remove use_count check") suggests this is
// the line being deleted; diff markers are not visible in this view.
assert(data.use_count() == 0);
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4);
}
@ -316,7 +312,6 @@ template <typename TYPE>
// Replace `data` with a newly constructed TYPE that takes five extents
// (n1 .. n5). `name` is wrapped in a std::string and passed through
// Kokkos::NoInit as the first constructor argument (presumably a label that
// also requests uninitialized contents -- confirm against the NoInit definition).
static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5)
{
// Assign a default-constructed TYPE first, so `data` no longer refers to its
// previous contents when the replacement is constructed below.
data = TYPE();
// Checks use_count() on the object that was just default-assigned above.
// NOTE(review): the commit title ("Remove use_count check") suggests this is
// the line being deleted; diff markers are not visible in this view.
assert(data.use_count() == 0);
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5);
}
@ -324,7 +319,6 @@ template <typename TYPE>
// Replace `data` with a newly constructed TYPE that takes six extents
// (n1 .. n6). `name` is wrapped in a std::string and passed through
// Kokkos::NoInit as the first constructor argument (presumably a label that
// also requests uninitialized contents -- confirm against the NoInit definition).
static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5, int n6)
{
// Assign a default-constructed TYPE first, so `data` no longer refers to its
// previous contents when the replacement is constructed below.
data = TYPE();
// Checks use_count() on the object that was just default-assigned above.
// NOTE(review): the commit title ("Remove use_count check") suggests this is
// the line being deleted; diff markers are not visible in this view.
assert(data.use_count() == 0);
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5,n6);
}

View File

@ -884,17 +884,10 @@ void PairReaxFFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairReaxBuildListsFull>(0,ignum),*this);
// allocate duplicated memory
if (need_dup) {
if (need_dup)
dup_CdDelta = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_CdDelta);
//dup_Cdbo = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_Cdbo);
//dup_Cdbopi = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_Cdbopi);
//dup_Cdbopi2 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_Cdbopi2);
} else {
else
ndup_CdDelta = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_CdDelta);
//ndup_Cdbo = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_Cdbo);
//ndup_Cdbopi = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_Cdbopi);
//ndup_Cdbopi2 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_Cdbopi2);
}
// reduction over duplicated memory
if (need_dup)
@ -1034,26 +1027,12 @@ void PairReaxFFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
if (need_dup) {
Kokkos::Experimental::contribute(d_dDeltap_self, dup_dDeltap_self); // needed in ComputeBond2
Kokkos::Experimental::contribute(d_CdDelta, dup_CdDelta); // needed in ComputeBond2
//Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in UpdateBond, but also used in UpdateBond
//Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in UpdateBond, but also used in UpdateBond
//Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in UpdateBond, but also used in UpdateBond
//dup_Cdbo.reset_except(d_Cdbo);
//dup_Cdbopi.reset_except(d_Cdbopi);
//dup_Cdbopi2.reset_except(d_Cdbopi2);
}
// Bond force
if (neighflag == HALF) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairReaxUpdateBond<HALF>>(0,ignum),*this);
// reduction over duplicated memory
//if (need_dup) {
// Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in ComputeBond2
// Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in ComputeBond2
// Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in ComputeBond2
//}
if (vflag_either)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairReaxComputeBond2<HALF,1>>(0,ignum),*this,ev);
else
@ -1063,13 +1042,6 @@ void PairReaxFFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
} else { //if (neighflag == HALFTHREAD) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairReaxUpdateBond<HALFTHREAD>>(0,ignum),*this);
// reduction over duplicated memory
//if (need_dup) {
// Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in ComputeBond2
// Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in ComputeBond2
// Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in ComputeBond2
//}
if (vflag_either)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairReaxComputeBond2<HALFTHREAD,1>>(0,ignum),*this,ev);
else
@ -1117,7 +1089,7 @@ void PairReaxFFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
copymode = 0;
// free duplicated memory
// free scatterview memory
if (need_dup) {
dup_f = decltype(dup_f)();
dup_eatom = decltype(dup_eatom)();
@ -1125,9 +1097,13 @@ void PairReaxFFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
dup_dDeltap_self = decltype(dup_dDeltap_self)();
dup_total_bo = decltype(dup_total_bo)();
dup_CdDelta = decltype(dup_CdDelta)();
//dup_Cdbo = decltype(dup_Cdbo)();
//dup_Cdbopi = decltype(dup_Cdbopi)();
//dup_Cdbopi2 = decltype(dup_Cdbopi2)();
} else {
ndup_f = decltype(ndup_f)();
ndup_eatom = decltype(ndup_eatom)();
ndup_vatom = decltype(ndup_vatom)();
ndup_dDeltap_self = decltype(ndup_dDeltap_self)();
ndup_total_bo = decltype(ndup_total_bo)();
ndup_CdDelta = decltype(ndup_CdDelta)();
}
d_neighbors = typename AT::t_neighbors_2d();
@ -1513,6 +1489,17 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxComputeTabulatedLJCoulo
template<class DeviceType>
void PairReaxFFKokkos<DeviceType>::allocate_array()
{
// free scatterview memory
if (need_dup) {
dup_dDeltap_self = decltype(dup_dDeltap_self)();
dup_total_bo = decltype(dup_total_bo)();
dup_CdDelta = decltype(dup_CdDelta)();
} else {
ndup_dDeltap_self = decltype(ndup_dDeltap_self)();
ndup_total_bo = decltype(ndup_total_bo)();
ndup_CdDelta = decltype(ndup_CdDelta)();
}
if (cut_hbsq > 0.0) {
MemKK::realloc_kokkos(d_hb_first,"reaxff/kk:hb_first",nmax);
MemKK::realloc_kokkos(d_hb_num,"reaxff/kk:hb_num",nmax);
@ -3482,9 +3469,6 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxUpdateBond<NEIGHFLAG>,
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,KKDeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value>> a_Cdbo = d_Cdbo;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,KKDeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value>> a_Cdbopi = d_Cdbopi;
Kokkos::View<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout,KKDeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value>> a_Cdbopi2 = d_Cdbopi2;
//auto a_Cdbo = dup_Cdbo.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
//auto a_Cdbopi = dup_Cdbopi.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
//auto a_Cdbopi2 = dup_Cdbopi2.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
const int i = d_ilist[ii];
const tagint itag = tag(i);

View File

@ -435,6 +435,8 @@ class PairReaxFFKokkos : public PairReaxFF {
typename AT::t_ffloat_2d_dl d_C1dbopi2, d_C2dbopi2, d_C3dbopi2, d_C4dbopi2;
typename AT::t_ffloat_2d_dl d_Cdbo, d_Cdbopi, d_Cdbopi2, d_dDeltap_self;
int need_dup;
using KKDeviceType = typename KKDevice<DeviceType>::value;
template<typename DataType, typename Layout>
@ -443,27 +445,19 @@ class PairReaxFFKokkos : public PairReaxFF {
template<typename DataType, typename Layout>
using NonDupScatterView = KKScatterView<DataType, Layout, KKDeviceType, KKScatterSum, KKScatterNonDuplicated>;
DupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> dup_total_bo;
DupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> dup_CdDelta;
DupScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout> dup_eatom;
DupScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout> dup_f;
DupScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout> dup_eatom;
DupScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout> dup_vatom;
DupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> dup_dDeltap_self;
DupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> dup_Cdbo;
DupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> dup_Cdbopi;
DupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> dup_Cdbopi2;
DupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> dup_total_bo;
DupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> dup_CdDelta;
NonDupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> ndup_total_bo;
NonDupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> ndup_CdDelta;
NonDupScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout> ndup_eatom;
NonDupScatterView<F_FLOAT*[3], typename DAT::t_f_array::array_layout> ndup_f;
NonDupScatterView<E_FLOAT*, typename DAT::t_efloat_1d::array_layout> ndup_eatom;
NonDupScatterView<F_FLOAT*[6], typename DAT::t_virial_array::array_layout> ndup_vatom;
NonDupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> ndup_dDeltap_self;
NonDupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> ndup_Cdbo;
NonDupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> ndup_Cdbopi;
NonDupScatterView<F_FLOAT**, typename DAT::t_ffloat_2d_dl::array_layout> ndup_Cdbopi2;
int need_dup;
NonDupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> ndup_total_bo;
NonDupScatterView<F_FLOAT*, typename DAT::t_float_1d::array_layout> ndup_CdDelta;
typedef Kokkos::DualView<F_FLOAT**[7],typename DeviceType::array_layout,DeviceType> tdual_ffloat_2d_n7;
typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread;