From 06ad1198448cab2a3d17f73306dac22226684339 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 18 May 2022 17:15:36 -0600 Subject: [PATCH 1/7] Initial refactor of view allocation in Kokkos package --- src/KOKKOS/memory_kokkos.h | 22 ++- src/KOKKOS/pair_pace_kokkos.cpp | 206 ++++++++++++++--------------- src/KOKKOS/pair_reaxff_kokkos.cpp | 161 +++++++--------------- src/KOKKOS/pair_reaxff_kokkos.h | 1 - src/KOKKOS/pair_snap_kokkos.h | 33 +---- src/KOKKOS/pair_snap_kokkos_impl.h | 34 ++--- src/KOKKOS/sna_kokkos_impl.h | 201 ++++++++++++++-------------- 7 files changed, 283 insertions(+), 375 deletions(-) diff --git a/src/KOKKOS/memory_kokkos.h b/src/KOKKOS/memory_kokkos.h index e82746f34d..092e20b6c7 100644 --- a/src/KOKKOS/memory_kokkos.h +++ b/src/KOKKOS/memory_kokkos.h @@ -20,6 +20,8 @@ namespace LAMMPS_NS { +typedef MemoryKokkos MemKK; + class MemoryKokkos : public Memory { public: MemoryKokkos(class LAMMPS *lmp) : Memory(lmp) {} @@ -279,44 +281,50 @@ void destroy_kokkos(TYPE data, typename TYPE::value_type** &array) ------------------------------------------------------------------------- */ template -void realloc_kokkos(TYPE &data, const char *name, int n1) +static void realloc_kokkos(TYPE &data, const char *name, int n1) { data = TYPE(); + assert(data.use_count() == 0); data = TYPE(Kokkos::NoInit(std::string(name)),n1); } template -void realloc_kokkos(TYPE &data, const char *name, int n1, int n2) +static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2) { data = TYPE(); + assert(data.use_count() == 0); data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2); } template -void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3) +static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3) { data = TYPE(); + assert(data.use_count() == 0); data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3); } template -void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4) +static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4) { data = TYPE(); + assert(data.use_count() == 0); data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4); } template -void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5) +static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5) { data = TYPE(); + assert(data.use_count() == 0); data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5); } template -void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5, int n6) +static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5, int n6) { data = TYPE(); + assert(data.use_count() == 0); data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5,n6); } @@ -325,7 +333,7 @@ void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4 ------------------------------------------------------------------------- */ template -double memory_usage(TYPE &data) +static double memory_usage(TYPE &data) { return data.span() * sizeof(typename TYPE::value_type); } diff --git a/src/KOKKOS/pair_pace_kokkos.cpp b/src/KOKKOS/pair_pace_kokkos.cpp index ddf3bf9107..9d78b63167 100644 --- a/src/KOKKOS/pair_pace_kokkos.cpp +++ b/src/KOKKOS/pair_pace_kokkos.cpp @@ -104,55 +104,55 @@ void PairPACEKokkos::grow(int natom, int maxneigh) if ((int)A.extent(0) < natom) { - memoryKK->realloc_kokkos(A, "pace:A", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1)); - memoryKK->realloc_kokkos(A_rank1, "pace:A_rank1", natom, nelements, nradbase); + MemKK::realloc_kokkos(A, "pace:A", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(A_rank1, "pace:A_rank1", natom, nelements, nradbase); - memoryKK->realloc_kokkos(A_list, "pace:A_list", natom, idx_rho_max, basis_set->rankmax); + MemKK::realloc_kokkos(A_list, "pace:A_list", natom, idx_rho_max, basis_set->rankmax); //size is +1 of max to avoid out-of-boundary array access in double-triangular scheme - memoryKK->realloc_kokkos(A_forward_prod, "pace:A_forward_prod", natom, idx_rho_max, basis_set->rankmax + 1); + MemKK::realloc_kokkos(A_forward_prod, "pace:A_forward_prod", natom, idx_rho_max, basis_set->rankmax + 1); - memoryKK->realloc_kokkos(e_atom, "pace:e_atom", natom); - memoryKK->realloc_kokkos(rhos, "pace:rhos", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion - memoryKK->realloc_kokkos(dF_drho, "pace:dF_drho", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion + MemKK::realloc_kokkos(e_atom, "pace:e_atom", natom); + MemKK::realloc_kokkos(rhos, "pace:rhos", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion + MemKK::realloc_kokkos(dF_drho, "pace:dF_drho", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion - memoryKK->realloc_kokkos(weights, "pace:weights", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1)); - memoryKK->realloc_kokkos(weights_rank1, "pace:weights_rank1", natom, nelements, nradbase); + MemKK::realloc_kokkos(weights, "pace:weights", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(weights_rank1, "pace:weights_rank1", natom, nelements, nradbase); // hard-core repulsion - memoryKK->realloc_kokkos(rho_core, "pace:rho_core", natom); - memoryKK->realloc_kokkos(dF_drho_core, "pace:dF_drho_core", natom); - memoryKK->realloc_kokkos(dB_flatten, "pace:dB_flatten", natom, idx_rho_max, basis_set->rankmax); + MemKK::realloc_kokkos(rho_core, "pace:rho_core", natom); + MemKK::realloc_kokkos(dF_drho_core, "pace:dF_drho_core", natom); + MemKK::realloc_kokkos(dB_flatten, "pace:dB_flatten", natom, idx_rho_max, basis_set->rankmax); } if (((int)ylm.extent(0) < natom) || ((int)ylm.extent(1) < maxneigh)) { // radial functions - memoryKK->realloc_kokkos(fr, "pace:fr", natom, maxneigh, nradmax, lmax + 1); - memoryKK->realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, nradmax, lmax + 1); - memoryKK->realloc_kokkos(gr, "pace:gr", natom, maxneigh, nradbase); - memoryKK->realloc_kokkos(dgr, "pace:dgr", natom, maxneigh, nradbase); + MemKK::realloc_kokkos(fr, "pace:fr", natom, maxneigh, nradmax, lmax + 1); + MemKK::realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, nradmax, lmax + 1); + MemKK::realloc_kokkos(gr, "pace:gr", natom, maxneigh, nradbase); + MemKK::realloc_kokkos(dgr, "pace:dgr", natom, maxneigh, nradbase); const int max_num_functions = MAX(nradbase, nradmax*(lmax + 1)); - memoryKK->realloc_kokkos(d_values, "pace:d_values", natom, maxneigh, max_num_functions); - memoryKK->realloc_kokkos(d_derivatives, "pace:d_derivatives", natom, maxneigh, max_num_functions); + MemKK::realloc_kokkos(d_values, "pace:d_values", natom, maxneigh, max_num_functions); + MemKK::realloc_kokkos(d_derivatives, "pace:d_derivatives", natom, maxneigh, max_num_functions); // hard-core repulsion - memoryKK->realloc_kokkos(cr, "pace:cr", natom, maxneigh); - memoryKK->realloc_kokkos(dcr, "pace:dcr", natom, maxneigh); + MemKK::realloc_kokkos(cr, "pace:cr", natom, maxneigh); + MemKK::realloc_kokkos(dcr, "pace:dcr", natom, maxneigh); // spherical harmonics - memoryKK->realloc_kokkos(plm, "pace:plm", natom, maxneigh, (lmax + 1) * (lmax + 1)); - memoryKK->realloc_kokkos(dplm, "pace:dplm", natom, maxneigh, (lmax + 1) * (lmax + 1)); - memoryKK->realloc_kokkos(ylm, "pace:ylm", natom, maxneigh, (lmax + 1) * (lmax + 1)); - memoryKK->realloc_kokkos(dylm, "pace:dylm", natom, maxneigh, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(plm, "pace:plm", natom, maxneigh, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(dplm, "pace:dplm", natom, maxneigh, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(ylm, "pace:ylm", natom, maxneigh, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(dylm, "pace:dylm", natom, maxneigh, (lmax + 1) * (lmax + 1)); // short neigh list - memoryKK->realloc_kokkos(d_ncount, "pace:ncount", natom); - memoryKK->realloc_kokkos(d_mu, "pace:mu", natom, maxneigh); - memoryKK->realloc_kokkos(d_rhats, "pace:rhats", natom, maxneigh); - memoryKK->realloc_kokkos(d_rnorms, "pace:rnorms", natom, maxneigh); - memoryKK->realloc_kokkos(d_nearest, "pace:nearest", natom, maxneigh); + MemKK::realloc_kokkos(d_ncount, "pace:ncount", natom); + MemKK::realloc_kokkos(d_mu, "pace:mu", natom, maxneigh); + MemKK::realloc_kokkos(d_rhats, "pace:rhats", natom, maxneigh); + MemKK::realloc_kokkos(d_rnorms, "pace:rnorms", natom, maxneigh); + MemKK::realloc_kokkos(d_nearest, "pace:nearest", natom, maxneigh); - memoryKK->realloc_kokkos(f_ij, "pace:f_ij", natom, maxneigh); + MemKK::realloc_kokkos(f_ij, "pace:f_ij", natom, maxneigh); } } @@ -163,11 +163,11 @@ void PairPACEKokkos::copy_pertype() { auto basis_set = aceimpl->basis_set; - memoryKK->realloc_kokkos(d_rho_core_cutoff, "pace:rho_core_cutoff", nelements); - memoryKK->realloc_kokkos(d_drho_core_cutoff, "pace:drho_core_cutoff", nelements); - memoryKK->realloc_kokkos(d_E0vals, "pace:E0vals", nelements); - memoryKK->realloc_kokkos(d_ndensity, "pace:ndensity", nelements); - memoryKK->realloc_kokkos(d_npoti, "pace:npoti", nelements); + MemKK::realloc_kokkos(d_rho_core_cutoff, "pace:rho_core_cutoff", nelements); + MemKK::realloc_kokkos(d_drho_core_cutoff, "pace:drho_core_cutoff", nelements); + MemKK::realloc_kokkos(d_E0vals, "pace:E0vals", nelements); + MemKK::realloc_kokkos(d_ndensity, "pace:ndensity", nelements); + MemKK::realloc_kokkos(d_npoti, "pace:npoti", nelements); auto h_rho_core_cutoff = Kokkos::create_mirror_view(d_rho_core_cutoff); auto h_drho_core_cutoff = Kokkos::create_mirror_view(d_drho_core_cutoff); @@ -196,8 +196,8 @@ void PairPACEKokkos::copy_pertype() Kokkos::deep_copy(d_ndensity, h_ndensity); Kokkos::deep_copy(d_npoti, h_npoti); - memoryKK->realloc_kokkos(d_wpre, "pace:wpre", nelements, basis_set->ndensitymax); - memoryKK->realloc_kokkos(d_mexp, "pace:mexp", nelements, basis_set->ndensitymax); + MemKK::realloc_kokkos(d_wpre, "pace:wpre", nelements, basis_set->ndensitymax); + MemKK::realloc_kokkos(d_mexp, "pace:mexp", nelements, basis_set->ndensitymax); auto h_wpre = Kokkos::create_mirror_view(d_wpre); auto h_mexp = Kokkos::create_mirror_view(d_mexp); @@ -266,7 +266,7 @@ void PairPACEKokkos::copy_tilde() idx_rho_max = 0; int total_basis_size_max = 0; - memoryKK->realloc_kokkos(d_idx_rho_count, "pace:idx_rho_count", nelements); + MemKK::realloc_kokkos(d_idx_rho_count, "pace:idx_rho_count", nelements); auto h_idx_rho_count = Kokkos::create_mirror_view(d_idx_rho_count); for (int n = 0; n < nelements; n++) { @@ -295,14 +295,14 @@ void PairPACEKokkos::copy_tilde() Kokkos::deep_copy(d_idx_rho_count, h_idx_rho_count); - memoryKK->realloc_kokkos(d_rank, "pace:rank", nelements, total_basis_size_max); - memoryKK->realloc_kokkos(d_num_ms_combs, "pace:num_ms_combs", nelements, total_basis_size_max); - memoryKK->realloc_kokkos(d_offsets, "pace:offsets", nelements, idx_rho_max); - memoryKK->realloc_kokkos(d_mus, "pace:mus", nelements, total_basis_size_max, basis_set->rankmax); - memoryKK->realloc_kokkos(d_ns, "pace:ns", nelements, total_basis_size_max, basis_set->rankmax); - memoryKK->realloc_kokkos(d_ls, "pace:ls", nelements, total_basis_size_max, basis_set->rankmax); - memoryKK->realloc_kokkos(d_ms_combs, "pace:ms_combs", nelements, idx_rho_max, basis_set->rankmax); - memoryKK->realloc_kokkos(d_ctildes, "pace:ctildes", nelements, idx_rho_max, basis_set->ndensitymax); + MemKK::realloc_kokkos(d_rank, "pace:rank", nelements, total_basis_size_max); + MemKK::realloc_kokkos(d_num_ms_combs, "pace:num_ms_combs", nelements, total_basis_size_max); + MemKK::realloc_kokkos(d_offsets, "pace:offsets", nelements, idx_rho_max); + MemKK::realloc_kokkos(d_mus, "pace:mus", nelements, total_basis_size_max, basis_set->rankmax); + MemKK::realloc_kokkos(d_ns, "pace:ns", nelements, total_basis_size_max, basis_set->rankmax); + MemKK::realloc_kokkos(d_ls, "pace:ls", nelements, total_basis_size_max, basis_set->rankmax); + MemKK::realloc_kokkos(d_ms_combs, "pace:ms_combs", nelements, idx_rho_max, basis_set->rankmax); + MemKK::realloc_kokkos(d_ctildes, "pace:ctildes", nelements, idx_rho_max, basis_set->ndensitymax); auto h_rank = Kokkos::create_mirror_view(d_rank); auto h_num_ms_combs = Kokkos::create_mirror_view(d_num_ms_combs); @@ -418,10 +418,10 @@ void PairPACEKokkos::init_style() // spherical harmonics - memoryKK->realloc_kokkos(alm, "pace:alm", (lmax + 1) * (lmax + 1)); - memoryKK->realloc_kokkos(blm, "pace:blm", (lmax + 1) * (lmax + 1)); - memoryKK->realloc_kokkos(cl, "pace:cl", lmax + 1); - memoryKK->realloc_kokkos(dl, "pace:dl", lmax + 1); + MemKK::realloc_kokkos(alm, "pace:alm", (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(blm, "pace:blm", (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(cl, "pace:cl", lmax + 1); + MemKK::realloc_kokkos(dl, "pace:dl", lmax + 1); pre_compute_harmonics(lmax); copy_pertype(); @@ -474,12 +474,12 @@ void PairPACEKokkos::allocate() PairPACE::allocate(); int n = atom->ntypes + 1; - memoryKK->realloc_kokkos(d_map, "pace:map", n); + MemKK::realloc_kokkos(d_map, "pace:map", n); - memoryKK->realloc_kokkos(k_cutsq, "pace:cutsq", n, n); + MemKK::realloc_kokkos(k_cutsq, "pace:cutsq", n, n); d_cutsq = k_cutsq.template view(); - memoryKK->realloc_kokkos(k_scale, "pace:scale", n, n); + MemKK::realloc_kokkos(k_scale, "pace:scale", n, n); d_scale = k_scale.template view(); } @@ -1651,56 +1651,56 @@ double PairPACEKokkos::memory_usage() { double bytes = 0; - bytes += memoryKK->memory_usage(A); - bytes += memoryKK->memory_usage(A_rank1); - bytes += memoryKK->memory_usage(A_list); - bytes += memoryKK->memory_usage(A_forward_prod); - bytes += memoryKK->memory_usage(e_atom); - bytes += memoryKK->memory_usage(rhos); - bytes += memoryKK->memory_usage(dF_drho); - bytes += memoryKK->memory_usage(weights); - bytes += memoryKK->memory_usage(weights_rank1); - bytes += memoryKK->memory_usage(rho_core); - bytes += memoryKK->memory_usage(dF_drho_core); - bytes += memoryKK->memory_usage(dB_flatten); - bytes += memoryKK->memory_usage(fr); - bytes += memoryKK->memory_usage(dfr); - bytes += memoryKK->memory_usage(gr); - bytes += memoryKK->memory_usage(dgr); - bytes += memoryKK->memory_usage(d_values); - bytes += memoryKK->memory_usage(d_derivatives); - bytes += memoryKK->memory_usage(cr); - bytes += memoryKK->memory_usage(dcr); - bytes += memoryKK->memory_usage(plm); - bytes += memoryKK->memory_usage(dplm); - bytes += memoryKK->memory_usage(ylm); - bytes += memoryKK->memory_usage(dylm); - bytes += memoryKK->memory_usage(d_ncount); - bytes += memoryKK->memory_usage(d_mu); - bytes += memoryKK->memory_usage(d_rhats); - bytes += memoryKK->memory_usage(d_rnorms); - bytes += memoryKK->memory_usage(d_nearest); - bytes += memoryKK->memory_usage(f_ij); - bytes += memoryKK->memory_usage(d_rho_core_cutoff); - bytes += memoryKK->memory_usage(d_drho_core_cutoff); - bytes += memoryKK->memory_usage(d_E0vals); - bytes += memoryKK->memory_usage(d_ndensity); - bytes += memoryKK->memory_usage(d_npoti); - bytes += memoryKK->memory_usage(d_wpre); - bytes += memoryKK->memory_usage(d_mexp); - bytes += memoryKK->memory_usage(d_idx_rho_count); - bytes += memoryKK->memory_usage(d_rank); - bytes += memoryKK->memory_usage(d_num_ms_combs); - bytes += memoryKK->memory_usage(d_offsets); - bytes += memoryKK->memory_usage(d_mus); - bytes += memoryKK->memory_usage(d_ns); - bytes += memoryKK->memory_usage(d_ls); - bytes += memoryKK->memory_usage(d_ms_combs); - bytes += memoryKK->memory_usage(d_ctildes); - bytes += memoryKK->memory_usage(alm); - bytes += memoryKK->memory_usage(blm); - bytes += memoryKK->memory_usage(cl); - bytes += memoryKK->memory_usage(dl); + bytes += MemKK::memory_usage(A); + bytes += MemKK::memory_usage(A_rank1); + bytes += MemKK::memory_usage(A_list); + bytes += MemKK::memory_usage(A_forward_prod); + bytes += MemKK::memory_usage(e_atom); + bytes += MemKK::memory_usage(rhos); + bytes += MemKK::memory_usage(dF_drho); + bytes += MemKK::memory_usage(weights); + bytes += MemKK::memory_usage(weights_rank1); + bytes += MemKK::memory_usage(rho_core); + bytes += MemKK::memory_usage(dF_drho_core); + bytes += MemKK::memory_usage(dB_flatten); + bytes += MemKK::memory_usage(fr); + bytes += MemKK::memory_usage(dfr); + bytes += MemKK::memory_usage(gr); + bytes += MemKK::memory_usage(dgr); + bytes += MemKK::memory_usage(d_values); + bytes += MemKK::memory_usage(d_derivatives); + bytes += MemKK::memory_usage(cr); + bytes += MemKK::memory_usage(dcr); + bytes += MemKK::memory_usage(plm); + bytes += MemKK::memory_usage(dplm); + bytes += MemKK::memory_usage(ylm); + bytes += MemKK::memory_usage(dylm); + bytes += MemKK::memory_usage(d_ncount); + bytes += MemKK::memory_usage(d_mu); + bytes += MemKK::memory_usage(d_rhats); + bytes += MemKK::memory_usage(d_rnorms); + bytes += MemKK::memory_usage(d_nearest); + bytes += MemKK::memory_usage(f_ij); + bytes += MemKK::memory_usage(d_rho_core_cutoff); + bytes += MemKK::memory_usage(d_drho_core_cutoff); + bytes += MemKK::memory_usage(d_E0vals); + bytes += MemKK::memory_usage(d_ndensity); + bytes += MemKK::memory_usage(d_npoti); + bytes += MemKK::memory_usage(d_wpre); + bytes += MemKK::memory_usage(d_mexp); + bytes += MemKK::memory_usage(d_idx_rho_count); + bytes += MemKK::memory_usage(d_rank); + bytes += MemKK::memory_usage(d_num_ms_combs); + bytes += MemKK::memory_usage(d_offsets); + bytes += MemKK::memory_usage(d_mus); + bytes += MemKK::memory_usage(d_ns); + bytes += MemKK::memory_usage(d_ls); + bytes += MemKK::memory_usage(d_ms_combs); + bytes += MemKK::memory_usage(d_ctildes); + bytes += MemKK::memory_usage(alm); + bytes += MemKK::memory_usage(blm); + bytes += MemKK::memory_usage(cl); + bytes += MemKK::memory_usage(dl); if (k_splines_gk.h_view.data()) { for (int i = 0; i < nelements; i++) { diff --git a/src/KOKKOS/pair_reaxff_kokkos.cpp b/src/KOKKOS/pair_reaxff_kokkos.cpp index 192300d642..7f905c069f 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.cpp +++ b/src/KOKKOS/pair_reaxff_kokkos.cpp @@ -36,6 +36,7 @@ #include "kokkos.h" #include "math_const.h" #include "math_special.h" +#include "memory_kokkos.h" #include "neigh_request.h" #include "neighbor.h" @@ -78,8 +79,8 @@ PairReaxFFKokkos::PairReaxFFKokkos(LAMMPS *lmp) : PairReaxFF(lmp) k_error_flag = DAT::tdual_int_scalar("pair:error_flag"); k_nbuf_local = DAT::tdual_int_scalar("pair:nbuf_local"); - d_torsion_pack = t_reax_int4_2d("reaxff:torsion_pack",1,2); - d_angular_pack = t_reax_int4_2d("reaxff:angular_pack",1,2); + MemKK::realloc_kokkos(d_torsion_pack,"reaxff:torsion_pack",1,2); + MemKK::realloc_kokkos(d_angular_pack,"reaxff:angular_pack",1,2); k_count_angular_torsion = DAT::tdual_int_1d("PairReaxFF::count_angular_torsion",2); d_count_angular_torsion = k_count_angular_torsion.template view(); @@ -959,9 +960,9 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) count_torsion = h_count_angular_torsion(1); if (count_angular > (int)d_angular_pack.extent(0)) - d_angular_pack = t_reax_int4_2d("reaxff:angular_pack",(int)(count_angular * 1.1),2); + MemKK::realloc_kokkos(d_angular_pack,"reaxff:angular_pack",(int)(count_angular * 1.1),2); if (count_torsion > (int)d_torsion_pack.extent(0)) - d_torsion_pack = t_reax_int4_2d("reaxff:torsion_pack",(int)(count_torsion * 1.1),2); + MemKK::realloc_kokkos(d_torsion_pack,"reaxff:torsion_pack",(int)(count_torsion * 1.1),2); // need to zero to re-count h_count_angular_torsion(0) = 0; @@ -1512,131 +1513,63 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeTabulatedLJCoulo template void PairReaxFFKokkos::allocate_array() { - // deallocate first to reduce memory overhead - - deallocate_array(); - if (cut_hbsq > 0.0) { - d_hb_first = typename AT::t_int_1d("reaxff/kk:hb_first",nmax); - d_hb_num = typename AT::t_int_1d("reaxff/kk:hb_num",nmax); - d_hb_list = typename AT::t_int_1d("reaxff/kk:hb_list",nmax*maxhb); + MemKK::realloc_kokkos(d_hb_first,"reaxff/kk:hb_first",nmax); + MemKK::realloc_kokkos(d_hb_num,"reaxff/kk:hb_num",nmax); + MemKK::realloc_kokkos(d_hb_list,"reaxff/kk:hb_list",nmax*maxhb); } - d_bo_first = typename AT::t_int_1d("reaxff/kk:bo_first",nmax); - d_bo_num = typename AT::t_int_1d("reaxff/kk:bo_num",nmax); - d_bo_list = typename AT::t_int_1d("reaxff/kk:bo_list",nmax*maxbo); + MemKK::realloc_kokkos(d_bo_first,"reaxff/kk:bo_first",nmax); + MemKK::realloc_kokkos(d_bo_num,"reaxff/kk:bo_num",nmax); + MemKK::realloc_kokkos(d_bo_list,"reaxff/kk:bo_list",nmax*maxbo); - d_BO = typename AT::t_ffloat_2d_dl("reaxff/kk:BO",nmax,maxbo); - d_BO_s = typename AT::t_ffloat_2d_dl("reaxff/kk:BO",nmax,maxbo); - d_BO_pi = typename AT::t_ffloat_2d_dl("reaxff/kk:BO_pi",nmax,maxbo); - d_BO_pi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:BO_pi2",nmax,maxbo); + MemKK::realloc_kokkos(d_BO,"reaxff/kk:BO",nmax,maxbo); + MemKK::realloc_kokkos(d_BO_s,"reaxff/kk:BO",nmax,maxbo); + MemKK::realloc_kokkos(d_BO_pi,"reaxff/kk:BO_pi",nmax,maxbo); + MemKK::realloc_kokkos(d_BO_pi2,"reaxff/kk:BO_pi2",nmax,maxbo); - d_dln_BOp_pi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_dln_BOp_pi",nmax,maxbo); - d_dln_BOp_pi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_dln_BOp_pi2",nmax,maxbo); + MemKK::realloc_kokkos(d_dln_BOp_pi,"reaxff/kk:d_dln_BOp_pi",nmax,maxbo); + MemKK::realloc_kokkos(d_dln_BOp_pi2,"reaxff/kk:d_dln_BOp_pi2",nmax,maxbo); - d_C1dbo = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C1dbo",nmax,maxbo); - d_C2dbo = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C2dbo",nmax,maxbo); - d_C3dbo = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C3dbo",nmax,maxbo); + MemKK::realloc_kokkos(d_C1dbo,"reaxff/kk:d_C1dbo",nmax,maxbo); + MemKK::realloc_kokkos(d_C2dbo,"reaxff/kk:d_C2dbo",nmax,maxbo); + MemKK::realloc_kokkos(d_C3dbo,"reaxff/kk:d_C3dbo",nmax,maxbo); - d_C1dbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C1dbopi",nmax,maxbo); - d_C2dbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C2dbopi",nmax,maxbo); - d_C3dbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C3dbopi",nmax,maxbo); - d_C4dbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C4dbopi",nmax,maxbo); + MemKK::realloc_kokkos(d_C1dbopi,"reaxff/kk:d_C1dbopi",nmax,maxbo); + MemKK::realloc_kokkos(d_C2dbopi,"reaxff/kk:d_C2dbopi",nmax,maxbo); + MemKK::realloc_kokkos(d_C3dbopi,"reaxff/kk:d_C3dbopi",nmax,maxbo); + MemKK::realloc_kokkos(d_C4dbopi,"reaxff/kk:d_C4dbopi",nmax,maxbo); - d_C1dbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C1dbopi2",nmax,maxbo); - d_C2dbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C2dbopi2",nmax,maxbo); - d_C3dbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C3dbopi2",nmax,maxbo); - d_C4dbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C4dbopi2",nmax,maxbo); + MemKK::realloc_kokkos(d_C1dbopi2,"reaxff/kk:d_C1dbopi2",nmax,maxbo); + MemKK::realloc_kokkos(d_C2dbopi2,"reaxff/kk:d_C2dbopi2",nmax,maxbo); + MemKK::realloc_kokkos(d_C3dbopi2,"reaxff/kk:d_C3dbopi2",nmax,maxbo); + MemKK::realloc_kokkos(d_C4dbopi2,"reaxff/kk:d_C4dbopi2",nmax,maxbo); - d_dBOp = typename AT::t_ffloat_2d_dl("reaxff/kk:dBOp",nmax,maxbo); + MemKK::realloc_kokkos(d_dBOp,"reaxff/kk:dBOp",nmax,maxbo); - d_dDeltap_self = typename AT::t_ffloat_2d_dl("reaxff/kk:dDeltap_self",nmax,3); - d_Deltap_boc = typename AT::t_ffloat_1d("reaxff/kk:Deltap_boc",nmax); - d_Deltap = typename AT::t_ffloat_1d("reaxff/kk:Deltap",nmax); - d_total_bo = typename AT::t_ffloat_1d("reaxff/kk:total_bo",nmax); + MemKK::realloc_kokkos(d_dDeltap_self,"reaxff/kk:dDeltap_self",nmax,3); + MemKK::realloc_kokkos(d_Deltap_boc,"reaxff/kk:Deltap_boc",nmax); + MemKK::realloc_kokkos(d_Deltap,"reaxff/kk:Deltap",nmax); + MemKK::realloc_kokkos(d_total_bo,"reaxff/kk:total_bo",nmax); - d_Cdbo = typename AT::t_ffloat_2d_dl("reaxff/kk:Cdbo",nmax,3*maxbo); - d_Cdbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:Cdbopi",nmax,3*maxbo); - d_Cdbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:Cdbopi2",nmax,3*maxbo); + MemKK::realloc_kokkos(d_Cdbo,"reaxff/kk:Cdbo",nmax,3*maxbo); + MemKK::realloc_kokkos(d_Cdbopi,"reaxff/kk:Cdbopi",nmax,3*maxbo); + MemKK::realloc_kokkos(d_Cdbopi2,"reaxff/kk:Cdbopi2",nmax,3*maxbo); - d_Delta = typename AT::t_ffloat_1d("reaxff/kk:Delta",nmax); - d_Delta_boc = typename AT::t_ffloat_1d("reaxff/kk:Delta_boc",nmax); - d_dDelta_lp = typename AT::t_ffloat_1d("reaxff/kk:dDelta_lp",nmax); - d_Delta_lp = typename AT::t_ffloat_1d("reaxff/kk:Delta_lp",nmax); - d_Delta_lp_temp = typename AT::t_ffloat_1d("reaxff/kk:Delta_lp_temp",nmax); - d_CdDelta = typename AT::t_ffloat_1d("reaxff/kk:CdDelta",nmax); - d_sum_ovun = typename AT::t_ffloat_2d_dl("reaxff/kk:sum_ovun",nmax,3); + MemKK::realloc_kokkos(d_Delta,"reaxff/kk:Delta",nmax); + MemKK::realloc_kokkos(d_Delta_boc,"reaxff/kk:Delta_boc",nmax); + MemKK::realloc_kokkos(d_dDelta_lp,"reaxff/kk:dDelta_lp",nmax); + MemKK::realloc_kokkos(d_Delta_lp,"reaxff/kk:Delta_lp",nmax); + MemKK::realloc_kokkos(d_Delta_lp_temp,"reaxff/kk:Delta_lp_temp",nmax); + MemKK::realloc_kokkos(d_CdDelta,"reaxff/kk:CdDelta",nmax); + MemKK::realloc_kokkos(d_sum_ovun,"reaxff/kk:sum_ovun",nmax,3); // FixReaxFFBonds - d_abo = typename AT::t_ffloat_2d("reaxff/kk:abo",nmax,maxbo); - d_neighid = typename AT::t_tagint_2d("reaxff/kk:neighid",nmax,maxbo); - d_numneigh_bonds = typename AT::t_int_1d("reaxff/kk:numneigh_bonds",nmax); + MemKK::realloc_kokkos(d_abo,"reaxff/kk:abo",nmax,maxbo); + MemKK::realloc_kokkos(d_neighid,"reaxff/kk:neighid",nmax,maxbo); + MemKK::realloc_kokkos(d_numneigh_bonds,"reaxff/kk:numneigh_bonds",nmax); // ComputeAngular intermediates - d_angular_intermediates = typename AT::t_ffloat_2d("reaxff/kk:angular_intermediates",nmax,4); -} - -/* ---------------------------------------------------------------------- */ - -template -void PairReaxFFKokkos::deallocate_array() -{ - if (cut_hbsq > 0.0) { - d_hb_first = typename AT::t_int_1d(); - d_hb_num = typename AT::t_int_1d(); - d_hb_list = typename AT::t_int_1d(); - } - d_bo_first = typename AT::t_int_1d(); - d_bo_num = typename AT::t_int_1d(); - d_bo_list = typename AT::t_int_1d(); - - d_BO = typename AT::t_ffloat_2d_dl(); - d_BO_s = typename AT::t_ffloat_2d_dl(); - d_BO_pi = typename AT::t_ffloat_2d_dl(); - d_BO_pi2 = typename AT::t_ffloat_2d_dl(); - - d_dln_BOp_pi = typename AT::t_ffloat_2d_dl(); - d_dln_BOp_pi2 = typename AT::t_ffloat_2d_dl(); - - d_C1dbo = typename AT::t_ffloat_2d_dl(); - d_C2dbo = typename AT::t_ffloat_2d_dl(); - d_C3dbo = typename AT::t_ffloat_2d_dl(); - - d_C1dbopi = typename AT::t_ffloat_2d_dl(); - d_C2dbopi = typename AT::t_ffloat_2d_dl(); - d_C3dbopi = typename AT::t_ffloat_2d_dl(); - d_C4dbopi = typename AT::t_ffloat_2d_dl(); - - d_C1dbopi2 = typename AT::t_ffloat_2d_dl(); - d_C2dbopi2 = typename AT::t_ffloat_2d_dl(); - d_C3dbopi2 = typename AT::t_ffloat_2d_dl(); - d_C4dbopi2 = typename AT::t_ffloat_2d_dl(); - - d_dBOp = typename AT::t_ffloat_2d_dl(); - - d_dDeltap_self = typename AT::t_ffloat_2d_dl(); - d_Deltap_boc = typename AT::t_ffloat_1d(); - d_Deltap = typename AT::t_ffloat_1d(); - d_total_bo = typename AT::t_ffloat_1d(); - - d_Cdbo = typename AT::t_ffloat_2d_dl(); - d_Cdbopi = typename AT::t_ffloat_2d_dl(); - d_Cdbopi2 = typename AT::t_ffloat_2d_dl(); - - d_Delta = typename AT::t_ffloat_1d(); - d_Delta_boc = typename AT::t_ffloat_1d(); - d_dDelta_lp = typename AT::t_ffloat_1d(); - d_Delta_lp = typename AT::t_ffloat_1d(); - d_Delta_lp_temp = typename AT::t_ffloat_1d(); - d_CdDelta = typename AT::t_ffloat_1d(); - d_sum_ovun = typename AT::t_ffloat_2d_dl(); - - // FixReaxFFBonds - d_abo = typename AT::t_ffloat_2d(); - d_neighid = typename AT::t_tagint_2d(); - d_numneigh_bonds = typename AT::t_int_1d(); - - // ComputeAngular intermediates - d_angular_intermediates = typename AT::t_ffloat_2d(); + MemKK::realloc_kokkos(d_angular_intermediates,"reaxff/kk:angular_intermediates",nmax,4); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/pair_reaxff_kokkos.h b/src/KOKKOS/pair_reaxff_kokkos.h index 64d9ced875..434f3c63fa 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.h +++ b/src/KOKKOS/pair_reaxff_kokkos.h @@ -384,7 +384,6 @@ class PairReaxFFKokkos : public PairReaxFF { protected: void allocate(); void allocate_array(); - void deallocate_array(); void setup(); void init_md(); int Init_Lookup_Tables(); diff --git a/src/KOKKOS/pair_snap_kokkos.h b/src/KOKKOS/pair_snap_kokkos.h index 2cd0003478..2937359822 100644 --- a/src/KOKKOS/pair_snap_kokkos.h +++ b/src/KOKKOS/pair_snap_kokkos.h @@ -238,44 +238,14 @@ class PairSNAPKokkos : public PairSNAP { typename AT::t_efloat_1d d_eatom; typename AT::t_virial_array d_vatom; - typedef Kokkos::View t_bvec; - t_bvec bvec; - typedef Kokkos::View t_dbvec; - t_dbvec dbvec; SNAKokkos snaKK; int inum,max_neighs,chunk_size,chunk_offset; - int host_flag; + int host_flag,neighflag; int eflag,vflag; void allocate() override; - //void read_files(char *, char *); - /*template - inline int equal(double* x,double* y); - template - inline double dist2(double* x,double* y); - double extra_cutoff(); - void load_balance(); - void set_sna_to_shared(int snaid,int i); - void build_per_atom_arrays();*/ - - int neighflag; - - Kokkos::View ilistmast; - Kokkos::View ghostilist; - Kokkos::View ghostnumneigh; - Kokkos::View ghostneighs; - Kokkos::View ghostfirstneigh; - - Kokkos::View i_pairs; - Kokkos::View i_rij; - Kokkos::View i_inside; - Kokkos::View i_wj; - Kokkos::Viewi_rcutij; - Kokkos::View i_ninside; - Kokkos::View i_uarraytot_r, i_uarraytot_i; - Kokkos::View i_zarray_r, i_zarray_i; Kokkos::View d_radelem; // element radii Kokkos::View d_wjelem; // elements weights @@ -286,7 +256,6 @@ class PairSNAPKokkos : public PairSNAP { Kokkos::View d_ninside; // ninside for all atoms in list Kokkos::View d_beta; // betas for all atoms in list Kokkos::View d_beta_pack; // betas for all atoms in list, GPU - Kokkos::View d_bispectrum; // bispectrum components for all atoms in list typedef Kokkos::DualView tdual_fparams; tdual_fparams k_cutsq; diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index c2657f8534..0656a1f9ee 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -206,10 +206,10 @@ void PairSNAPKokkos::compute(int eflag_in, if (beta_max < inum) { beta_max = inum; - d_beta = Kokkos::View("PairSNAPKokkos:beta",ncoeff,inum); + MemKK::realloc_kokkos(d_beta,"PairSNAPKokkos:beta",ncoeff,inum); if (!host_flag) - d_beta_pack = Kokkos::View("PairSNAPKokkos:beta_pack",vector_length,ncoeff,(inum + vector_length - 1) / vector_length); - d_ninside = Kokkos::View("PairSNAPKokkos:ninside",inum); + MemKK::realloc_kokkos(d_beta_pack,"PairSNAPKokkos:beta_pack",vector_length,ncoeff,(inum + vector_length - 1) / vector_length); + MemKK::realloc_kokkos(d_ninside,"PairSNAPKokkos:ninside",inum); } chunk_size = MIN(chunksize,inum); // "chunksize" variable is set by user @@ -545,7 +545,7 @@ void PairSNAPKokkos::allocate() PairSNAP::allocate(); int n = atom->ntypes; - d_map = Kokkos::View("PairSNAPKokkos::map",n+1); + MemKK::realloc_kokkos(d_map,"PairSNAPKokkos::map",n+1); } @@ -574,11 +574,11 @@ void PairSNAPKokkos::coeff(int narg, char // Set up element lists - d_radelem = Kokkos::View("pair:radelem",nelements); - d_wjelem = Kokkos::View("pair:wjelem",nelements); - d_coeffelem = Kokkos::View("pair:coeffelem",nelements,ncoeffall); - d_sinnerelem = Kokkos::View("pair:sinnerelem",nelements); - d_dinnerelem = Kokkos::View("pair:dinnerelem",nelements); + MemKK::realloc_kokkos(d_radelem,"pair:radelem",nelements); + MemKK::realloc_kokkos(d_wjelem,"pair:wjelem",nelements); + MemKK::realloc_kokkos(d_coeffelem,"pair:coeffelem",nelements,ncoeffall); + MemKK::realloc_kokkos(d_sinnerelem,"pair:sinnerelem",nelements); + MemKK::realloc_kokkos(d_dinnerelem,"pair:dinnerelem",nelements); auto h_radelem = Kokkos::create_mirror_view(d_radelem); auto h_wjelem = Kokkos::create_mirror_view(d_wjelem); @@ -1411,12 +1411,16 @@ template double PairSNAPKokkos::memory_usage() { double bytes = Pair::memory_usage(); - int n = atom->ntypes+1; - bytes += n*n*sizeof(int); - bytes += n*n*sizeof(real_type); - bytes += (2*ncoeffall)*sizeof(real_type); - bytes += (ncoeff*3)*sizeof(real_type); - bytes += snaKK.memory_usage(); + bytes += MemKK::memory_usage(d_beta); + if (!host_flag) + bytes += MemKK::memory_usage(d_beta_pack); + bytes += MemKK::memory_usage(d_ninside); + bytes += MemKK::memory_usage(d_map); + bytes += MemKK::memory_usage(d_radelem); + bytes += MemKK::memory_usage(d_wjelem); + bytes += MemKK::memory_usage(d_coeffelem); + bytes += MemKK::memory_usage(d_sinnerelem); + bytes += MemKK::memory_usage(d_dinnerelem); return bytes; } diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index 77130b9781..8d81a4a65b 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -17,6 +17,7 @@ ------------------------------------------------------------------------- */ #include "sna_kokkos.h" +#include "memory_kokkos.h" #include #include #include @@ -62,12 +63,12 @@ SNAKokkos::SNAKokkos(real_type rfac0_in, build_indexlist(); int jdimpq = twojmax + 2; - rootpqarray = t_sna_2d("SNAKokkos::rootpqarray",jdimpq,jdimpq); + MemKK::realloc_kokkos(rootpqarray,"SNAKokkos::rootpqarray",jdimpq,jdimpq); - cglist = t_sna_1d("SNAKokkos::cglist",idxcg_max); + MemKK::realloc_kokkos(cglist,"SNAKokkos::cglist",idxcg_max); if (bzero_flag) { - bzero = Kokkos::View("sna:bzero",twojmax+1); + MemKK::realloc_kokkos(bzero,"sna:bzero",twojmax+1); auto h_bzero = Kokkos::create_mirror_view(bzero); double www = wself*wself*wself; @@ -95,7 +96,7 @@ void SNAKokkos::build_indexlist() // index list for cglist int jdim = twojmax + 1; - idxcg_block = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxcg_block"),jdim,jdim,jdim); + MemKK::realloc_kokkos(idxcg_block,"SNAKokkos::idxcg_block",jdim,jdim,jdim); auto h_idxcg_block = Kokkos::create_mirror_view(idxcg_block); int idxcg_count = 0; @@ -113,7 +114,7 @@ void SNAKokkos::build_indexlist() // index list for uarray // need to include both halves - idxu_block = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxu_block"),jdim); + MemKK::realloc_kokkos(idxu_block,"SNAKokkos::idxu_block",jdim); auto h_idxu_block = Kokkos::create_mirror_view(idxu_block); int idxu_count = 0; @@ -128,7 +129,7 @@ void SNAKokkos::build_indexlist() Kokkos::deep_copy(idxu_block,h_idxu_block); // index list for half uarray - idxu_half_block = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxu_half_block"),jdim); + MemKK::realloc_kokkos(idxu_half_block,"SNAKokkos::idxu_half_block",jdim); auto h_idxu_half_block = Kokkos::create_mirror_view(idxu_half_block); int idxu_half_count = 0; @@ -142,7 +143,7 @@ void SNAKokkos::build_indexlist() Kokkos::deep_copy(idxu_half_block, h_idxu_half_block); // mapping between full and half indexing, encoding flipping - idxu_full_half = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxu_full_half"),idxu_max); + MemKK::realloc_kokkos(idxu_full_half,"SNAKokkos::idxu_full_half",idxu_max); auto h_idxu_full_half = Kokkos::create_mirror_view(idxu_full_half); idxu_count = 0; @@ -169,7 +170,7 @@ void SNAKokkos::build_indexlist() // index list for "cache" uarray // this is the GPU scratch memory requirements // applied the CPU structures - idxu_cache_block = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxu_cache_block"),jdim); + MemKK::realloc_kokkos(idxu_cache_block,"SNAKokkos::idxu_cache_block",jdim); auto h_idxu_cache_block = Kokkos::create_mirror_view(idxu_cache_block); int idxu_cache_count = 0; @@ -191,7 +192,7 @@ void SNAKokkos::build_indexlist() if (j >= j1) idxb_count++; idxb_max = idxb_count; - idxb = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxb"),idxb_max); + MemKK::realloc_kokkos(idxb,"SNAKokkos::idxb",idxb_max); auto h_idxb = Kokkos::create_mirror_view(idxb); idxb_count = 0; @@ -208,7 +209,7 @@ void SNAKokkos::build_indexlist() // reverse index list for beta and b - idxb_block = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxb_block"),jdim,jdim,jdim); + MemKK::realloc_kokkos(idxb_block,"SNAKokkos::idxb_block",jdim,jdim,jdim); auto h_idxb_block = Kokkos::create_mirror_view(idxb_block); idxb_count = 0; @@ -234,10 +235,10 @@ void SNAKokkos::build_indexlist() idxz_count++; idxz_max = idxz_count; - idxz = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxz"),idxz_max); + MemKK::realloc_kokkos(idxz,"SNAKokkos::idxz",idxz_max); auto h_idxz = Kokkos::create_mirror_view(idxz); - idxz_block = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxz_block"), jdim,jdim,jdim); + MemKK::realloc_kokkos(idxz_block,"SNAKokkos::idxz_block", jdim,jdim,jdim); auto h_idxz_block = Kokkos::create_mirror_view(idxz_block); idxz_count = 0; @@ -294,59 +295,59 @@ void SNAKokkos::grow_rij(int newnatom, int natom = newnatom; nmax = newnmax; - rij = t_sna_3d(Kokkos::NoInit("sna:rij"),natom,nmax,3); - wj = t_sna_2d(Kokkos::NoInit("sna:wj"),natom,nmax); - rcutij = t_sna_2d(Kokkos::NoInit("sna:rcutij"),natom,nmax); - sinnerij = t_sna_2d(Kokkos::NoInit("sna:sinnerij"),natom,nmax); - dinnerij = t_sna_2d(Kokkos::NoInit("sna:dinnerij"),natom,nmax); - inside = t_sna_2i(Kokkos::NoInit("sna:inside"),natom,nmax); - element = t_sna_2i(Kokkos::NoInit("sna:element"),natom,nmax); - dedr = t_sna_3d(Kokkos::NoInit("sna:dedr"),natom,nmax,3); + MemKK::realloc_kokkos(rij,"sna:rij",natom,nmax,3); + MemKK::realloc_kokkos(wj,"sna:wj",natom,nmax); + MemKK::realloc_kokkos(rcutij,"sna:rcutij",natom,nmax); + MemKK::realloc_kokkos(sinnerij,"sna:sinnerij",natom,nmax); + MemKK::realloc_kokkos(dinnerij,"sna:dinnerij",natom,nmax); + MemKK::realloc_kokkos(inside,"sna:inside",natom,nmax); + MemKK::realloc_kokkos(element,"sna:element",natom,nmax); + MemKK::realloc_kokkos(dedr,"sna:dedr",natom,nmax,3); #ifdef LMP_KOKKOS_GPU if (!host_flag) { const int natom_div = (natom + vector_length - 1) / vector_length; - a_pack = t_sna_3c_ll(Kokkos::NoInit("sna:a_pack"),vector_length,nmax,natom_div); - b_pack = t_sna_3c_ll(Kokkos::NoInit("sna:b_pack"),vector_length,nmax,natom_div); - da_pack = t_sna_4c_ll(Kokkos::NoInit("sna:da_pack"),vector_length,nmax,natom_div,3); - db_pack = t_sna_4c_ll(Kokkos::NoInit("sna:db_pack"),vector_length,nmax,natom_div,3); - sfac_pack = t_sna_4d_ll(Kokkos::NoInit("sna:sfac_pack"),vector_length,nmax,natom_div,4); - ulisttot = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),1,1,1); // dummy allocation - ulisttot_full = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),1,1,1); - ulisttot_re_pack = t_sna_4d_ll(Kokkos::NoInit("sna:ulisttot_re_pack"),vector_length,idxu_half_max,nelements,natom_div); - ulisttot_im_pack = t_sna_4d_ll(Kokkos::NoInit("sna:ulisttot_im_pack"),vector_length,idxu_half_max,nelements,natom_div); - ulisttot_pack = t_sna_4c_ll(Kokkos::NoInit("sna:ulisttot_pack"),vector_length,idxu_max,nelements,natom_div); - ulist = t_sna_3c_ll(Kokkos::NoInit("sna:ulist"),1,1,1); - zlist = t_sna_3c_ll(Kokkos::NoInit("sna:zlist"),1,1,1); - zlist_pack = t_sna_4c_ll(Kokkos::NoInit("sna:zlist_pack"),vector_length,idxz_max,ndoubles,natom_div); - blist = t_sna_3d(Kokkos::NoInit("sna:blist"),natom,ntriples,idxb_max); - blist_pack = t_sna_4d_ll(Kokkos::NoInit("sna:blist_pack"),vector_length,idxb_max,ntriples,natom_div); - ylist = t_sna_3c_ll(Kokkos::NoInit("sna:ylist"),1,1,1); - ylist_pack_re = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_re"),vector_length,idxu_half_max,nelements,natom_div); - ylist_pack_im = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_im"),vector_length,idxu_half_max,nelements,natom_div); - dulist = t_sna_4c3_ll(Kokkos::NoInit("sna:dulist"),1,1,1); + MemKK::realloc_kokkos(a_pack,"sna:a_pack",vector_length,nmax,natom_div); + MemKK::realloc_kokkos(b_pack,"sna:b_pack",vector_length,nmax,natom_div); + MemKK::realloc_kokkos(da_pack,"sna:da_pack",vector_length,nmax,natom_div,3); + MemKK::realloc_kokkos(db_pack,"sna:db_pack",vector_length,nmax,natom_div,3); + MemKK::realloc_kokkos(sfac_pack,"sna:sfac_pack",vector_length,nmax,natom_div,4); + MemKK::realloc_kokkos(ulisttot,"sna:ulisttot",1,1,1); // dummy allocation + MemKK::realloc_kokkos(ulisttot_full,"sna:ulisttot",1,1,1); + MemKK::realloc_kokkos(ulisttot_re_pack,"sna:ulisttot_re_pack",vector_length,idxu_half_max,nelements,natom_div); + MemKK::realloc_kokkos(ulisttot_im_pack,"sna:ulisttot_im_pack",vector_length,idxu_half_max,nelements,natom_div); + MemKK::realloc_kokkos(ulisttot_pack,"sna:ulisttot_pack",vector_length,idxu_max,nelements,natom_div); + MemKK::realloc_kokkos(ulist,"sna:ulist",1,1,1); + MemKK::realloc_kokkos(zlist,"sna:zlist",1,1,1); + MemKK::realloc_kokkos(zlist_pack,"sna:zlist_pack",vector_length,idxz_max,ndoubles,natom_div); + MemKK::realloc_kokkos(blist,"sna:blist",natom,ntriples,idxb_max); + MemKK::realloc_kokkos(blist_pack,"sna:blist_pack",vector_length,idxb_max,ntriples,natom_div); + MemKK::realloc_kokkos(ylist,"sna:ylist",1,1,1); + MemKK::realloc_kokkos(ylist_pack_re,"sna:ylist_pack_re",vector_length,idxu_half_max,nelements,natom_div); + MemKK::realloc_kokkos(ylist_pack_im,"sna:ylist_pack_im",vector_length,idxu_half_max,nelements,natom_div); + MemKK::realloc_kokkos(dulist,"sna:dulist",1,1,1); } else { #endif - a_pack = t_sna_3c_ll(Kokkos::NoInit("sna:a_pack"),1,1,1); - b_pack = t_sna_3c_ll(Kokkos::NoInit("sna:b_pack"),1,1,1); - da_pack = t_sna_4c_ll(Kokkos::NoInit("sna:da_pack"),1,1,1,1); - db_pack = t_sna_4c_ll(Kokkos::NoInit("sna:db_pack"),1,1,1,1); - sfac_pack = t_sna_4d_ll(Kokkos::NoInit("sna:sfac_pack"),1,1,1,1); - ulisttot = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),idxu_half_max,nelements,natom); - ulisttot_full = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot_full"),idxu_max,nelements,natom); - ulisttot_re_pack = t_sna_4d_ll(Kokkos::NoInit("sna:ulisttot_re"),1,1,1,1); - ulisttot_im_pack = t_sna_4d_ll(Kokkos::NoInit("sna:ulisttot_im"),1,1,1,1); - ulisttot_pack = t_sna_4c_ll(Kokkos::NoInit("sna:ulisttot_pack"),1,1,1,1); - ulist = t_sna_3c_ll(Kokkos::NoInit("sna:ulist"),idxu_cache_max,natom,nmax); - zlist = t_sna_3c_ll(Kokkos::NoInit("sna:zlist"),idxz_max,ndoubles,natom); - zlist_pack = t_sna_4c_ll(Kokkos::NoInit("sna:zlist_pack"),1,1,1,1); - blist = t_sna_3d(Kokkos::NoInit("sna:blist"),natom,ntriples,idxb_max); - blist_pack = t_sna_4d_ll(Kokkos::NoInit("sna:blist_pack"),1,1,1,1); - ylist = t_sna_3c_ll(Kokkos::NoInit("sna:ylist"),idxu_half_max,nelements,natom); - ylist_pack_re = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_re"),1,1,1,1); - ylist_pack_im = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_im"),1,1,1,1); - dulist = t_sna_4c3_ll(Kokkos::NoInit("sna:dulist"),idxu_cache_max,natom,nmax); + MemKK::realloc_kokkos(a_pack,"sna:a_pack",1,1,1); + MemKK::realloc_kokkos(b_pack,"sna:b_pack",1,1,1); + MemKK::realloc_kokkos(da_pack,"sna:da_pack",1,1,1,1); + MemKK::realloc_kokkos(db_pack,"sna:db_pack",1,1,1,1); + MemKK::realloc_kokkos(sfac_pack,"sna:sfac_pack",1,1,1,1); + MemKK::realloc_kokkos(ulisttot,"sna:ulisttot",idxu_half_max,nelements,natom); + MemKK::realloc_kokkos(ulisttot_full,"sna:ulisttot_full",idxu_max,nelements,natom); + MemKK::realloc_kokkos(ulisttot_re_pack,"sna:ulisttot_re",1,1,1,1); + MemKK::realloc_kokkos(ulisttot_im_pack,"sna:ulisttot_im",1,1,1,1); + MemKK::realloc_kokkos(ulisttot_pack,"sna:ulisttot_pack",1,1,1,1); + MemKK::realloc_kokkos(ulist,"sna:ulist",idxu_cache_max,natom,nmax); + MemKK::realloc_kokkos(zlist,"sna:zlist",idxz_max,ndoubles,natom); + MemKK::realloc_kokkos(zlist_pack,"sna:zlist_pack",1,1,1,1); + MemKK::realloc_kokkos(blist,"sna:blist",natom,ntriples,idxb_max); + MemKK::realloc_kokkos(blist_pack,"sna:blist_pack",1,1,1,1); + MemKK::realloc_kokkos(ylist,"sna:ylist",idxu_half_max,nelements,natom); + MemKK::realloc_kokkos(ylist_pack_re,"sna:ylist_pack_re",1,1,1,1); + MemKK::realloc_kokkos(ylist_pack_im,"sna:ylist_pack_im",1,1,1,1); + MemKK::realloc_kokkos(dulist,"sna:dulist",idxu_cache_max,natom,nmax); #ifdef LMP_KOKKOS_GPU } @@ -2356,74 +2357,68 @@ void SNAKokkos::compute_s_dsfac(const real template double SNAKokkos::memory_usage() { - int jdimpq = twojmax + 2; - int jdim = twojmax + 1; - double bytes; + double bytes = 0; - bytes = 0; - - bytes += jdimpq*jdimpq * sizeof(real_type); // pqarray - bytes += idxcg_max * sizeof(real_type); // cglist + bytes += MemKK::memory_usage(rootpqarray); + bytes += MemKK::memory_usage(cglist); #ifdef LMP_KOKKOS_GPU if (!host_flag) { - auto natom_pad = (natom+vector_length-1)/vector_length; - - bytes += natom_pad * nmax * sizeof(real_type) * 2; // a_pack - bytes += natom_pad * nmax * sizeof(real_type) * 2; // b_pack - bytes += natom_pad * nmax * 3 * sizeof(real_type) * 2; // da_pack - bytes += natom_pad * nmax * 3 * sizeof(real_type) * 2; // db_pack - bytes += natom_pad * nmax * 4 * sizeof(real_type); // sfac_pack + bytes += MemKK::memory_usage(a_pack); + bytes += MemKK::memory_usage(b_pack); + bytes += MemKK::memory_usage(da_pack); + bytes += MemKK::memory_usage(db_pack); + bytes += MemKK::memory_usage(sfac_pack); - bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ulisttot_re_pack - bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ulisttot_im_pack - bytes += natom_pad * idxu_max * nelements * sizeof(real_type) * 2; // ulisttot_pack + bytes += MemKK::memory_usage(ulisttot_re_pack); + bytes += MemKK::memory_usage(ulisttot_im_pack); + bytes += MemKK::memory_usage(ulisttot_pack); - bytes += natom_pad * idxz_max * ndoubles * sizeof(real_type) * 2; // zlist_pack - bytes += natom_pad * idxb_max * ntriples * sizeof(real_type); // blist_pack + bytes += MemKK::memory_usage(zlist_pack); + bytes += MemKK::memory_usage(blist_pack); - bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ylist_pack_re - bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ylist_pack_im + bytes += MemKK::memory_usage(ylist_pack_re); + bytes += MemKK::memory_usage(ylist_pack_im); } else { #endif - bytes += natom * nmax * idxu_cache_max * sizeof(real_type) * 2; // ulist - bytes += natom * idxu_half_max * nelements * sizeof(real_type) * 2; // ulisttot - bytes += natom * idxu_max * nelements * sizeof(real_type) * 2; // ulisttot_full + bytes += MemKK::memory_usage(ulist); + bytes += MemKK::memory_usage(ulisttot); + bytes += MemKK::memory_usage(ulisttot_full); - bytes += natom * idxz_max * ndoubles * sizeof(real_type) * 2; // zlist - bytes += natom * idxb_max * ntriples * sizeof(real_type); // blist + bytes += MemKK::memory_usage(zlist); + bytes += MemKK::memory_usage(blist); - bytes += natom * idxu_half_max * nelements * sizeof(real_type) * 2; // ylist + bytes += MemKK::memory_usage(ylist); - bytes += natom * nmax * idxu_cache_max * 3 * sizeof(real_type) * 2; // dulist + bytes += MemKK::memory_usage(dulist); #ifdef LMP_KOKKOS_GPU } #endif - bytes += natom * nmax * 3 * sizeof(real_type); // dedr + bytes += MemKK::memory_usage(dedr); - bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block - bytes += jdim * sizeof(int); // idxu_block - bytes += jdim * sizeof(int); // idxu_half_block - bytes += idxu_max * sizeof(FullHalfMapper); // idxu_full_half - bytes += jdim * sizeof(int); // idxu_cache_block - bytes += jdim * jdim * jdim * sizeof(int); // idxz_block - bytes += jdim * jdim * jdim * sizeof(int); // idxb_block + bytes += MemKK::memory_usage(idxcg_block); + bytes += MemKK::memory_usage(idxu_block); + bytes += MemKK::memory_usage(idxu_half_block); + bytes += MemKK::memory_usage(idxu_full_half); + bytes += MemKK::memory_usage(idxu_cache_block); + bytes += MemKK::memory_usage(idxz_block); + bytes += MemKK::memory_usage(idxb_block); - bytes += idxz_max * 10 * sizeof(int); // idxz - bytes += idxb_max * 3 * sizeof(int); // idxb + bytes += MemKK::memory_usage(idxz); + bytes += MemKK::memory_usage(idxb); - bytes += jdim * sizeof(real_type); // bzero + bytes += MemKK::memory_usage(bzero); - bytes += natom * nmax * 3 * sizeof(real_type); // rij - bytes += natom * nmax * sizeof(real_type); // inside - bytes += natom * nmax * sizeof(real_type); // wj - bytes += natom * nmax * sizeof(real_type); // rcutij - bytes += natom * nmax * sizeof(real_type); // sinnerij - bytes += natom * nmax * sizeof(real_type); // dinnerij + bytes += MemKK::memory_usage(rij); + bytes += MemKK::memory_usage(inside); + bytes += MemKK::memory_usage(wj); + bytes += MemKK::memory_usage(rcutij); + bytes += MemKK::memory_usage(sinnerij); + bytes += MemKK::memory_usage(dinnerij); return bytes; } From d8d6884defef6c5b08c91c98c8259a6eb3599724 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 19 May 2022 09:09:57 -0600 Subject: [PATCH 2/7] Remove use_count check, reduce memory use in ReaxFF --- src/KOKKOS/memory_kokkos.h | 6 ---- src/KOKKOS/pair_reaxff_kokkos.cpp | 58 +++++++++++-------------------- src/KOKKOS/pair_reaxff_kokkos.h | 22 +++++------- 3 files changed, 29 insertions(+), 57 deletions(-) diff --git a/src/KOKKOS/memory_kokkos.h b/src/KOKKOS/memory_kokkos.h index 092e20b6c7..4d4d200745 100644 --- a/src/KOKKOS/memory_kokkos.h +++ b/src/KOKKOS/memory_kokkos.h @@ -284,7 +284,6 @@ template static void realloc_kokkos(TYPE &data, const char *name, int n1) { data = TYPE(); - assert(data.use_count() == 0); data = TYPE(Kokkos::NoInit(std::string(name)),n1); } @@ -292,7 +291,6 @@ template static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2) { data = TYPE(); - assert(data.use_count() == 0); data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2); } @@ -300,7 +298,6 @@ template static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3) { data = TYPE(); - assert(data.use_count() == 0); data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3); } @@ -308,7 +305,6 @@ template static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4) { data = TYPE(); - assert(data.use_count() == 0); data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4); } @@ -316,7 +312,6 @@ template static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5) { data = TYPE(); - assert(data.use_count() == 0); data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5); } @@ -324,7 +319,6 @@ template static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5, int n6) { data = TYPE(); - assert(data.use_count() == 0); data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5,n6); } diff --git a/src/KOKKOS/pair_reaxff_kokkos.cpp b/src/KOKKOS/pair_reaxff_kokkos.cpp index 7f905c069f..61f3bb3037 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.cpp +++ b/src/KOKKOS/pair_reaxff_kokkos.cpp @@ -884,17 +884,10 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy(0,ignum),*this); // allocate duplicated memory - if (need_dup) { + if (need_dup) dup_CdDelta = Kokkos::Experimental::create_scatter_view(d_CdDelta); - //dup_Cdbo = Kokkos::Experimental::create_scatter_view(d_Cdbo); - //dup_Cdbopi = Kokkos::Experimental::create_scatter_view(d_Cdbopi); - //dup_Cdbopi2 = Kokkos::Experimental::create_scatter_view(d_Cdbopi2); - } else { + else ndup_CdDelta = Kokkos::Experimental::create_scatter_view(d_CdDelta); - //ndup_Cdbo = Kokkos::Experimental::create_scatter_view(d_Cdbo); - //ndup_Cdbopi = Kokkos::Experimental::create_scatter_view(d_Cdbopi); - //ndup_Cdbopi2 = Kokkos::Experimental::create_scatter_view(d_Cdbopi2); - } // reduction over duplicated memory if (need_dup) @@ -1034,26 +1027,12 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) if (need_dup) { Kokkos::Experimental::contribute(d_dDeltap_self, dup_dDeltap_self); // needed in ComputeBond2 Kokkos::Experimental::contribute(d_CdDelta, dup_CdDelta); // needed in ComputeBond2 - - //Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in UpdateBond, but also used in UpdateBond - //Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in UpdateBond, but also used in UpdateBond - //Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in UpdateBond, but also used in UpdateBond - //dup_Cdbo.reset_except(d_Cdbo); - //dup_Cdbopi.reset_except(d_Cdbopi); - //dup_Cdbopi2.reset_except(d_Cdbopi2); } // Bond force if (neighflag == HALF) { Kokkos::parallel_for(Kokkos::RangePolicy>(0,ignum),*this); - // reduction over duplicated memory - //if (need_dup) { - // Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in ComputeBond2 - // Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in ComputeBond2 - // Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in ComputeBond2 - //} - if (vflag_either) Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,ignum),*this,ev); else @@ -1063,13 +1042,6 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) } else { //if (neighflag == HALFTHREAD) { Kokkos::parallel_for(Kokkos::RangePolicy>(0,ignum),*this); - // reduction over duplicated memory - //if (need_dup) { - // Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in ComputeBond2 - // Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in ComputeBond2 - // Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in ComputeBond2 - //} - if (vflag_either) Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,ignum),*this,ev); else @@ -1117,7 +1089,7 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) copymode = 0; - // free duplicated memory + // free scatterview memory if (need_dup) { dup_f = decltype(dup_f)(); dup_eatom = decltype(dup_eatom)(); @@ -1125,9 +1097,13 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) dup_dDeltap_self = decltype(dup_dDeltap_self)(); dup_total_bo = decltype(dup_total_bo)(); dup_CdDelta = decltype(dup_CdDelta)(); - //dup_Cdbo = decltype(dup_Cdbo)(); - //dup_Cdbopi = decltype(dup_Cdbopi)(); - //dup_Cdbopi2 = decltype(dup_Cdbopi2)(); + } else { + ndup_f = decltype(ndup_f)(); + ndup_eatom = decltype(ndup_eatom)(); + ndup_vatom = decltype(ndup_vatom)(); + ndup_dDeltap_self = decltype(ndup_dDeltap_self)(); + ndup_total_bo = decltype(ndup_total_bo)(); + ndup_CdDelta = decltype(ndup_CdDelta)(); } d_neighbors = typename AT::t_neighbors_2d(); @@ -1513,6 +1489,17 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeTabulatedLJCoulo template void PairReaxFFKokkos::allocate_array() { + // free scatterview memory + if (need_dup) { + dup_dDeltap_self = decltype(dup_dDeltap_self)(); + dup_total_bo = decltype(dup_total_bo)(); + dup_CdDelta = decltype(dup_CdDelta)(); + } else { + ndup_dDeltap_self = decltype(ndup_dDeltap_self)(); + ndup_total_bo = decltype(ndup_total_bo)(); + ndup_CdDelta = decltype(ndup_CdDelta)(); + } + if (cut_hbsq > 0.0) { MemKK::realloc_kokkos(d_hb_first,"reaxff/kk:hb_first",nmax); MemKK::realloc_kokkos(d_hb_num,"reaxff/kk:hb_num",nmax); @@ -3482,9 +3469,6 @@ void PairReaxFFKokkos::operator()(TagPairReaxUpdateBond, Kokkos::View::value>> a_Cdbo = d_Cdbo; Kokkos::View::value>> a_Cdbopi = d_Cdbopi; Kokkos::View::value>> a_Cdbopi2 = d_Cdbopi2; - //auto a_Cdbo = dup_Cdbo.template access>(); - //auto a_Cdbopi = dup_Cdbopi.template access>(); - //auto a_Cdbopi2 = dup_Cdbopi2.template access>(); const int i = d_ilist[ii]; const tagint itag = tag(i); diff --git a/src/KOKKOS/pair_reaxff_kokkos.h b/src/KOKKOS/pair_reaxff_kokkos.h index 434f3c63fa..39b323a0fe 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.h +++ b/src/KOKKOS/pair_reaxff_kokkos.h @@ -435,6 +435,8 @@ class PairReaxFFKokkos : public PairReaxFF { typename AT::t_ffloat_2d_dl d_C1dbopi2, d_C2dbopi2, d_C3dbopi2, d_C4dbopi2; typename AT::t_ffloat_2d_dl d_Cdbo, d_Cdbopi, d_Cdbopi2, d_dDeltap_self; + int need_dup; + using KKDeviceType = typename KKDevice::value; template @@ -443,27 +445,19 @@ class PairReaxFFKokkos : public PairReaxFF { template using NonDupScatterView = KKScatterView; - DupScatterView dup_total_bo; - DupScatterView dup_CdDelta; - DupScatterView dup_eatom; DupScatterView dup_f; + DupScatterView dup_eatom; DupScatterView dup_vatom; DupScatterView dup_dDeltap_self; - DupScatterView dup_Cdbo; - DupScatterView dup_Cdbopi; - DupScatterView dup_Cdbopi2; + DupScatterView dup_total_bo; + DupScatterView dup_CdDelta; - NonDupScatterView ndup_total_bo; - NonDupScatterView ndup_CdDelta; - NonDupScatterView ndup_eatom; NonDupScatterView ndup_f; + NonDupScatterView ndup_eatom; NonDupScatterView ndup_vatom; NonDupScatterView ndup_dDeltap_self; - NonDupScatterView ndup_Cdbo; - NonDupScatterView ndup_Cdbopi; - NonDupScatterView ndup_Cdbopi2; - - int need_dup; + NonDupScatterView ndup_total_bo; + NonDupScatterView ndup_CdDelta; typedef Kokkos::DualView tdual_ffloat_2d_n7; typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread; From a9431208a267737215ab22bb1507b9d0df8c8843 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 19 May 2022 10:35:10 -0600 Subject: [PATCH 3/7] Use c++11 variadic magic, suggested by @weinbe2 --- src/KOKKOS/memory_kokkos.h | 41 +++----------------------------------- 1 file changed, 3 insertions(+), 38 deletions(-) diff --git a/src/KOKKOS/memory_kokkos.h b/src/KOKKOS/memory_kokkos.h index 4d4d200745..b421bcc825 100644 --- a/src/KOKKOS/memory_kokkos.h +++ b/src/KOKKOS/memory_kokkos.h @@ -280,46 +280,11 @@ void destroy_kokkos(TYPE data, typename TYPE::value_type** &array) deallocate first to reduce memory use ------------------------------------------------------------------------- */ -template -static void realloc_kokkos(TYPE &data, const char *name, int n1) +template +static void realloc_kokkos(TYPE &data, const char *name, Indices... ns) { data = TYPE(); - data = TYPE(Kokkos::NoInit(std::string(name)),n1); -} - -template -static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2) -{ - data = TYPE(); - data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2); -} - -template -static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3) -{ - data = TYPE(); - data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3); -} - -template -static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4) -{ - data = TYPE(); - data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4); -} - -template -static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5) -{ - data = TYPE(); - data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5); -} - -template -static void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5, int n6) -{ - data = TYPE(); - data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5,n6); + data = TYPE(Kokkos::NoInit(std::string(name)), ns...); } /* ---------------------------------------------------------------------- From ef80bb194a6aa1c3c41a06f0bf0c4a87c93a6bc8 Mon Sep 17 00:00:00 2001 From: Ravishankar Sundararaman Date: Mon, 23 May 2022 17:28:32 -0400 Subject: [PATCH 4/7] Fixed TIP4P handling in PPPM slab correction. Overridded the slabcorr() function to handle the charge being on the TIP4P "M" site in the dipole calculation (and corresponding force correction). This is important for any long-range electrostatics in aqueous interfaces with the TIP4P model. --- src/KSPACE/pppm_disp.h | 2 +- src/KSPACE/pppm_disp_tip4p.cpp | 79 +++++++++++++++++++++++++++++++++ src/KSPACE/pppm_disp_tip4p.h | 1 + src/KSPACE/pppm_tip4p.cpp | 80 ++++++++++++++++++++++++++++++++++ src/KSPACE/pppm_tip4p.h | 1 + 5 files changed, 162 insertions(+), 1 deletion(-) diff --git a/src/KSPACE/pppm_disp.h b/src/KSPACE/pppm_disp.h index 87265bee2f..1f254e772d 100644 --- a/src/KSPACE/pppm_disp.h +++ b/src/KSPACE/pppm_disp.h @@ -320,7 +320,7 @@ class PPPMDisp : public KSpace { void compute_drho1d(const FFT_SCALAR &, const FFT_SCALAR &, const FFT_SCALAR &, int, FFT_SCALAR **, FFT_SCALAR **); void compute_rho_coeff(FFT_SCALAR **, FFT_SCALAR **, int); - void slabcorr(int); + virtual void slabcorr(int); // grid communication diff --git a/src/KSPACE/pppm_disp_tip4p.cpp b/src/KSPACE/pppm_disp_tip4p.cpp index 902b512bc3..9ebc32ad05 100644 --- a/src/KSPACE/pppm_disp_tip4p.cpp +++ b/src/KSPACE/pppm_disp_tip4p.cpp @@ -486,6 +486,85 @@ void PPPMDispTIP4P::fieldforce_c_peratom() } } +/* ---------------------------------------------------------------------- + Fix handling of TIP4P dipole compared to PPPMDisp::slabcorr +------------------------------------------------------------------------- */ + +#define SMALL 0.00001 + +void PPPMDispTIP4P::slabcorr(int /*eflag*/) +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + int *type = atom->type; + double *xi, xM[3]; int iH1, iH2; //for TIP4P virtual site + + // sum local contributions to get global dipole moment + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) { + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + xi = xM; + } else xi = x[i]; + dipole += q[i]*xi[2]; + } + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy_1 += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) { + double fzi_corr = ffact * q[i]*(dipole_all - qsum*x[i][2]); + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + f[i][2] += fzi_corr*(1 - alpha); + f[iH1][2] += 0.5*alpha*fzi_corr; + f[iH2][2] += 0.5*alpha*fzi_corr; + } + else f[i][2] += fzi_corr; + } +} + /* ---------------------------------------------------------------------- find 2 H atoms bonded to O atom i compute position xM of fictitious charge site for O atom diff --git a/src/KSPACE/pppm_disp_tip4p.h b/src/KSPACE/pppm_disp_tip4p.h index a432a7eeeb..e9d5babe0f 100644 --- a/src/KSPACE/pppm_disp_tip4p.h +++ b/src/KSPACE/pppm_disp_tip4p.h @@ -37,6 +37,7 @@ class PPPMDispTIP4P : public PPPMDisp { void fieldforce_c_ik() override; void fieldforce_c_ad() override; void fieldforce_c_peratom() override; + void slabcorr(int) override; private: void find_M(int, int &, int &, double *); diff --git a/src/KSPACE/pppm_tip4p.cpp b/src/KSPACE/pppm_tip4p.cpp index 1cb7bf462f..e9d72bcefe 100644 --- a/src/KSPACE/pppm_tip4p.cpp +++ b/src/KSPACE/pppm_tip4p.cpp @@ -477,6 +477,86 @@ void PPPMTIP4P::fieldforce_peratom() } } + +/* ---------------------------------------------------------------------- + Fix handling of TIP4P dipole compared to PPPMDisp::slabcorr +------------------------------------------------------------------------- */ + +#define SMALL 0.00001 + +void PPPMTIP4P::slabcorr() +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + int *type = atom->type; + double *xi, xM[3]; int iH1, iH2; //for TIP4P virtual site + + // sum local contributions to get global dipole moment + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) { + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + xi = xM; + } else xi = x[i]; + dipole += q[i]*xi[2]; + } + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy_1 += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) { + double fzi_corr = ffact * q[i]*(dipole_all - qsum*x[i][2]); + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + f[i][2] += fzi_corr*(1 - alpha); + f[iH1][2] += 0.5*alpha*fzi_corr; + f[iH2][2] += 0.5*alpha*fzi_corr; + } + else f[i][2] += fzi_corr; + } +} + /* ---------------------------------------------------------------------- find 2 H atoms bonded to O atom i compute position xM of fictitious charge site for O atom diff --git a/src/KSPACE/pppm_tip4p.h b/src/KSPACE/pppm_tip4p.h index cf63af0d81..0e6b2cc91b 100644 --- a/src/KSPACE/pppm_tip4p.h +++ b/src/KSPACE/pppm_tip4p.h @@ -35,6 +35,7 @@ class PPPMTIP4P : public PPPM { void fieldforce_ik() override; void fieldforce_ad() override; void fieldforce_peratom() override; + void slabcorr() override; private: void find_M(int, int &, int &, double *); From 43b57b8aa556ee4f644d1034005ddd83b23b670c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 23 May 2022 20:14:10 -0400 Subject: [PATCH 5/7] update unit test reference data --- .../tests/kspace-pppm_tip4p_slab.yaml | 120 +++++++++--------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/unittest/force-styles/tests/kspace-pppm_tip4p_slab.yaml b/unittest/force-styles/tests/kspace-pppm_tip4p_slab.yaml index 51c5a45941..9b9806df5b 100644 --- a/unittest/force-styles/tests/kspace-pppm_tip4p_slab.yaml +++ b/unittest/force-styles/tests/kspace-pppm_tip4p_slab.yaml @@ -1,7 +1,7 @@ --- -lammps_version: 10 Feb 2021 +lammps_version: 4 May 2022 tags: slow -date_generated: Fri Feb 26 23:09:34 2021 +date_generated: Mon May 23 20:12:38 2022 epsilon: 5e-13 prerequisites: ! | atom full @@ -27,67 +27,67 @@ init_coul: 0 init_stress: ! |2- 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 init_forces: ! |2 - 1 -5.1875059319876482e-01 5.1660852898514097e-02 -5.7568746684221916e-01 - 2 2.1689992534273889e-01 -2.6058788219741147e-01 4.1798750467672274e-01 - 3 -3.2123603964927645e-02 -8.8740124548059782e-03 -1.3836110655816162e-02 - 4 1.5801256389492813e-01 2.1126659375654909e-02 7.8030101326860571e-02 - 5 1.4630529362834138e-01 7.0541031372980123e-02 1.0367534136208822e-01 - 6 4.5271336509733173e-01 4.3434073283904712e-01 1.9672609182540585e-01 - 7 -2.5202832073509374e-01 -4.0972860647817611e-01 -5.1025448731009770e-01 - 8 -2.7733728801934087e-03 -6.6285305944413486e-01 -4.2259182161309783e-01 - 9 8.7284510947996261e-02 3.4644067741232221e-01 5.2649415280028289e-01 - 10 -7.0358238250045405e-02 1.2324438068757468e-01 9.7430764543568638e-02 - 11 -1.0611397012429427e-01 1.6778028608628817e-01 1.0243492986761592e-01 - 12 4.7522165036946828e-01 -4.7569230861934814e-01 -3.7620104501978141e-01 - 13 -1.5084555220571103e-01 1.3477824777242514e-01 1.5530580320476042e-01 - 14 -1.7331931611645846e-01 1.4489217533438961e-01 1.5187450400216165e-01 - 15 -1.4457383479163205e-01 1.0191957555196750e-01 1.2647045003724980e-01 - 16 -4.6380086833284712e-01 5.1725747136642608e-01 1.3944164623658954e+00 - 17 2.5830539911922701e-01 -4.4001348824516739e-01 -1.6368568925682263e+00 - 18 7.1711828509507158e-01 1.5135594083235546e+00 -3.0000511375094279e+00 - 19 -2.6526242862377652e-01 -7.5615232036394919e-01 1.6174668711138471e+00 - 20 -3.6412116833354524e-01 -6.8003985707517800e-01 1.5219834320490606e+00 - 21 2.1913962201245335e-01 2.8436767546323427e-01 -1.5830999741451532e+00 - 22 -1.1342236771135741e-01 -2.1501547367331550e-02 8.1828468024256407e-01 - 23 -1.3904196539005109e-01 -1.8302644136991242e-01 7.6537752144201421e-01 - 24 9.9781913572244155e-02 9.7885535038983940e-01 -1.2668572125502537e+00 - 25 1.3435697022842072e-01 -3.4498868790162351e-01 6.9488200140438949e-01 - 26 -2.4062268840602163e-01 -6.1062269338471520e-01 5.8807795407966368e-01 - 27 -6.0379577180174060e-01 9.0922264678215248e-01 -1.6721200413729438e+00 - 28 4.2997465966368542e-01 -5.0965848067708741e-01 9.0674794598209418e-01 - 29 2.4583990189455251e-01 -4.3624778607752923e-01 7.9388967726077175e-01 + 1 -5.1875059319876493e-01 5.1660852898514124e-02 -5.3139324142026889e-01 + 2 2.1689992534273903e-01 -2.6058788219741147e-01 3.8877216450479823e-01 + 3 -3.2123603964927645e-02 -8.8740124548059712e-03 -1.1951249999562964e-02 + 4 1.5801256389492810e-01 2.1126659375654853e-02 6.9548228373721183e-02 + 5 1.4630529362834138e-01 7.0541031372980081e-02 9.5193468408948823e-02 + 6 4.5271336509733173e-01 4.3434073283904689e-01 1.4866214509094927e-01 + 7 -2.5202832073509379e-01 -4.0972860647817605e-01 -4.6219054057564113e-01 + 8 -2.7733728801934924e-03 -6.6285305944413497e-01 -3.7829759619114767e-01 + 9 8.7284510947996316e-02 3.4644067741232221e-01 4.9727881262835838e-01 + 10 -7.0358238250045405e-02 1.2324438068757469e-01 9.0833752246682456e-02 + 11 -1.0611397012429427e-01 1.6778028608628820e-01 9.3953056914476557e-02 + 12 4.7522165036946817e-01 -4.7569230861934830e-01 -3.5075542616036326e-01 + 13 -1.5084555220571100e-01 1.3477824777242520e-01 1.4682393025162105e-01 + 14 -1.7331931611645843e-01 1.4489217533438964e-01 1.4339263104902228e-01 + 15 -1.4457383479163199e-01 1.0191957555196751e-01 1.1798857708411041e-01 + 16 -4.6380086833284706e-01 5.1725747136642608e-01 1.3463525156314389e+00 + 17 2.5830539911922706e-01 -4.4001348824516717e-01 -1.5887929458337697e+00 + 18 7.1711828509507103e-01 1.5135594083235548e+00 -2.9202084401105424e+00 + 19 -2.6526242862377630e-01 -7.5615232036394919e-01 1.5775455224144039e+00 + 20 -3.6412116833354502e-01 -6.8003985707517844e-01 1.4820620833496179e+00 + 21 2.1913962201245299e-01 2.8436767546323433e-01 -1.3892564908575578e+00 + 22 -1.1342236771135721e-01 -2.1501547367331557e-02 7.2136293859876610e-01 + 23 -1.3904196539005081e-01 -1.8302644136991245e-01 6.6845577979821647e-01 + 24 9.9781913572244044e-02 9.7885535038983906e-01 -1.0730137292626583e+00 + 25 1.3435697022842086e-01 -3.4498868790162335e-01 5.9796025976059175e-01 + 26 -2.4062268840602169e-01 -6.1062269338471475e-01 4.9115621243586605e-01 + 27 -6.0379577180174060e-01 9.0922264678215248e-01 -1.4782765580853483e+00 + 28 4.2997465966368531e-01 -5.0965848067708730e-01 8.0982620433829633e-01 + 29 2.4583990189455257e-01 -4.3624778607752923e-01 6.9696793561697401e-01 run_vdwl: 0 run_coul: 0 run_stress: ! |2- 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 run_forces: ! |2 - 1 -5.1741021308022928e-01 5.2164463970109785e-02 -5.7035795586691229e-01 - 2 2.1552845355081662e-01 -2.6145885390966006e-01 4.1455932829319120e-01 - 3 -3.2103136898117973e-02 -8.8506517835662529e-03 -1.3601661681107857e-02 - 4 1.5813149106431929e-01 2.0998590621170252e-02 7.7085061898295948e-02 - 5 1.4611782148177443e-01 7.0495101424978557e-02 1.0262211863021993e-01 - 6 4.5237682723361744e-01 4.3384953789975256e-01 1.9026464391810138e-01 - 7 -2.5220238048340610e-01 -4.1025291093818367e-01 -5.0478860778438306e-01 - 8 -1.8080584227147985e-03 -6.6262062885438788e-01 -4.1641859631215317e-01 - 9 8.6463014670901714e-02 3.4599477171923032e-01 5.2262452042463015e-01 - 10 -7.0489736534953079e-02 1.2334020200473024e-01 9.6566992716111827e-02 - 11 -1.0626441695286094e-01 1.6812163421601856e-01 1.0149591994495212e-01 - 12 4.7577991930135149e-01 -4.7567916757872036e-01 -3.7271573985070183e-01 - 13 -1.5102820761813157e-01 1.3482444566331483e-01 1.5423678933941967e-01 - 14 -1.7348673211954843e-01 1.4502141845434144e-01 1.5087187633492033e-01 - 15 -1.4462875081155377e-01 1.0177069356565076e-01 1.2517182091704590e-01 - 16 -4.6460162929294996e-01 5.1779385845248271e-01 1.3893654572043623e+00 - 17 2.5891442131187525e-01 -4.3950548671559936e-01 -1.6317550757697421e+00 - 18 7.2168431617151652e-01 1.5193451885286362e+00 -2.9893884981487782e+00 - 19 -2.6652798165736163e-01 -7.5811338734451528e-01 1.6128149463359120e+00 - 20 -3.6685702835539502e-01 -6.8322050954334113e-01 1.5165955023073157e+00 - 21 2.1951624771560643e-01 2.7908195130062186e-01 -1.5761953363577819e+00 - 22 -1.1336951380799791e-01 -1.8808477419676436e-02 8.1504600690354212e-01 - 23 -1.3925079650238253e-01 -1.8099002366078934e-01 7.6197026590425576e-01 - 24 1.0040861588721736e-01 9.7605298874640822e-01 -1.2616161159601158e+00 - 25 1.3326916759186308e-01 -3.4383632694371846e-01 6.9190323110048380e-01 - 26 -2.4021815268192853e-01 -6.0890409178886584e-01 5.8556538656853796e-01 - 27 -6.0409904237121848e-01 9.0912495784502378e-01 -1.6654694276801689e+00 - 28 4.3013936095125799e-01 -5.0934391123833689e-01 9.0325113244229294e-01 - 29 2.4601612065863304e-01 -4.3639537669310857e-01 7.9029601422825357e-01 + 1 -5.1741014216891323e-01 5.2164635296042393e-02 -5.2623836978475858e-01 + 2 2.1552846412199758e-01 -2.6145894539516673e-01 3.8545898349593888e-01 + 3 -3.2103138095670579e-02 -8.8506538753520981e-03 -1.1724235859914865e-02 + 4 1.5813145650026225e-01 2.0998621233542170e-02 6.8636593918036282e-02 + 5 1.4611784388196503e-01 7.0495096056163456e-02 9.4173752375773245e-02 + 6 4.5237719221599776e-01 4.3384962277065459e-01 1.4239025998317195e-01 + 7 -2.5220268493187059e-01 -4.1025301686518484e-01 -4.5691379591079689e-01 + 8 -1.8087303830687639e-03 -6.6262053829478962e-01 -3.7229928601228018e-01 + 9 8.6463301781221830e-02 3.4599465684255803e-01 4.9352454715915695e-01 + 10 -7.0489594513774040e-02 1.2334016665695119e-01 8.9996005394677253e-02 + 11 -1.0626421813039562e-01 1.6812158323291176e-01 9.3047478449378010e-02 + 12 4.7577947092697664e-01 -4.7567891996498934e-01 -3.4737046113988318e-01 + 13 -1.5102808666764547e-01 1.3482433032858601e-01 1.4578835246021055e-01 + 14 -1.7348659325588509e-01 1.4502136217343140e-01 1.4242345028236647e-01 + 15 -1.4462864359019598e-01 1.0177061950990737e-01 1.1672337003180658e-01 + 16 -4.6460067645864556e-01 5.1779370227303589e-01 1.3414908637759566e+00 + 17 2.5891386126470772e-01 -4.3950547582143235e-01 -1.5838807201814791e+00 + 18 7.2168370323432252e-01 1.5193447526996697e+00 -2.9098598385970016e+00 + 19 -2.6652787885830853e-01 -7.5811315791679745e-01 1.5730504781450976e+00 + 20 -3.6685665590502275e-01 -6.8322021768929220e-01 1.4768311717846097e+00 + 21 2.1951586762660091e-01 2.7908220235205461e-01 -1.3830824926773011e+00 + 22 -1.1336938042579009e-01 -1.8808601624035179e-02 7.1848935671661196e-01 + 23 -1.3925069633951992e-01 -1.8099007738613887e-01 6.6541382825713724e-01 + 24 1.0040887040966230e-01 9.7605345725512060e-01 -1.0685029118802341e+00 + 25 1.3326911366323715e-01 -3.4383656089990644e-01 5.9534683106490160e-01 + 26 -2.4021835151581941e-01 -6.0890428532016694e-01 4.8900886112265285e-01 + 27 -6.0409926106451306e-01 9.0912468504667421e-01 -1.4723570251596647e+00 + 28 4.3013929998720690e-01 -5.0934380660262080e-01 8.0669499930969779e-01 + 29 2.4601628669087905e-01 -4.3639523607143099e-01 6.9373995347613393e-01 ... From b66702c285fc16e04bb3dff558d8d64921c9400c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugen=20Ro=C5=BEi=C4=87?= Date: Tue, 24 May 2022 11:37:44 +0200 Subject: [PATCH 6/7] Added verbose class option + removed empty output lines --- python/lammps/pylammps.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/lammps/pylammps.py b/python/lammps/pylammps.py index cdb6620c27..1a64adb6ed 100644 --- a/python/lammps/pylammps.py +++ b/python/lammps/pylammps.py @@ -457,8 +457,9 @@ class PyLammps(object): :vartype run: list """ - def __init__(self, name="", cmdargs=None, ptr=None, comm=None): + def __init__(self, name="", cmdargs=None, ptr=None, comm=None, verbose=False): self.has_echo = False + self.verbose = verbose if cmdargs: if '-echo' in cmdargs: @@ -869,8 +870,8 @@ class PyLammps(object): if comm: output = self.lmp.comm.bcast(output, root=0) - if 'verbose' in kwargs and kwargs['verbose']: - print(output) + if self.verbose or ('verbose' in kwargs and kwargs['verbose']): + print(output, end = '') lines = output.splitlines() @@ -984,4 +985,4 @@ class IPyLammps(PyLammps): :rtype: :py:class:`IPython.display.HTML` """ from IPython.display import HTML - return HTML("") + return HTML("") \ No newline at end of file From 1aaa9ca4ae0bbb64a269a31301f78b6e9709016e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugen=20Ro=C5=BEi=C4=87?= Date: Tue, 24 May 2022 12:02:34 +0200 Subject: [PATCH 7/7] Added documentation for verbose option --- python/lammps/pylammps.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/lammps/pylammps.py b/python/lammps/pylammps.py index 1a64adb6ed..1fe1f2452b 100644 --- a/python/lammps/pylammps.py +++ b/python/lammps/pylammps.py @@ -449,6 +449,8 @@ class PyLammps(object): :type ptr: pointer :param comm: MPI communicator (as provided by `mpi4py `_). ``None`` means use ``MPI_COMM_WORLD`` implicitly. :type comm: MPI_Comm + :param verbose: print all LAMMPS output to stdout + :type verbose: bool :ivar lmp: instance of original LAMMPS Python interface :vartype lmp: :py:class:`lammps` @@ -985,4 +987,4 @@ class IPyLammps(PyLammps): :rtype: :py:class:`IPython.display.HTML` """ from IPython.display import HTML - return HTML("") \ No newline at end of file + return HTML("")