diff --git a/python/lammps/pylammps.py b/python/lammps/pylammps.py index cdb6620c27..1fe1f2452b 100644 --- a/python/lammps/pylammps.py +++ b/python/lammps/pylammps.py @@ -449,6 +449,8 @@ class PyLammps(object): :type ptr: pointer :param comm: MPI communicator (as provided by `mpi4py `_). ``None`` means use ``MPI_COMM_WORLD`` implicitly. :type comm: MPI_Comm + :param verbose: print all LAMMPS output to stdout + :type verbose: bool :ivar lmp: instance of original LAMMPS Python interface :vartype lmp: :py:class:`lammps` @@ -457,8 +459,9 @@ class PyLammps(object): :vartype run: list """ - def __init__(self, name="", cmdargs=None, ptr=None, comm=None): + def __init__(self, name="", cmdargs=None, ptr=None, comm=None, verbose=False): self.has_echo = False + self.verbose = verbose if cmdargs: if '-echo' in cmdargs: @@ -869,8 +872,8 @@ class PyLammps(object): if comm: output = self.lmp.comm.bcast(output, root=0) - if 'verbose' in kwargs and kwargs['verbose']: - print(output) + if self.verbose or ('verbose' in kwargs and kwargs['verbose']): + print(output, end = '') lines = output.splitlines() diff --git a/src/KOKKOS/memory_kokkos.h b/src/KOKKOS/memory_kokkos.h index e82746f34d..b421bcc825 100644 --- a/src/KOKKOS/memory_kokkos.h +++ b/src/KOKKOS/memory_kokkos.h @@ -20,6 +20,8 @@ namespace LAMMPS_NS { +typedef MemoryKokkos MemKK; + class MemoryKokkos : public Memory { public: MemoryKokkos(class LAMMPS *lmp) : Memory(lmp) {} @@ -278,46 +280,11 @@ void destroy_kokkos(TYPE data, typename TYPE::value_type** &array) deallocate first to reduce memory use ------------------------------------------------------------------------- */ -template -void realloc_kokkos(TYPE &data, const char *name, int n1) +template +static void realloc_kokkos(TYPE &data, const char *name, Indices... ns) { data = TYPE(); - data = TYPE(Kokkos::NoInit(std::string(name)),n1); -} - -template -void realloc_kokkos(TYPE &data, const char *name, int n1, int n2) -{ - data = TYPE(); - data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2); -} - -template -void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3) -{ - data = TYPE(); - data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3); -} - -template -void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4) -{ - data = TYPE(); - data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4); -} - -template -void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5) -{ - data = TYPE(); - data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5); -} - -template -void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5, int n6) -{ - data = TYPE(); - data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5,n6); + data = TYPE(Kokkos::NoInit(std::string(name)), ns...); } /* ---------------------------------------------------------------------- @@ -325,7 +292,7 @@ void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4 ------------------------------------------------------------------------- */ template -double memory_usage(TYPE &data) +static double memory_usage(TYPE &data) { return data.span() * sizeof(typename TYPE::value_type); } diff --git a/src/KOKKOS/pair_pace_kokkos.cpp b/src/KOKKOS/pair_pace_kokkos.cpp index ddf3bf9107..9d78b63167 100644 --- a/src/KOKKOS/pair_pace_kokkos.cpp +++ b/src/KOKKOS/pair_pace_kokkos.cpp @@ -104,55 +104,55 @@ void PairPACEKokkos::grow(int natom, int maxneigh) if ((int)A.extent(0) < natom) { - memoryKK->realloc_kokkos(A, "pace:A", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1)); - memoryKK->realloc_kokkos(A_rank1, "pace:A_rank1", natom, nelements, nradbase); + MemKK::realloc_kokkos(A, "pace:A", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(A_rank1, "pace:A_rank1", natom, nelements, nradbase); - memoryKK->realloc_kokkos(A_list, "pace:A_list", natom, idx_rho_max, basis_set->rankmax); + MemKK::realloc_kokkos(A_list, "pace:A_list", natom, idx_rho_max, basis_set->rankmax); //size is +1 of max to avoid out-of-boundary array access in double-triangular scheme - memoryKK->realloc_kokkos(A_forward_prod, "pace:A_forward_prod", natom, idx_rho_max, basis_set->rankmax + 1); + MemKK::realloc_kokkos(A_forward_prod, "pace:A_forward_prod", natom, idx_rho_max, basis_set->rankmax + 1); - memoryKK->realloc_kokkos(e_atom, "pace:e_atom", natom); - memoryKK->realloc_kokkos(rhos, "pace:rhos", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion - memoryKK->realloc_kokkos(dF_drho, "pace:dF_drho", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion + MemKK::realloc_kokkos(e_atom, "pace:e_atom", natom); + MemKK::realloc_kokkos(rhos, "pace:rhos", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion + MemKK::realloc_kokkos(dF_drho, "pace:dF_drho", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion - memoryKK->realloc_kokkos(weights, "pace:weights", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1)); - memoryKK->realloc_kokkos(weights_rank1, "pace:weights_rank1", natom, nelements, nradbase); + MemKK::realloc_kokkos(weights, "pace:weights", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(weights_rank1, "pace:weights_rank1", natom, nelements, nradbase); // hard-core repulsion - memoryKK->realloc_kokkos(rho_core, "pace:rho_core", natom); - memoryKK->realloc_kokkos(dF_drho_core, "pace:dF_drho_core", natom); - memoryKK->realloc_kokkos(dB_flatten, "pace:dB_flatten", natom, idx_rho_max, basis_set->rankmax); + MemKK::realloc_kokkos(rho_core, "pace:rho_core", natom); + MemKK::realloc_kokkos(dF_drho_core, "pace:dF_drho_core", natom); + MemKK::realloc_kokkos(dB_flatten, "pace:dB_flatten", natom, idx_rho_max, basis_set->rankmax); } if (((int)ylm.extent(0) < natom) || ((int)ylm.extent(1) < maxneigh)) { // radial functions - memoryKK->realloc_kokkos(fr, "pace:fr", natom, maxneigh, nradmax, lmax + 1); - memoryKK->realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, nradmax, lmax + 1); - memoryKK->realloc_kokkos(gr, "pace:gr", natom, maxneigh, nradbase); - memoryKK->realloc_kokkos(dgr, "pace:dgr", natom, maxneigh, nradbase); + MemKK::realloc_kokkos(fr, "pace:fr", natom, maxneigh, nradmax, lmax + 1); + MemKK::realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, nradmax, lmax + 1); + MemKK::realloc_kokkos(gr, "pace:gr", natom, maxneigh, nradbase); + MemKK::realloc_kokkos(dgr, "pace:dgr", natom, maxneigh, nradbase); const int max_num_functions = MAX(nradbase, nradmax*(lmax + 1)); - memoryKK->realloc_kokkos(d_values, "pace:d_values", natom, maxneigh, max_num_functions); - memoryKK->realloc_kokkos(d_derivatives, "pace:d_derivatives", natom, maxneigh, max_num_functions); + MemKK::realloc_kokkos(d_values, "pace:d_values", natom, maxneigh, max_num_functions); + MemKK::realloc_kokkos(d_derivatives, "pace:d_derivatives", natom, maxneigh, max_num_functions); // hard-core repulsion - memoryKK->realloc_kokkos(cr, "pace:cr", natom, maxneigh); - memoryKK->realloc_kokkos(dcr, "pace:dcr", natom, maxneigh); + MemKK::realloc_kokkos(cr, "pace:cr", natom, maxneigh); + MemKK::realloc_kokkos(dcr, "pace:dcr", natom, maxneigh); // spherical harmonics - memoryKK->realloc_kokkos(plm, "pace:plm", natom, maxneigh, (lmax + 1) * (lmax + 1)); - memoryKK->realloc_kokkos(dplm, "pace:dplm", natom, maxneigh, (lmax + 1) * (lmax + 1)); - memoryKK->realloc_kokkos(ylm, "pace:ylm", natom, maxneigh, (lmax + 1) * (lmax + 1)); - memoryKK->realloc_kokkos(dylm, "pace:dylm", natom, maxneigh, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(plm, "pace:plm", natom, maxneigh, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(dplm, "pace:dplm", natom, maxneigh, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(ylm, "pace:ylm", natom, maxneigh, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(dylm, "pace:dylm", natom, maxneigh, (lmax + 1) * (lmax + 1)); // short neigh list - memoryKK->realloc_kokkos(d_ncount, "pace:ncount", natom); - memoryKK->realloc_kokkos(d_mu, "pace:mu", natom, maxneigh); - memoryKK->realloc_kokkos(d_rhats, "pace:rhats", natom, maxneigh); - memoryKK->realloc_kokkos(d_rnorms, "pace:rnorms", natom, maxneigh); - memoryKK->realloc_kokkos(d_nearest, "pace:nearest", natom, maxneigh); + MemKK::realloc_kokkos(d_ncount, "pace:ncount", natom); + MemKK::realloc_kokkos(d_mu, "pace:mu", natom, maxneigh); + MemKK::realloc_kokkos(d_rhats, "pace:rhats", natom, maxneigh); + MemKK::realloc_kokkos(d_rnorms, "pace:rnorms", natom, maxneigh); + MemKK::realloc_kokkos(d_nearest, "pace:nearest", natom, maxneigh); - memoryKK->realloc_kokkos(f_ij, "pace:f_ij", natom, maxneigh); + MemKK::realloc_kokkos(f_ij, "pace:f_ij", natom, maxneigh); } } @@ -163,11 +163,11 @@ void PairPACEKokkos::copy_pertype() { auto basis_set = aceimpl->basis_set; - memoryKK->realloc_kokkos(d_rho_core_cutoff, "pace:rho_core_cutoff", nelements); - memoryKK->realloc_kokkos(d_drho_core_cutoff, "pace:drho_core_cutoff", nelements); - memoryKK->realloc_kokkos(d_E0vals, "pace:E0vals", nelements); - memoryKK->realloc_kokkos(d_ndensity, "pace:ndensity", nelements); - memoryKK->realloc_kokkos(d_npoti, "pace:npoti", nelements); + MemKK::realloc_kokkos(d_rho_core_cutoff, "pace:rho_core_cutoff", nelements); + MemKK::realloc_kokkos(d_drho_core_cutoff, "pace:drho_core_cutoff", nelements); + MemKK::realloc_kokkos(d_E0vals, "pace:E0vals", nelements); + MemKK::realloc_kokkos(d_ndensity, "pace:ndensity", nelements); + MemKK::realloc_kokkos(d_npoti, "pace:npoti", nelements); auto h_rho_core_cutoff = Kokkos::create_mirror_view(d_rho_core_cutoff); auto h_drho_core_cutoff = Kokkos::create_mirror_view(d_drho_core_cutoff); @@ -196,8 +196,8 @@ void PairPACEKokkos::copy_pertype() Kokkos::deep_copy(d_ndensity, h_ndensity); Kokkos::deep_copy(d_npoti, h_npoti); - memoryKK->realloc_kokkos(d_wpre, "pace:wpre", nelements, basis_set->ndensitymax); - memoryKK->realloc_kokkos(d_mexp, "pace:mexp", nelements, basis_set->ndensitymax); + MemKK::realloc_kokkos(d_wpre, "pace:wpre", nelements, basis_set->ndensitymax); + MemKK::realloc_kokkos(d_mexp, "pace:mexp", nelements, basis_set->ndensitymax); auto h_wpre = Kokkos::create_mirror_view(d_wpre); auto h_mexp = Kokkos::create_mirror_view(d_mexp); @@ -266,7 +266,7 @@ void PairPACEKokkos::copy_tilde() idx_rho_max = 0; int total_basis_size_max = 0; - memoryKK->realloc_kokkos(d_idx_rho_count, "pace:idx_rho_count", nelements); + MemKK::realloc_kokkos(d_idx_rho_count, "pace:idx_rho_count", nelements); auto h_idx_rho_count = Kokkos::create_mirror_view(d_idx_rho_count); for (int n = 0; n < nelements; n++) { @@ -295,14 +295,14 @@ void PairPACEKokkos::copy_tilde() Kokkos::deep_copy(d_idx_rho_count, h_idx_rho_count); - memoryKK->realloc_kokkos(d_rank, "pace:rank", nelements, total_basis_size_max); - memoryKK->realloc_kokkos(d_num_ms_combs, "pace:num_ms_combs", nelements, total_basis_size_max); - memoryKK->realloc_kokkos(d_offsets, "pace:offsets", nelements, idx_rho_max); - memoryKK->realloc_kokkos(d_mus, "pace:mus", nelements, total_basis_size_max, basis_set->rankmax); - memoryKK->realloc_kokkos(d_ns, "pace:ns", nelements, total_basis_size_max, basis_set->rankmax); - memoryKK->realloc_kokkos(d_ls, "pace:ls", nelements, total_basis_size_max, basis_set->rankmax); - memoryKK->realloc_kokkos(d_ms_combs, "pace:ms_combs", nelements, idx_rho_max, basis_set->rankmax); - memoryKK->realloc_kokkos(d_ctildes, "pace:ctildes", nelements, idx_rho_max, basis_set->ndensitymax); + MemKK::realloc_kokkos(d_rank, "pace:rank", nelements, total_basis_size_max); + MemKK::realloc_kokkos(d_num_ms_combs, "pace:num_ms_combs", nelements, total_basis_size_max); + MemKK::realloc_kokkos(d_offsets, "pace:offsets", nelements, idx_rho_max); + MemKK::realloc_kokkos(d_mus, "pace:mus", nelements, total_basis_size_max, basis_set->rankmax); + MemKK::realloc_kokkos(d_ns, "pace:ns", nelements, total_basis_size_max, basis_set->rankmax); + MemKK::realloc_kokkos(d_ls, "pace:ls", nelements, total_basis_size_max, basis_set->rankmax); + MemKK::realloc_kokkos(d_ms_combs, "pace:ms_combs", nelements, idx_rho_max, basis_set->rankmax); + MemKK::realloc_kokkos(d_ctildes, "pace:ctildes", nelements, idx_rho_max, basis_set->ndensitymax); auto h_rank = Kokkos::create_mirror_view(d_rank); auto h_num_ms_combs = Kokkos::create_mirror_view(d_num_ms_combs); @@ -418,10 +418,10 @@ void PairPACEKokkos::init_style() // spherical harmonics - memoryKK->realloc_kokkos(alm, "pace:alm", (lmax + 1) * (lmax + 1)); - memoryKK->realloc_kokkos(blm, "pace:blm", (lmax + 1) * (lmax + 1)); - memoryKK->realloc_kokkos(cl, "pace:cl", lmax + 1); - memoryKK->realloc_kokkos(dl, "pace:dl", lmax + 1); + MemKK::realloc_kokkos(alm, "pace:alm", (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(blm, "pace:blm", (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(cl, "pace:cl", lmax + 1); + MemKK::realloc_kokkos(dl, "pace:dl", lmax + 1); pre_compute_harmonics(lmax); copy_pertype(); @@ -474,12 +474,12 @@ void PairPACEKokkos::allocate() PairPACE::allocate(); int n = atom->ntypes + 1; - memoryKK->realloc_kokkos(d_map, "pace:map", n); + MemKK::realloc_kokkos(d_map, "pace:map", n); - memoryKK->realloc_kokkos(k_cutsq, "pace:cutsq", n, n); + MemKK::realloc_kokkos(k_cutsq, "pace:cutsq", n, n); d_cutsq = k_cutsq.template view(); - memoryKK->realloc_kokkos(k_scale, "pace:scale", n, n); + MemKK::realloc_kokkos(k_scale, "pace:scale", n, n); d_scale = k_scale.template view(); } @@ -1651,56 +1651,56 @@ double PairPACEKokkos::memory_usage() { double bytes = 0; - bytes += memoryKK->memory_usage(A); - bytes += memoryKK->memory_usage(A_rank1); - bytes += memoryKK->memory_usage(A_list); - bytes += memoryKK->memory_usage(A_forward_prod); - bytes += memoryKK->memory_usage(e_atom); - bytes += memoryKK->memory_usage(rhos); - bytes += memoryKK->memory_usage(dF_drho); - bytes += memoryKK->memory_usage(weights); - bytes += memoryKK->memory_usage(weights_rank1); - bytes += memoryKK->memory_usage(rho_core); - bytes += memoryKK->memory_usage(dF_drho_core); - bytes += memoryKK->memory_usage(dB_flatten); - bytes += memoryKK->memory_usage(fr); - bytes += memoryKK->memory_usage(dfr); - bytes += memoryKK->memory_usage(gr); - bytes += memoryKK->memory_usage(dgr); - bytes += memoryKK->memory_usage(d_values); - bytes += memoryKK->memory_usage(d_derivatives); - bytes += memoryKK->memory_usage(cr); - bytes += memoryKK->memory_usage(dcr); - bytes += memoryKK->memory_usage(plm); - bytes += memoryKK->memory_usage(dplm); - bytes += memoryKK->memory_usage(ylm); - bytes += memoryKK->memory_usage(dylm); - bytes += memoryKK->memory_usage(d_ncount); - bytes += memoryKK->memory_usage(d_mu); - bytes += memoryKK->memory_usage(d_rhats); - bytes += memoryKK->memory_usage(d_rnorms); - bytes += memoryKK->memory_usage(d_nearest); - bytes += memoryKK->memory_usage(f_ij); - bytes += memoryKK->memory_usage(d_rho_core_cutoff); - bytes += memoryKK->memory_usage(d_drho_core_cutoff); - bytes += memoryKK->memory_usage(d_E0vals); - bytes += memoryKK->memory_usage(d_ndensity); - bytes += memoryKK->memory_usage(d_npoti); - bytes += memoryKK->memory_usage(d_wpre); - bytes += memoryKK->memory_usage(d_mexp); - bytes += memoryKK->memory_usage(d_idx_rho_count); - bytes += memoryKK->memory_usage(d_rank); - bytes += memoryKK->memory_usage(d_num_ms_combs); - bytes += memoryKK->memory_usage(d_offsets); - bytes += memoryKK->memory_usage(d_mus); - bytes += memoryKK->memory_usage(d_ns); - bytes += memoryKK->memory_usage(d_ls); - bytes += memoryKK->memory_usage(d_ms_combs); - bytes += memoryKK->memory_usage(d_ctildes); - bytes += memoryKK->memory_usage(alm); - bytes += memoryKK->memory_usage(blm); - bytes += memoryKK->memory_usage(cl); - bytes += memoryKK->memory_usage(dl); + bytes += MemKK::memory_usage(A); + bytes += MemKK::memory_usage(A_rank1); + bytes += MemKK::memory_usage(A_list); + bytes += MemKK::memory_usage(A_forward_prod); + bytes += MemKK::memory_usage(e_atom); + bytes += MemKK::memory_usage(rhos); + bytes += MemKK::memory_usage(dF_drho); + bytes += MemKK::memory_usage(weights); + bytes += MemKK::memory_usage(weights_rank1); + bytes += MemKK::memory_usage(rho_core); + bytes += MemKK::memory_usage(dF_drho_core); + bytes += MemKK::memory_usage(dB_flatten); + bytes += MemKK::memory_usage(fr); + bytes += MemKK::memory_usage(dfr); + bytes += MemKK::memory_usage(gr); + bytes += MemKK::memory_usage(dgr); + bytes += MemKK::memory_usage(d_values); + bytes += MemKK::memory_usage(d_derivatives); + bytes += MemKK::memory_usage(cr); + bytes += MemKK::memory_usage(dcr); + bytes += MemKK::memory_usage(plm); + bytes += MemKK::memory_usage(dplm); + bytes += MemKK::memory_usage(ylm); + bytes += MemKK::memory_usage(dylm); + bytes += MemKK::memory_usage(d_ncount); + bytes += MemKK::memory_usage(d_mu); + bytes += MemKK::memory_usage(d_rhats); + bytes += MemKK::memory_usage(d_rnorms); + bytes += MemKK::memory_usage(d_nearest); + bytes += MemKK::memory_usage(f_ij); + bytes += MemKK::memory_usage(d_rho_core_cutoff); + bytes += MemKK::memory_usage(d_drho_core_cutoff); + bytes += MemKK::memory_usage(d_E0vals); + bytes += MemKK::memory_usage(d_ndensity); + bytes += MemKK::memory_usage(d_npoti); + bytes += MemKK::memory_usage(d_wpre); + bytes += MemKK::memory_usage(d_mexp); + bytes += MemKK::memory_usage(d_idx_rho_count); + bytes += MemKK::memory_usage(d_rank); + bytes += MemKK::memory_usage(d_num_ms_combs); + bytes += MemKK::memory_usage(d_offsets); + bytes += MemKK::memory_usage(d_mus); + bytes += MemKK::memory_usage(d_ns); + bytes += MemKK::memory_usage(d_ls); + bytes += MemKK::memory_usage(d_ms_combs); + bytes += MemKK::memory_usage(d_ctildes); + bytes += MemKK::memory_usage(alm); + bytes += MemKK::memory_usage(blm); + bytes += MemKK::memory_usage(cl); + bytes += MemKK::memory_usage(dl); if (k_splines_gk.h_view.data()) { for (int i = 0; i < nelements; i++) { diff --git a/src/KOKKOS/pair_reaxff_kokkos.cpp b/src/KOKKOS/pair_reaxff_kokkos.cpp index 192300d642..61f3bb3037 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.cpp +++ b/src/KOKKOS/pair_reaxff_kokkos.cpp @@ -36,6 +36,7 @@ #include "kokkos.h" #include "math_const.h" #include "math_special.h" +#include "memory_kokkos.h" #include "neigh_request.h" #include "neighbor.h" @@ -78,8 +79,8 @@ PairReaxFFKokkos::PairReaxFFKokkos(LAMMPS *lmp) : PairReaxFF(lmp) k_error_flag = DAT::tdual_int_scalar("pair:error_flag"); k_nbuf_local = DAT::tdual_int_scalar("pair:nbuf_local"); - d_torsion_pack = t_reax_int4_2d("reaxff:torsion_pack",1,2); - d_angular_pack = t_reax_int4_2d("reaxff:angular_pack",1,2); + MemKK::realloc_kokkos(d_torsion_pack,"reaxff:torsion_pack",1,2); + MemKK::realloc_kokkos(d_angular_pack,"reaxff:angular_pack",1,2); k_count_angular_torsion = DAT::tdual_int_1d("PairReaxFF::count_angular_torsion",2); d_count_angular_torsion = k_count_angular_torsion.template view(); @@ -883,17 +884,10 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy(0,ignum),*this); // allocate duplicated memory - if (need_dup) { + if (need_dup) dup_CdDelta = Kokkos::Experimental::create_scatter_view(d_CdDelta); - //dup_Cdbo = Kokkos::Experimental::create_scatter_view(d_Cdbo); - //dup_Cdbopi = Kokkos::Experimental::create_scatter_view(d_Cdbopi); - //dup_Cdbopi2 = Kokkos::Experimental::create_scatter_view(d_Cdbopi2); - } else { + else ndup_CdDelta = Kokkos::Experimental::create_scatter_view(d_CdDelta); - //ndup_Cdbo = Kokkos::Experimental::create_scatter_view(d_Cdbo); - //ndup_Cdbopi = Kokkos::Experimental::create_scatter_view(d_Cdbopi); - //ndup_Cdbopi2 = Kokkos::Experimental::create_scatter_view(d_Cdbopi2); - } // reduction over duplicated memory if (need_dup) @@ -959,9 +953,9 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) count_torsion = h_count_angular_torsion(1); if (count_angular > (int)d_angular_pack.extent(0)) - d_angular_pack = t_reax_int4_2d("reaxff:angular_pack",(int)(count_angular * 1.1),2); + MemKK::realloc_kokkos(d_angular_pack,"reaxff:angular_pack",(int)(count_angular * 1.1),2); if (count_torsion > (int)d_torsion_pack.extent(0)) - d_torsion_pack = t_reax_int4_2d("reaxff:torsion_pack",(int)(count_torsion * 1.1),2); + MemKK::realloc_kokkos(d_torsion_pack,"reaxff:torsion_pack",(int)(count_torsion * 1.1),2); // need to zero to re-count h_count_angular_torsion(0) = 0; @@ -1033,26 +1027,12 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) if (need_dup) { Kokkos::Experimental::contribute(d_dDeltap_self, dup_dDeltap_self); // needed in ComputeBond2 Kokkos::Experimental::contribute(d_CdDelta, dup_CdDelta); // needed in ComputeBond2 - - //Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in UpdateBond, but also used in UpdateBond - //Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in UpdateBond, but also used in UpdateBond - //Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in UpdateBond, but also used in UpdateBond - //dup_Cdbo.reset_except(d_Cdbo); - //dup_Cdbopi.reset_except(d_Cdbopi); - //dup_Cdbopi2.reset_except(d_Cdbopi2); } // Bond force if (neighflag == HALF) { Kokkos::parallel_for(Kokkos::RangePolicy>(0,ignum),*this); - // reduction over duplicated memory - //if (need_dup) { - // Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in ComputeBond2 - // Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in ComputeBond2 - // Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in ComputeBond2 - //} - if (vflag_either) Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,ignum),*this,ev); else @@ -1062,13 +1042,6 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) } else { //if (neighflag == HALFTHREAD) { Kokkos::parallel_for(Kokkos::RangePolicy>(0,ignum),*this); - // reduction over duplicated memory - //if (need_dup) { - // Kokkos::Experimental::contribute(d_Cdbo, dup_Cdbo); // needed in ComputeBond2 - // Kokkos::Experimental::contribute(d_Cdbopi, dup_Cdbopi); // needed in ComputeBond2 - // Kokkos::Experimental::contribute(d_Cdbopi2, dup_Cdbopi2); // needed in ComputeBond2 - //} - if (vflag_either) Kokkos::parallel_reduce(Kokkos::RangePolicy>(0,ignum),*this,ev); else @@ -1116,7 +1089,7 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) copymode = 0; - // free duplicated memory + // free scatterview memory if (need_dup) { dup_f = decltype(dup_f)(); dup_eatom = decltype(dup_eatom)(); @@ -1124,9 +1097,13 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) dup_dDeltap_self = decltype(dup_dDeltap_self)(); dup_total_bo = decltype(dup_total_bo)(); dup_CdDelta = decltype(dup_CdDelta)(); - //dup_Cdbo = decltype(dup_Cdbo)(); - //dup_Cdbopi = decltype(dup_Cdbopi)(); - //dup_Cdbopi2 = decltype(dup_Cdbopi2)(); + } else { + ndup_f = decltype(ndup_f)(); + ndup_eatom = decltype(ndup_eatom)(); + ndup_vatom = decltype(ndup_vatom)(); + ndup_dDeltap_self = decltype(ndup_dDeltap_self)(); + ndup_total_bo = decltype(ndup_total_bo)(); + ndup_CdDelta = decltype(ndup_CdDelta)(); } d_neighbors = typename AT::t_neighbors_2d(); @@ -1512,131 +1489,74 @@ void PairReaxFFKokkos::operator()(TagPairReaxComputeTabulatedLJCoulo template void PairReaxFFKokkos::allocate_array() { - // deallocate first to reduce memory overhead - - deallocate_array(); + // free scatterview memory + if (need_dup) { + dup_dDeltap_self = decltype(dup_dDeltap_self)(); + dup_total_bo = decltype(dup_total_bo)(); + dup_CdDelta = decltype(dup_CdDelta)(); + } else { + ndup_dDeltap_self = decltype(ndup_dDeltap_self)(); + ndup_total_bo = decltype(ndup_total_bo)(); + ndup_CdDelta = decltype(ndup_CdDelta)(); + } if (cut_hbsq > 0.0) { - d_hb_first = typename AT::t_int_1d("reaxff/kk:hb_first",nmax); - d_hb_num = typename AT::t_int_1d("reaxff/kk:hb_num",nmax); - d_hb_list = typename AT::t_int_1d("reaxff/kk:hb_list",nmax*maxhb); + MemKK::realloc_kokkos(d_hb_first,"reaxff/kk:hb_first",nmax); + MemKK::realloc_kokkos(d_hb_num,"reaxff/kk:hb_num",nmax); + MemKK::realloc_kokkos(d_hb_list,"reaxff/kk:hb_list",nmax*maxhb); } - d_bo_first = typename AT::t_int_1d("reaxff/kk:bo_first",nmax); - d_bo_num = typename AT::t_int_1d("reaxff/kk:bo_num",nmax); - d_bo_list = typename AT::t_int_1d("reaxff/kk:bo_list",nmax*maxbo); + MemKK::realloc_kokkos(d_bo_first,"reaxff/kk:bo_first",nmax); + MemKK::realloc_kokkos(d_bo_num,"reaxff/kk:bo_num",nmax); + MemKK::realloc_kokkos(d_bo_list,"reaxff/kk:bo_list",nmax*maxbo); - d_BO = typename AT::t_ffloat_2d_dl("reaxff/kk:BO",nmax,maxbo); - d_BO_s = typename AT::t_ffloat_2d_dl("reaxff/kk:BO",nmax,maxbo); - d_BO_pi = typename AT::t_ffloat_2d_dl("reaxff/kk:BO_pi",nmax,maxbo); - d_BO_pi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:BO_pi2",nmax,maxbo); + MemKK::realloc_kokkos(d_BO,"reaxff/kk:BO",nmax,maxbo); + MemKK::realloc_kokkos(d_BO_s,"reaxff/kk:BO",nmax,maxbo); + MemKK::realloc_kokkos(d_BO_pi,"reaxff/kk:BO_pi",nmax,maxbo); + MemKK::realloc_kokkos(d_BO_pi2,"reaxff/kk:BO_pi2",nmax,maxbo); - d_dln_BOp_pi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_dln_BOp_pi",nmax,maxbo); - d_dln_BOp_pi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_dln_BOp_pi2",nmax,maxbo); + MemKK::realloc_kokkos(d_dln_BOp_pi,"reaxff/kk:d_dln_BOp_pi",nmax,maxbo); + MemKK::realloc_kokkos(d_dln_BOp_pi2,"reaxff/kk:d_dln_BOp_pi2",nmax,maxbo); - d_C1dbo = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C1dbo",nmax,maxbo); - d_C2dbo = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C2dbo",nmax,maxbo); - d_C3dbo = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C3dbo",nmax,maxbo); + MemKK::realloc_kokkos(d_C1dbo,"reaxff/kk:d_C1dbo",nmax,maxbo); + MemKK::realloc_kokkos(d_C2dbo,"reaxff/kk:d_C2dbo",nmax,maxbo); + MemKK::realloc_kokkos(d_C3dbo,"reaxff/kk:d_C3dbo",nmax,maxbo); - d_C1dbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C1dbopi",nmax,maxbo); - d_C2dbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C2dbopi",nmax,maxbo); - d_C3dbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C3dbopi",nmax,maxbo); - d_C4dbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C4dbopi",nmax,maxbo); + MemKK::realloc_kokkos(d_C1dbopi,"reaxff/kk:d_C1dbopi",nmax,maxbo); + MemKK::realloc_kokkos(d_C2dbopi,"reaxff/kk:d_C2dbopi",nmax,maxbo); + MemKK::realloc_kokkos(d_C3dbopi,"reaxff/kk:d_C3dbopi",nmax,maxbo); + MemKK::realloc_kokkos(d_C4dbopi,"reaxff/kk:d_C4dbopi",nmax,maxbo); - d_C1dbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C1dbopi2",nmax,maxbo); - d_C2dbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C2dbopi2",nmax,maxbo); - d_C3dbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C3dbopi2",nmax,maxbo); - d_C4dbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:d_C4dbopi2",nmax,maxbo); + MemKK::realloc_kokkos(d_C1dbopi2,"reaxff/kk:d_C1dbopi2",nmax,maxbo); + MemKK::realloc_kokkos(d_C2dbopi2,"reaxff/kk:d_C2dbopi2",nmax,maxbo); + MemKK::realloc_kokkos(d_C3dbopi2,"reaxff/kk:d_C3dbopi2",nmax,maxbo); + MemKK::realloc_kokkos(d_C4dbopi2,"reaxff/kk:d_C4dbopi2",nmax,maxbo); - d_dBOp = typename AT::t_ffloat_2d_dl("reaxff/kk:dBOp",nmax,maxbo); + MemKK::realloc_kokkos(d_dBOp,"reaxff/kk:dBOp",nmax,maxbo); - d_dDeltap_self = typename AT::t_ffloat_2d_dl("reaxff/kk:dDeltap_self",nmax,3); - d_Deltap_boc = typename AT::t_ffloat_1d("reaxff/kk:Deltap_boc",nmax); - d_Deltap = typename AT::t_ffloat_1d("reaxff/kk:Deltap",nmax); - d_total_bo = typename AT::t_ffloat_1d("reaxff/kk:total_bo",nmax); + MemKK::realloc_kokkos(d_dDeltap_self,"reaxff/kk:dDeltap_self",nmax,3); + MemKK::realloc_kokkos(d_Deltap_boc,"reaxff/kk:Deltap_boc",nmax); + MemKK::realloc_kokkos(d_Deltap,"reaxff/kk:Deltap",nmax); + MemKK::realloc_kokkos(d_total_bo,"reaxff/kk:total_bo",nmax); - d_Cdbo = typename AT::t_ffloat_2d_dl("reaxff/kk:Cdbo",nmax,3*maxbo); - d_Cdbopi = typename AT::t_ffloat_2d_dl("reaxff/kk:Cdbopi",nmax,3*maxbo); - d_Cdbopi2 = typename AT::t_ffloat_2d_dl("reaxff/kk:Cdbopi2",nmax,3*maxbo); + MemKK::realloc_kokkos(d_Cdbo,"reaxff/kk:Cdbo",nmax,3*maxbo); + MemKK::realloc_kokkos(d_Cdbopi,"reaxff/kk:Cdbopi",nmax,3*maxbo); + MemKK::realloc_kokkos(d_Cdbopi2,"reaxff/kk:Cdbopi2",nmax,3*maxbo); - d_Delta = typename AT::t_ffloat_1d("reaxff/kk:Delta",nmax); - d_Delta_boc = typename AT::t_ffloat_1d("reaxff/kk:Delta_boc",nmax); - d_dDelta_lp = typename AT::t_ffloat_1d("reaxff/kk:dDelta_lp",nmax); - d_Delta_lp = typename AT::t_ffloat_1d("reaxff/kk:Delta_lp",nmax); - d_Delta_lp_temp = typename AT::t_ffloat_1d("reaxff/kk:Delta_lp_temp",nmax); - d_CdDelta = typename AT::t_ffloat_1d("reaxff/kk:CdDelta",nmax); - d_sum_ovun = typename AT::t_ffloat_2d_dl("reaxff/kk:sum_ovun",nmax,3); + MemKK::realloc_kokkos(d_Delta,"reaxff/kk:Delta",nmax); + MemKK::realloc_kokkos(d_Delta_boc,"reaxff/kk:Delta_boc",nmax); + MemKK::realloc_kokkos(d_dDelta_lp,"reaxff/kk:dDelta_lp",nmax); + MemKK::realloc_kokkos(d_Delta_lp,"reaxff/kk:Delta_lp",nmax); + MemKK::realloc_kokkos(d_Delta_lp_temp,"reaxff/kk:Delta_lp_temp",nmax); + MemKK::realloc_kokkos(d_CdDelta,"reaxff/kk:CdDelta",nmax); + MemKK::realloc_kokkos(d_sum_ovun,"reaxff/kk:sum_ovun",nmax,3); // FixReaxFFBonds - d_abo = typename AT::t_ffloat_2d("reaxff/kk:abo",nmax,maxbo); - d_neighid = typename AT::t_tagint_2d("reaxff/kk:neighid",nmax,maxbo); - d_numneigh_bonds = typename AT::t_int_1d("reaxff/kk:numneigh_bonds",nmax); + MemKK::realloc_kokkos(d_abo,"reaxff/kk:abo",nmax,maxbo); + MemKK::realloc_kokkos(d_neighid,"reaxff/kk:neighid",nmax,maxbo); + MemKK::realloc_kokkos(d_numneigh_bonds,"reaxff/kk:numneigh_bonds",nmax); // ComputeAngular intermediates - d_angular_intermediates = typename AT::t_ffloat_2d("reaxff/kk:angular_intermediates",nmax,4); -} - -/* ---------------------------------------------------------------------- */ - -template -void PairReaxFFKokkos::deallocate_array() -{ - if (cut_hbsq > 0.0) { - d_hb_first = typename AT::t_int_1d(); - d_hb_num = typename AT::t_int_1d(); - d_hb_list = typename AT::t_int_1d(); - } - d_bo_first = typename AT::t_int_1d(); - d_bo_num = typename AT::t_int_1d(); - d_bo_list = typename AT::t_int_1d(); - - d_BO = typename AT::t_ffloat_2d_dl(); - d_BO_s = typename AT::t_ffloat_2d_dl(); - d_BO_pi = typename AT::t_ffloat_2d_dl(); - d_BO_pi2 = typename AT::t_ffloat_2d_dl(); - - d_dln_BOp_pi = typename AT::t_ffloat_2d_dl(); - d_dln_BOp_pi2 = typename AT::t_ffloat_2d_dl(); - - d_C1dbo = typename AT::t_ffloat_2d_dl(); - d_C2dbo = typename AT::t_ffloat_2d_dl(); - d_C3dbo = typename AT::t_ffloat_2d_dl(); - - d_C1dbopi = typename AT::t_ffloat_2d_dl(); - d_C2dbopi = typename AT::t_ffloat_2d_dl(); - d_C3dbopi = typename AT::t_ffloat_2d_dl(); - d_C4dbopi = typename AT::t_ffloat_2d_dl(); - - d_C1dbopi2 = typename AT::t_ffloat_2d_dl(); - d_C2dbopi2 = typename AT::t_ffloat_2d_dl(); - d_C3dbopi2 = typename AT::t_ffloat_2d_dl(); - d_C4dbopi2 = typename AT::t_ffloat_2d_dl(); - - d_dBOp = typename AT::t_ffloat_2d_dl(); - - d_dDeltap_self = typename AT::t_ffloat_2d_dl(); - d_Deltap_boc = typename AT::t_ffloat_1d(); - d_Deltap = typename AT::t_ffloat_1d(); - d_total_bo = typename AT::t_ffloat_1d(); - - d_Cdbo = typename AT::t_ffloat_2d_dl(); - d_Cdbopi = typename AT::t_ffloat_2d_dl(); - d_Cdbopi2 = typename AT::t_ffloat_2d_dl(); - - d_Delta = typename AT::t_ffloat_1d(); - d_Delta_boc = typename AT::t_ffloat_1d(); - d_dDelta_lp = typename AT::t_ffloat_1d(); - d_Delta_lp = typename AT::t_ffloat_1d(); - d_Delta_lp_temp = typename AT::t_ffloat_1d(); - d_CdDelta = typename AT::t_ffloat_1d(); - d_sum_ovun = typename AT::t_ffloat_2d_dl(); - - // FixReaxFFBonds - d_abo = typename AT::t_ffloat_2d(); - d_neighid = typename AT::t_tagint_2d(); - d_numneigh_bonds = typename AT::t_int_1d(); - - // ComputeAngular intermediates - d_angular_intermediates = typename AT::t_ffloat_2d(); + MemKK::realloc_kokkos(d_angular_intermediates,"reaxff/kk:angular_intermediates",nmax,4); } /* ---------------------------------------------------------------------- */ @@ -3549,9 +3469,6 @@ void PairReaxFFKokkos::operator()(TagPairReaxUpdateBond, Kokkos::View::value>> a_Cdbo = d_Cdbo; Kokkos::View::value>> a_Cdbopi = d_Cdbopi; Kokkos::View::value>> a_Cdbopi2 = d_Cdbopi2; - //auto a_Cdbo = dup_Cdbo.template access>(); - //auto a_Cdbopi = dup_Cdbopi.template access>(); - //auto a_Cdbopi2 = dup_Cdbopi2.template access>(); const int i = d_ilist[ii]; const tagint itag = tag(i); diff --git a/src/KOKKOS/pair_reaxff_kokkos.h b/src/KOKKOS/pair_reaxff_kokkos.h index 64d9ced875..39b323a0fe 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.h +++ b/src/KOKKOS/pair_reaxff_kokkos.h @@ -384,7 +384,6 @@ class PairReaxFFKokkos : public PairReaxFF { protected: void allocate(); void allocate_array(); - void deallocate_array(); void setup(); void init_md(); int Init_Lookup_Tables(); @@ -436,6 +435,8 @@ class PairReaxFFKokkos : public PairReaxFF { typename AT::t_ffloat_2d_dl d_C1dbopi2, d_C2dbopi2, d_C3dbopi2, d_C4dbopi2; typename AT::t_ffloat_2d_dl d_Cdbo, d_Cdbopi, d_Cdbopi2, d_dDeltap_self; + int need_dup; + using KKDeviceType = typename KKDevice::value; template @@ -444,27 +445,19 @@ class PairReaxFFKokkos : public PairReaxFF { template using NonDupScatterView = KKScatterView; - DupScatterView dup_total_bo; - DupScatterView dup_CdDelta; - DupScatterView dup_eatom; DupScatterView dup_f; + DupScatterView dup_eatom; DupScatterView dup_vatom; DupScatterView dup_dDeltap_self; - DupScatterView dup_Cdbo; - DupScatterView dup_Cdbopi; - DupScatterView dup_Cdbopi2; + DupScatterView dup_total_bo; + DupScatterView dup_CdDelta; - NonDupScatterView ndup_total_bo; - NonDupScatterView ndup_CdDelta; - NonDupScatterView ndup_eatom; NonDupScatterView ndup_f; + NonDupScatterView ndup_eatom; NonDupScatterView ndup_vatom; NonDupScatterView ndup_dDeltap_self; - NonDupScatterView ndup_Cdbo; - NonDupScatterView ndup_Cdbopi; - NonDupScatterView ndup_Cdbopi2; - - int need_dup; + NonDupScatterView ndup_total_bo; + NonDupScatterView ndup_CdDelta; typedef Kokkos::DualView tdual_ffloat_2d_n7; typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread; diff --git a/src/KOKKOS/pair_snap_kokkos.h b/src/KOKKOS/pair_snap_kokkos.h index 2cd0003478..2937359822 100644 --- a/src/KOKKOS/pair_snap_kokkos.h +++ b/src/KOKKOS/pair_snap_kokkos.h @@ -238,44 +238,14 @@ class PairSNAPKokkos : public PairSNAP { typename AT::t_efloat_1d d_eatom; typename AT::t_virial_array d_vatom; - typedef Kokkos::View t_bvec; - t_bvec bvec; - typedef Kokkos::View t_dbvec; - t_dbvec dbvec; SNAKokkos snaKK; int inum,max_neighs,chunk_size,chunk_offset; - int host_flag; + int host_flag,neighflag; int eflag,vflag; void allocate() override; - //void read_files(char *, char *); - /*template - inline int equal(double* x,double* y); - template - inline double dist2(double* x,double* y); - double extra_cutoff(); - void load_balance(); - void set_sna_to_shared(int snaid,int i); - void build_per_atom_arrays();*/ - - int neighflag; - - Kokkos::View ilistmast; - Kokkos::View ghostilist; - Kokkos::View ghostnumneigh; - Kokkos::View ghostneighs; - Kokkos::View ghostfirstneigh; - - Kokkos::View i_pairs; - Kokkos::View i_rij; - Kokkos::View i_inside; - Kokkos::View i_wj; - Kokkos::Viewi_rcutij; - Kokkos::View i_ninside; - Kokkos::View i_uarraytot_r, i_uarraytot_i; - Kokkos::View i_zarray_r, i_zarray_i; Kokkos::View d_radelem; // element radii Kokkos::View d_wjelem; // elements weights @@ -286,7 +256,6 @@ class PairSNAPKokkos : public PairSNAP { Kokkos::View d_ninside; // ninside for all atoms in list Kokkos::View d_beta; // betas for all atoms in list Kokkos::View d_beta_pack; // betas for all atoms in list, GPU - Kokkos::View d_bispectrum; // bispectrum components for all atoms in list typedef Kokkos::DualView tdual_fparams; tdual_fparams k_cutsq; diff --git a/src/KOKKOS/pair_snap_kokkos_impl.h b/src/KOKKOS/pair_snap_kokkos_impl.h index c2657f8534..0656a1f9ee 100644 --- a/src/KOKKOS/pair_snap_kokkos_impl.h +++ b/src/KOKKOS/pair_snap_kokkos_impl.h @@ -206,10 +206,10 @@ void PairSNAPKokkos::compute(int eflag_in, if (beta_max < inum) { beta_max = inum; - d_beta = Kokkos::View("PairSNAPKokkos:beta",ncoeff,inum); + MemKK::realloc_kokkos(d_beta,"PairSNAPKokkos:beta",ncoeff,inum); if (!host_flag) - d_beta_pack = Kokkos::View("PairSNAPKokkos:beta_pack",vector_length,ncoeff,(inum + vector_length - 1) / vector_length); - d_ninside = Kokkos::View("PairSNAPKokkos:ninside",inum); + MemKK::realloc_kokkos(d_beta_pack,"PairSNAPKokkos:beta_pack",vector_length,ncoeff,(inum + vector_length - 1) / vector_length); + MemKK::realloc_kokkos(d_ninside,"PairSNAPKokkos:ninside",inum); } chunk_size = MIN(chunksize,inum); // "chunksize" variable is set by user @@ -545,7 +545,7 @@ void PairSNAPKokkos::allocate() PairSNAP::allocate(); int n = atom->ntypes; - d_map = Kokkos::View("PairSNAPKokkos::map",n+1); + MemKK::realloc_kokkos(d_map,"PairSNAPKokkos::map",n+1); } @@ -574,11 +574,11 @@ void PairSNAPKokkos::coeff(int narg, char // Set up element lists - d_radelem = Kokkos::View("pair:radelem",nelements); - d_wjelem = Kokkos::View("pair:wjelem",nelements); - d_coeffelem = Kokkos::View("pair:coeffelem",nelements,ncoeffall); - d_sinnerelem = Kokkos::View("pair:sinnerelem",nelements); - d_dinnerelem = Kokkos::View("pair:dinnerelem",nelements); + MemKK::realloc_kokkos(d_radelem,"pair:radelem",nelements); + MemKK::realloc_kokkos(d_wjelem,"pair:wjelem",nelements); + MemKK::realloc_kokkos(d_coeffelem,"pair:coeffelem",nelements,ncoeffall); + MemKK::realloc_kokkos(d_sinnerelem,"pair:sinnerelem",nelements); + MemKK::realloc_kokkos(d_dinnerelem,"pair:dinnerelem",nelements); auto h_radelem = Kokkos::create_mirror_view(d_radelem); auto h_wjelem = Kokkos::create_mirror_view(d_wjelem); @@ -1411,12 +1411,16 @@ template double PairSNAPKokkos::memory_usage() { double bytes = Pair::memory_usage(); - int n = atom->ntypes+1; - bytes += n*n*sizeof(int); - bytes += n*n*sizeof(real_type); - bytes += (2*ncoeffall)*sizeof(real_type); - bytes += (ncoeff*3)*sizeof(real_type); - bytes += snaKK.memory_usage(); + bytes += MemKK::memory_usage(d_beta); + if (!host_flag) + bytes += MemKK::memory_usage(d_beta_pack); + bytes += MemKK::memory_usage(d_ninside); + bytes += MemKK::memory_usage(d_map); + bytes += MemKK::memory_usage(d_radelem); + bytes += MemKK::memory_usage(d_wjelem); + bytes += MemKK::memory_usage(d_coeffelem); + bytes += MemKK::memory_usage(d_sinnerelem); + bytes += MemKK::memory_usage(d_dinnerelem); return bytes; } diff --git a/src/KOKKOS/sna_kokkos_impl.h b/src/KOKKOS/sna_kokkos_impl.h index 77130b9781..8d81a4a65b 100644 --- a/src/KOKKOS/sna_kokkos_impl.h +++ b/src/KOKKOS/sna_kokkos_impl.h @@ -17,6 +17,7 @@ ------------------------------------------------------------------------- */ #include "sna_kokkos.h" +#include "memory_kokkos.h" #include #include #include @@ -62,12 +63,12 @@ SNAKokkos::SNAKokkos(real_type rfac0_in, build_indexlist(); int jdimpq = twojmax + 2; - rootpqarray = t_sna_2d("SNAKokkos::rootpqarray",jdimpq,jdimpq); + MemKK::realloc_kokkos(rootpqarray,"SNAKokkos::rootpqarray",jdimpq,jdimpq); - cglist = t_sna_1d("SNAKokkos::cglist",idxcg_max); + MemKK::realloc_kokkos(cglist,"SNAKokkos::cglist",idxcg_max); if (bzero_flag) { - bzero = Kokkos::View("sna:bzero",twojmax+1); + MemKK::realloc_kokkos(bzero,"sna:bzero",twojmax+1); auto h_bzero = Kokkos::create_mirror_view(bzero); double www = wself*wself*wself; @@ -95,7 +96,7 @@ void SNAKokkos::build_indexlist() // index list for cglist int jdim = twojmax + 1; - idxcg_block = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxcg_block"),jdim,jdim,jdim); + MemKK::realloc_kokkos(idxcg_block,"SNAKokkos::idxcg_block",jdim,jdim,jdim); auto h_idxcg_block = Kokkos::create_mirror_view(idxcg_block); int idxcg_count = 0; @@ -113,7 +114,7 @@ void SNAKokkos::build_indexlist() // index list for uarray // need to include both halves - idxu_block = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxu_block"),jdim); + MemKK::realloc_kokkos(idxu_block,"SNAKokkos::idxu_block",jdim); auto h_idxu_block = Kokkos::create_mirror_view(idxu_block); int idxu_count = 0; @@ -128,7 +129,7 @@ void SNAKokkos::build_indexlist() Kokkos::deep_copy(idxu_block,h_idxu_block); // index list for half uarray - idxu_half_block = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxu_half_block"),jdim); + MemKK::realloc_kokkos(idxu_half_block,"SNAKokkos::idxu_half_block",jdim); auto h_idxu_half_block = Kokkos::create_mirror_view(idxu_half_block); int idxu_half_count = 0; @@ -142,7 +143,7 @@ void SNAKokkos::build_indexlist() Kokkos::deep_copy(idxu_half_block, h_idxu_half_block); // mapping between full and half indexing, encoding flipping - idxu_full_half = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxu_full_half"),idxu_max); + MemKK::realloc_kokkos(idxu_full_half,"SNAKokkos::idxu_full_half",idxu_max); auto h_idxu_full_half = Kokkos::create_mirror_view(idxu_full_half); idxu_count = 0; @@ -169,7 +170,7 @@ void SNAKokkos::build_indexlist() // index list for "cache" uarray // this is the GPU scratch memory requirements // applied the CPU structures - idxu_cache_block = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxu_cache_block"),jdim); + MemKK::realloc_kokkos(idxu_cache_block,"SNAKokkos::idxu_cache_block",jdim); auto h_idxu_cache_block = Kokkos::create_mirror_view(idxu_cache_block); int idxu_cache_count = 0; @@ -191,7 +192,7 @@ void SNAKokkos::build_indexlist() if (j >= j1) idxb_count++; idxb_max = idxb_count; - idxb = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxb"),idxb_max); + MemKK::realloc_kokkos(idxb,"SNAKokkos::idxb",idxb_max); auto h_idxb = Kokkos::create_mirror_view(idxb); idxb_count = 0; @@ -208,7 +209,7 @@ void SNAKokkos::build_indexlist() // reverse index list for beta and b - idxb_block = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxb_block"),jdim,jdim,jdim); + MemKK::realloc_kokkos(idxb_block,"SNAKokkos::idxb_block",jdim,jdim,jdim); auto h_idxb_block = Kokkos::create_mirror_view(idxb_block); idxb_count = 0; @@ -234,10 +235,10 @@ void SNAKokkos::build_indexlist() idxz_count++; idxz_max = idxz_count; - idxz = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxz"),idxz_max); + MemKK::realloc_kokkos(idxz,"SNAKokkos::idxz",idxz_max); auto h_idxz = Kokkos::create_mirror_view(idxz); - idxz_block = Kokkos::View(Kokkos::NoInit("SNAKokkos::idxz_block"), jdim,jdim,jdim); + MemKK::realloc_kokkos(idxz_block,"SNAKokkos::idxz_block", jdim,jdim,jdim); auto h_idxz_block = Kokkos::create_mirror_view(idxz_block); idxz_count = 0; @@ -294,59 +295,59 @@ void SNAKokkos::grow_rij(int newnatom, int natom = newnatom; nmax = newnmax; - rij = t_sna_3d(Kokkos::NoInit("sna:rij"),natom,nmax,3); - wj = t_sna_2d(Kokkos::NoInit("sna:wj"),natom,nmax); - rcutij = t_sna_2d(Kokkos::NoInit("sna:rcutij"),natom,nmax); - sinnerij = t_sna_2d(Kokkos::NoInit("sna:sinnerij"),natom,nmax); - dinnerij = t_sna_2d(Kokkos::NoInit("sna:dinnerij"),natom,nmax); - inside = t_sna_2i(Kokkos::NoInit("sna:inside"),natom,nmax); - element = t_sna_2i(Kokkos::NoInit("sna:element"),natom,nmax); - dedr = t_sna_3d(Kokkos::NoInit("sna:dedr"),natom,nmax,3); + MemKK::realloc_kokkos(rij,"sna:rij",natom,nmax,3); + MemKK::realloc_kokkos(wj,"sna:wj",natom,nmax); + MemKK::realloc_kokkos(rcutij,"sna:rcutij",natom,nmax); + MemKK::realloc_kokkos(sinnerij,"sna:sinnerij",natom,nmax); + MemKK::realloc_kokkos(dinnerij,"sna:dinnerij",natom,nmax); + MemKK::realloc_kokkos(inside,"sna:inside",natom,nmax); + MemKK::realloc_kokkos(element,"sna:element",natom,nmax); + MemKK::realloc_kokkos(dedr,"sna:dedr",natom,nmax,3); #ifdef LMP_KOKKOS_GPU if (!host_flag) { const int natom_div = (natom + vector_length - 1) / vector_length; - a_pack = t_sna_3c_ll(Kokkos::NoInit("sna:a_pack"),vector_length,nmax,natom_div); - b_pack = t_sna_3c_ll(Kokkos::NoInit("sna:b_pack"),vector_length,nmax,natom_div); - da_pack = t_sna_4c_ll(Kokkos::NoInit("sna:da_pack"),vector_length,nmax,natom_div,3); - db_pack = t_sna_4c_ll(Kokkos::NoInit("sna:db_pack"),vector_length,nmax,natom_div,3); - sfac_pack = t_sna_4d_ll(Kokkos::NoInit("sna:sfac_pack"),vector_length,nmax,natom_div,4); - ulisttot = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),1,1,1); // dummy allocation - ulisttot_full = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),1,1,1); - ulisttot_re_pack = t_sna_4d_ll(Kokkos::NoInit("sna:ulisttot_re_pack"),vector_length,idxu_half_max,nelements,natom_div); - ulisttot_im_pack = t_sna_4d_ll(Kokkos::NoInit("sna:ulisttot_im_pack"),vector_length,idxu_half_max,nelements,natom_div); - ulisttot_pack = t_sna_4c_ll(Kokkos::NoInit("sna:ulisttot_pack"),vector_length,idxu_max,nelements,natom_div); - ulist = t_sna_3c_ll(Kokkos::NoInit("sna:ulist"),1,1,1); - zlist = t_sna_3c_ll(Kokkos::NoInit("sna:zlist"),1,1,1); - zlist_pack = t_sna_4c_ll(Kokkos::NoInit("sna:zlist_pack"),vector_length,idxz_max,ndoubles,natom_div); - blist = t_sna_3d(Kokkos::NoInit("sna:blist"),natom,ntriples,idxb_max); - blist_pack = t_sna_4d_ll(Kokkos::NoInit("sna:blist_pack"),vector_length,idxb_max,ntriples,natom_div); - ylist = t_sna_3c_ll(Kokkos::NoInit("sna:ylist"),1,1,1); - ylist_pack_re = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_re"),vector_length,idxu_half_max,nelements,natom_div); - ylist_pack_im = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_im"),vector_length,idxu_half_max,nelements,natom_div); - dulist = t_sna_4c3_ll(Kokkos::NoInit("sna:dulist"),1,1,1); + MemKK::realloc_kokkos(a_pack,"sna:a_pack",vector_length,nmax,natom_div); + MemKK::realloc_kokkos(b_pack,"sna:b_pack",vector_length,nmax,natom_div); + MemKK::realloc_kokkos(da_pack,"sna:da_pack",vector_length,nmax,natom_div,3); + MemKK::realloc_kokkos(db_pack,"sna:db_pack",vector_length,nmax,natom_div,3); + MemKK::realloc_kokkos(sfac_pack,"sna:sfac_pack",vector_length,nmax,natom_div,4); + MemKK::realloc_kokkos(ulisttot,"sna:ulisttot",1,1,1); // dummy allocation + MemKK::realloc_kokkos(ulisttot_full,"sna:ulisttot",1,1,1); + MemKK::realloc_kokkos(ulisttot_re_pack,"sna:ulisttot_re_pack",vector_length,idxu_half_max,nelements,natom_div); + MemKK::realloc_kokkos(ulisttot_im_pack,"sna:ulisttot_im_pack",vector_length,idxu_half_max,nelements,natom_div); + MemKK::realloc_kokkos(ulisttot_pack,"sna:ulisttot_pack",vector_length,idxu_max,nelements,natom_div); + MemKK::realloc_kokkos(ulist,"sna:ulist",1,1,1); + MemKK::realloc_kokkos(zlist,"sna:zlist",1,1,1); + MemKK::realloc_kokkos(zlist_pack,"sna:zlist_pack",vector_length,idxz_max,ndoubles,natom_div); + MemKK::realloc_kokkos(blist,"sna:blist",natom,ntriples,idxb_max); + MemKK::realloc_kokkos(blist_pack,"sna:blist_pack",vector_length,idxb_max,ntriples,natom_div); + MemKK::realloc_kokkos(ylist,"sna:ylist",1,1,1); + MemKK::realloc_kokkos(ylist_pack_re,"sna:ylist_pack_re",vector_length,idxu_half_max,nelements,natom_div); + MemKK::realloc_kokkos(ylist_pack_im,"sna:ylist_pack_im",vector_length,idxu_half_max,nelements,natom_div); + MemKK::realloc_kokkos(dulist,"sna:dulist",1,1,1); } else { #endif - a_pack = t_sna_3c_ll(Kokkos::NoInit("sna:a_pack"),1,1,1); - b_pack = t_sna_3c_ll(Kokkos::NoInit("sna:b_pack"),1,1,1); - da_pack = t_sna_4c_ll(Kokkos::NoInit("sna:da_pack"),1,1,1,1); - db_pack = t_sna_4c_ll(Kokkos::NoInit("sna:db_pack"),1,1,1,1); - sfac_pack = t_sna_4d_ll(Kokkos::NoInit("sna:sfac_pack"),1,1,1,1); - ulisttot = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot"),idxu_half_max,nelements,natom); - ulisttot_full = t_sna_3c_ll(Kokkos::NoInit("sna:ulisttot_full"),idxu_max,nelements,natom); - ulisttot_re_pack = t_sna_4d_ll(Kokkos::NoInit("sna:ulisttot_re"),1,1,1,1); - ulisttot_im_pack = t_sna_4d_ll(Kokkos::NoInit("sna:ulisttot_im"),1,1,1,1); - ulisttot_pack = t_sna_4c_ll(Kokkos::NoInit("sna:ulisttot_pack"),1,1,1,1); - ulist = t_sna_3c_ll(Kokkos::NoInit("sna:ulist"),idxu_cache_max,natom,nmax); - zlist = t_sna_3c_ll(Kokkos::NoInit("sna:zlist"),idxz_max,ndoubles,natom); - zlist_pack = t_sna_4c_ll(Kokkos::NoInit("sna:zlist_pack"),1,1,1,1); - blist = t_sna_3d(Kokkos::NoInit("sna:blist"),natom,ntriples,idxb_max); - blist_pack = t_sna_4d_ll(Kokkos::NoInit("sna:blist_pack"),1,1,1,1); - ylist = t_sna_3c_ll(Kokkos::NoInit("sna:ylist"),idxu_half_max,nelements,natom); - ylist_pack_re = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_re"),1,1,1,1); - ylist_pack_im = t_sna_4d_ll(Kokkos::NoInit("sna:ylist_pack_im"),1,1,1,1); - dulist = t_sna_4c3_ll(Kokkos::NoInit("sna:dulist"),idxu_cache_max,natom,nmax); + MemKK::realloc_kokkos(a_pack,"sna:a_pack",1,1,1); + MemKK::realloc_kokkos(b_pack,"sna:b_pack",1,1,1); + MemKK::realloc_kokkos(da_pack,"sna:da_pack",1,1,1,1); + MemKK::realloc_kokkos(db_pack,"sna:db_pack",1,1,1,1); + MemKK::realloc_kokkos(sfac_pack,"sna:sfac_pack",1,1,1,1); + MemKK::realloc_kokkos(ulisttot,"sna:ulisttot",idxu_half_max,nelements,natom); + MemKK::realloc_kokkos(ulisttot_full,"sna:ulisttot_full",idxu_max,nelements,natom); + MemKK::realloc_kokkos(ulisttot_re_pack,"sna:ulisttot_re",1,1,1,1); + MemKK::realloc_kokkos(ulisttot_im_pack,"sna:ulisttot_im",1,1,1,1); + MemKK::realloc_kokkos(ulisttot_pack,"sna:ulisttot_pack",1,1,1,1); + MemKK::realloc_kokkos(ulist,"sna:ulist",idxu_cache_max,natom,nmax); + MemKK::realloc_kokkos(zlist,"sna:zlist",idxz_max,ndoubles,natom); + MemKK::realloc_kokkos(zlist_pack,"sna:zlist_pack",1,1,1,1); + MemKK::realloc_kokkos(blist,"sna:blist",natom,ntriples,idxb_max); + MemKK::realloc_kokkos(blist_pack,"sna:blist_pack",1,1,1,1); + MemKK::realloc_kokkos(ylist,"sna:ylist",idxu_half_max,nelements,natom); + MemKK::realloc_kokkos(ylist_pack_re,"sna:ylist_pack_re",1,1,1,1); + MemKK::realloc_kokkos(ylist_pack_im,"sna:ylist_pack_im",1,1,1,1); + MemKK::realloc_kokkos(dulist,"sna:dulist",idxu_cache_max,natom,nmax); #ifdef LMP_KOKKOS_GPU } @@ -2356,74 +2357,68 @@ void SNAKokkos::compute_s_dsfac(const real template double SNAKokkos::memory_usage() { - int jdimpq = twojmax + 2; - int jdim = twojmax + 1; - double bytes; + double bytes = 0; - bytes = 0; - - bytes += jdimpq*jdimpq * sizeof(real_type); // pqarray - bytes += idxcg_max * sizeof(real_type); // cglist + bytes += MemKK::memory_usage(rootpqarray); + bytes += MemKK::memory_usage(cglist); #ifdef LMP_KOKKOS_GPU if (!host_flag) { - auto natom_pad = (natom+vector_length-1)/vector_length; - - bytes += natom_pad * nmax * sizeof(real_type) * 2; // a_pack - bytes += natom_pad * nmax * sizeof(real_type) * 2; // b_pack - bytes += natom_pad * nmax * 3 * sizeof(real_type) * 2; // da_pack - bytes += natom_pad * nmax * 3 * sizeof(real_type) * 2; // db_pack - bytes += natom_pad * nmax * 4 * sizeof(real_type); // sfac_pack + bytes += MemKK::memory_usage(a_pack); + bytes += MemKK::memory_usage(b_pack); + bytes += MemKK::memory_usage(da_pack); + bytes += MemKK::memory_usage(db_pack); + bytes += MemKK::memory_usage(sfac_pack); - bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ulisttot_re_pack - bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ulisttot_im_pack - bytes += natom_pad * idxu_max * nelements * sizeof(real_type) * 2; // ulisttot_pack + bytes += MemKK::memory_usage(ulisttot_re_pack); + bytes += MemKK::memory_usage(ulisttot_im_pack); + bytes += MemKK::memory_usage(ulisttot_pack); - bytes += natom_pad * idxz_max * ndoubles * sizeof(real_type) * 2; // zlist_pack - bytes += natom_pad * idxb_max * ntriples * sizeof(real_type); // blist_pack + bytes += MemKK::memory_usage(zlist_pack); + bytes += MemKK::memory_usage(blist_pack); - bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ylist_pack_re - bytes += natom_pad * idxu_half_max * nelements * sizeof(real_type); // ylist_pack_im + bytes += MemKK::memory_usage(ylist_pack_re); + bytes += MemKK::memory_usage(ylist_pack_im); } else { #endif - bytes += natom * nmax * idxu_cache_max * sizeof(real_type) * 2; // ulist - bytes += natom * idxu_half_max * nelements * sizeof(real_type) * 2; // ulisttot - bytes += natom * idxu_max * nelements * sizeof(real_type) * 2; // ulisttot_full + bytes += MemKK::memory_usage(ulist); + bytes += MemKK::memory_usage(ulisttot); + bytes += MemKK::memory_usage(ulisttot_full); - bytes += natom * idxz_max * ndoubles * sizeof(real_type) * 2; // zlist - bytes += natom * idxb_max * ntriples * sizeof(real_type); // blist + bytes += MemKK::memory_usage(zlist); + bytes += MemKK::memory_usage(blist); - bytes += natom * idxu_half_max * nelements * sizeof(real_type) * 2; // ylist + bytes += MemKK::memory_usage(ylist); - bytes += natom * nmax * idxu_cache_max * 3 * sizeof(real_type) * 2; // dulist + bytes += MemKK::memory_usage(dulist); #ifdef LMP_KOKKOS_GPU } #endif - bytes += natom * nmax * 3 * sizeof(real_type); // dedr + bytes += MemKK::memory_usage(dedr); - bytes += jdim * jdim * jdim * sizeof(int); // idxcg_block - bytes += jdim * sizeof(int); // idxu_block - bytes += jdim * sizeof(int); // idxu_half_block - bytes += idxu_max * sizeof(FullHalfMapper); // idxu_full_half - bytes += jdim * sizeof(int); // idxu_cache_block - bytes += jdim * jdim * jdim * sizeof(int); // idxz_block - bytes += jdim * jdim * jdim * sizeof(int); // idxb_block + bytes += MemKK::memory_usage(idxcg_block); + bytes += MemKK::memory_usage(idxu_block); + bytes += MemKK::memory_usage(idxu_half_block); + bytes += MemKK::memory_usage(idxu_full_half); + bytes += MemKK::memory_usage(idxu_cache_block); + bytes += MemKK::memory_usage(idxz_block); + bytes += MemKK::memory_usage(idxb_block); - bytes += idxz_max * 10 * sizeof(int); // idxz - bytes += idxb_max * 3 * sizeof(int); // idxb + bytes += MemKK::memory_usage(idxz); + bytes += MemKK::memory_usage(idxb); - bytes += jdim * sizeof(real_type); // bzero + bytes += MemKK::memory_usage(bzero); - bytes += natom * nmax * 3 * sizeof(real_type); // rij - bytes += natom * nmax * sizeof(real_type); // inside - bytes += natom * nmax * sizeof(real_type); // wj - bytes += natom * nmax * sizeof(real_type); // rcutij - bytes += natom * nmax * sizeof(real_type); // sinnerij - bytes += natom * nmax * sizeof(real_type); // dinnerij + bytes += MemKK::memory_usage(rij); + bytes += MemKK::memory_usage(inside); + bytes += MemKK::memory_usage(wj); + bytes += MemKK::memory_usage(rcutij); + bytes += MemKK::memory_usage(sinnerij); + bytes += MemKK::memory_usage(dinnerij); return bytes; } diff --git a/src/KSPACE/pppm_disp.h b/src/KSPACE/pppm_disp.h index 87265bee2f..1f254e772d 100644 --- a/src/KSPACE/pppm_disp.h +++ b/src/KSPACE/pppm_disp.h @@ -320,7 +320,7 @@ class PPPMDisp : public KSpace { void compute_drho1d(const FFT_SCALAR &, const FFT_SCALAR &, const FFT_SCALAR &, int, FFT_SCALAR **, FFT_SCALAR **); void compute_rho_coeff(FFT_SCALAR **, FFT_SCALAR **, int); - void slabcorr(int); + virtual void slabcorr(int); // grid communication diff --git a/src/KSPACE/pppm_disp_tip4p.cpp b/src/KSPACE/pppm_disp_tip4p.cpp index 902b512bc3..9ebc32ad05 100644 --- a/src/KSPACE/pppm_disp_tip4p.cpp +++ b/src/KSPACE/pppm_disp_tip4p.cpp @@ -486,6 +486,85 @@ void PPPMDispTIP4P::fieldforce_c_peratom() } } +/* ---------------------------------------------------------------------- + Fix handling of TIP4P dipole compared to PPPMDisp::slabcorr +------------------------------------------------------------------------- */ + +#define SMALL 0.00001 + +void PPPMDispTIP4P::slabcorr(int /*eflag*/) +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + int *type = atom->type; + double *xi, xM[3]; int iH1, iH2; //for TIP4P virtual site + + // sum local contributions to get global dipole moment + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) { + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + xi = xM; + } else xi = x[i]; + dipole += q[i]*xi[2]; + } + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy_1 += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) { + double fzi_corr = ffact * q[i]*(dipole_all - qsum*x[i][2]); + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + f[i][2] += fzi_corr*(1 - alpha); + f[iH1][2] += 0.5*alpha*fzi_corr; + f[iH2][2] += 0.5*alpha*fzi_corr; + } + else f[i][2] += fzi_corr; + } +} + /* ---------------------------------------------------------------------- find 2 H atoms bonded to O atom i compute position xM of fictitious charge site for O atom diff --git a/src/KSPACE/pppm_disp_tip4p.h b/src/KSPACE/pppm_disp_tip4p.h index a432a7eeeb..e9d5babe0f 100644 --- a/src/KSPACE/pppm_disp_tip4p.h +++ b/src/KSPACE/pppm_disp_tip4p.h @@ -37,6 +37,7 @@ class PPPMDispTIP4P : public PPPMDisp { void fieldforce_c_ik() override; void fieldforce_c_ad() override; void fieldforce_c_peratom() override; + void slabcorr(int) override; private: void find_M(int, int &, int &, double *); diff --git a/src/KSPACE/pppm_tip4p.cpp b/src/KSPACE/pppm_tip4p.cpp index 1cb7bf462f..e9d72bcefe 100644 --- a/src/KSPACE/pppm_tip4p.cpp +++ b/src/KSPACE/pppm_tip4p.cpp @@ -477,6 +477,86 @@ void PPPMTIP4P::fieldforce_peratom() } } + +/* ---------------------------------------------------------------------- + Fix handling of TIP4P dipole compared to PPPMDisp::slabcorr +------------------------------------------------------------------------- */ + +#define SMALL 0.00001 + +void PPPMTIP4P::slabcorr() +{ + // compute local contribution to global dipole moment + + double *q = atom->q; + double **x = atom->x; + double zprd = domain->zprd; + int nlocal = atom->nlocal; + int *type = atom->type; + double *xi, xM[3]; int iH1, iH2; //for TIP4P virtual site + + // sum local contributions to get global dipole moment + double dipole = 0.0; + for (int i = 0; i < nlocal; i++) { + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + xi = xM; + } else xi = x[i]; + dipole += q[i]*xi[2]; + } + + double dipole_all; + MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world); + + // need to make non-neutral systems and/or + // per-atom energy translationally invariant + + double dipole_r2 = 0.0; + if (eflag_atom || fabs(qsum) > SMALL) { + for (int i = 0; i < nlocal; i++) + dipole_r2 += q[i]*x[i][2]*x[i][2]; + + // sum local contributions + + double tmp; + MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world); + dipole_r2 = tmp; + } + + // compute corrections + + const double e_slabcorr = MY_2PI*(dipole_all*dipole_all - + qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume; + const double qscale = force->qqrd2e * scale; + + if (eflag_global) energy_1 += qscale * e_slabcorr; + + // per-atom energy + + if (eflag_atom) { + double efact = qscale * MY_2PI/volume; + for (int i = 0; i < nlocal; i++) + eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 + + qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0); + } + + // add on force corrections + + double ffact = qscale * (-4.0*MY_PI/volume); + double **f = atom->f; + + for (int i = 0; i < nlocal; i++) { + double fzi_corr = ffact * q[i]*(dipole_all - qsum*x[i][2]); + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + f[i][2] += fzi_corr*(1 - alpha); + f[iH1][2] += 0.5*alpha*fzi_corr; + f[iH2][2] += 0.5*alpha*fzi_corr; + } + else f[i][2] += fzi_corr; + } +} + /* ---------------------------------------------------------------------- find 2 H atoms bonded to O atom i compute position xM of fictitious charge site for O atom diff --git a/src/KSPACE/pppm_tip4p.h b/src/KSPACE/pppm_tip4p.h index cf63af0d81..0e6b2cc91b 100644 --- a/src/KSPACE/pppm_tip4p.h +++ b/src/KSPACE/pppm_tip4p.h @@ -35,6 +35,7 @@ class PPPMTIP4P : public PPPM { void fieldforce_ik() override; void fieldforce_ad() override; void fieldforce_peratom() override; + void slabcorr() override; private: void find_M(int, int &, int &, double *); diff --git a/unittest/force-styles/tests/kspace-pppm_tip4p_slab.yaml b/unittest/force-styles/tests/kspace-pppm_tip4p_slab.yaml index 51c5a45941..9b9806df5b 100644 --- a/unittest/force-styles/tests/kspace-pppm_tip4p_slab.yaml +++ b/unittest/force-styles/tests/kspace-pppm_tip4p_slab.yaml @@ -1,7 +1,7 @@ --- -lammps_version: 10 Feb 2021 +lammps_version: 4 May 2022 tags: slow -date_generated: Fri Feb 26 23:09:34 2021 +date_generated: Mon May 23 20:12:38 2022 epsilon: 5e-13 prerequisites: ! | atom full @@ -27,67 +27,67 @@ init_coul: 0 init_stress: ! |2- 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 init_forces: ! |2 - 1 -5.1875059319876482e-01 5.1660852898514097e-02 -5.7568746684221916e-01 - 2 2.1689992534273889e-01 -2.6058788219741147e-01 4.1798750467672274e-01 - 3 -3.2123603964927645e-02 -8.8740124548059782e-03 -1.3836110655816162e-02 - 4 1.5801256389492813e-01 2.1126659375654909e-02 7.8030101326860571e-02 - 5 1.4630529362834138e-01 7.0541031372980123e-02 1.0367534136208822e-01 - 6 4.5271336509733173e-01 4.3434073283904712e-01 1.9672609182540585e-01 - 7 -2.5202832073509374e-01 -4.0972860647817611e-01 -5.1025448731009770e-01 - 8 -2.7733728801934087e-03 -6.6285305944413486e-01 -4.2259182161309783e-01 - 9 8.7284510947996261e-02 3.4644067741232221e-01 5.2649415280028289e-01 - 10 -7.0358238250045405e-02 1.2324438068757468e-01 9.7430764543568638e-02 - 11 -1.0611397012429427e-01 1.6778028608628817e-01 1.0243492986761592e-01 - 12 4.7522165036946828e-01 -4.7569230861934814e-01 -3.7620104501978141e-01 - 13 -1.5084555220571103e-01 1.3477824777242514e-01 1.5530580320476042e-01 - 14 -1.7331931611645846e-01 1.4489217533438961e-01 1.5187450400216165e-01 - 15 -1.4457383479163205e-01 1.0191957555196750e-01 1.2647045003724980e-01 - 16 -4.6380086833284712e-01 5.1725747136642608e-01 1.3944164623658954e+00 - 17 2.5830539911922701e-01 -4.4001348824516739e-01 -1.6368568925682263e+00 - 18 7.1711828509507158e-01 1.5135594083235546e+00 -3.0000511375094279e+00 - 19 -2.6526242862377652e-01 -7.5615232036394919e-01 1.6174668711138471e+00 - 20 -3.6412116833354524e-01 -6.8003985707517800e-01 1.5219834320490606e+00 - 21 2.1913962201245335e-01 2.8436767546323427e-01 -1.5830999741451532e+00 - 22 -1.1342236771135741e-01 -2.1501547367331550e-02 8.1828468024256407e-01 - 23 -1.3904196539005109e-01 -1.8302644136991242e-01 7.6537752144201421e-01 - 24 9.9781913572244155e-02 9.7885535038983940e-01 -1.2668572125502537e+00 - 25 1.3435697022842072e-01 -3.4498868790162351e-01 6.9488200140438949e-01 - 26 -2.4062268840602163e-01 -6.1062269338471520e-01 5.8807795407966368e-01 - 27 -6.0379577180174060e-01 9.0922264678215248e-01 -1.6721200413729438e+00 - 28 4.2997465966368542e-01 -5.0965848067708741e-01 9.0674794598209418e-01 - 29 2.4583990189455251e-01 -4.3624778607752923e-01 7.9388967726077175e-01 + 1 -5.1875059319876493e-01 5.1660852898514124e-02 -5.3139324142026889e-01 + 2 2.1689992534273903e-01 -2.6058788219741147e-01 3.8877216450479823e-01 + 3 -3.2123603964927645e-02 -8.8740124548059712e-03 -1.1951249999562964e-02 + 4 1.5801256389492810e-01 2.1126659375654853e-02 6.9548228373721183e-02 + 5 1.4630529362834138e-01 7.0541031372980081e-02 9.5193468408948823e-02 + 6 4.5271336509733173e-01 4.3434073283904689e-01 1.4866214509094927e-01 + 7 -2.5202832073509379e-01 -4.0972860647817605e-01 -4.6219054057564113e-01 + 8 -2.7733728801934924e-03 -6.6285305944413497e-01 -3.7829759619114767e-01 + 9 8.7284510947996316e-02 3.4644067741232221e-01 4.9727881262835838e-01 + 10 -7.0358238250045405e-02 1.2324438068757469e-01 9.0833752246682456e-02 + 11 -1.0611397012429427e-01 1.6778028608628820e-01 9.3953056914476557e-02 + 12 4.7522165036946817e-01 -4.7569230861934830e-01 -3.5075542616036326e-01 + 13 -1.5084555220571100e-01 1.3477824777242520e-01 1.4682393025162105e-01 + 14 -1.7331931611645843e-01 1.4489217533438964e-01 1.4339263104902228e-01 + 15 -1.4457383479163199e-01 1.0191957555196751e-01 1.1798857708411041e-01 + 16 -4.6380086833284706e-01 5.1725747136642608e-01 1.3463525156314389e+00 + 17 2.5830539911922706e-01 -4.4001348824516717e-01 -1.5887929458337697e+00 + 18 7.1711828509507103e-01 1.5135594083235548e+00 -2.9202084401105424e+00 + 19 -2.6526242862377630e-01 -7.5615232036394919e-01 1.5775455224144039e+00 + 20 -3.6412116833354502e-01 -6.8003985707517844e-01 1.4820620833496179e+00 + 21 2.1913962201245299e-01 2.8436767546323433e-01 -1.3892564908575578e+00 + 22 -1.1342236771135721e-01 -2.1501547367331557e-02 7.2136293859876610e-01 + 23 -1.3904196539005081e-01 -1.8302644136991245e-01 6.6845577979821647e-01 + 24 9.9781913572244044e-02 9.7885535038983906e-01 -1.0730137292626583e+00 + 25 1.3435697022842086e-01 -3.4498868790162335e-01 5.9796025976059175e-01 + 26 -2.4062268840602169e-01 -6.1062269338471475e-01 4.9115621243586605e-01 + 27 -6.0379577180174060e-01 9.0922264678215248e-01 -1.4782765580853483e+00 + 28 4.2997465966368531e-01 -5.0965848067708730e-01 8.0982620433829633e-01 + 29 2.4583990189455257e-01 -4.3624778607752923e-01 6.9696793561697401e-01 run_vdwl: 0 run_coul: 0 run_stress: ! |2- 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 run_forces: ! |2 - 1 -5.1741021308022928e-01 5.2164463970109785e-02 -5.7035795586691229e-01 - 2 2.1552845355081662e-01 -2.6145885390966006e-01 4.1455932829319120e-01 - 3 -3.2103136898117973e-02 -8.8506517835662529e-03 -1.3601661681107857e-02 - 4 1.5813149106431929e-01 2.0998590621170252e-02 7.7085061898295948e-02 - 5 1.4611782148177443e-01 7.0495101424978557e-02 1.0262211863021993e-01 - 6 4.5237682723361744e-01 4.3384953789975256e-01 1.9026464391810138e-01 - 7 -2.5220238048340610e-01 -4.1025291093818367e-01 -5.0478860778438306e-01 - 8 -1.8080584227147985e-03 -6.6262062885438788e-01 -4.1641859631215317e-01 - 9 8.6463014670901714e-02 3.4599477171923032e-01 5.2262452042463015e-01 - 10 -7.0489736534953079e-02 1.2334020200473024e-01 9.6566992716111827e-02 - 11 -1.0626441695286094e-01 1.6812163421601856e-01 1.0149591994495212e-01 - 12 4.7577991930135149e-01 -4.7567916757872036e-01 -3.7271573985070183e-01 - 13 -1.5102820761813157e-01 1.3482444566331483e-01 1.5423678933941967e-01 - 14 -1.7348673211954843e-01 1.4502141845434144e-01 1.5087187633492033e-01 - 15 -1.4462875081155377e-01 1.0177069356565076e-01 1.2517182091704590e-01 - 16 -4.6460162929294996e-01 5.1779385845248271e-01 1.3893654572043623e+00 - 17 2.5891442131187525e-01 -4.3950548671559936e-01 -1.6317550757697421e+00 - 18 7.2168431617151652e-01 1.5193451885286362e+00 -2.9893884981487782e+00 - 19 -2.6652798165736163e-01 -7.5811338734451528e-01 1.6128149463359120e+00 - 20 -3.6685702835539502e-01 -6.8322050954334113e-01 1.5165955023073157e+00 - 21 2.1951624771560643e-01 2.7908195130062186e-01 -1.5761953363577819e+00 - 22 -1.1336951380799791e-01 -1.8808477419676436e-02 8.1504600690354212e-01 - 23 -1.3925079650238253e-01 -1.8099002366078934e-01 7.6197026590425576e-01 - 24 1.0040861588721736e-01 9.7605298874640822e-01 -1.2616161159601158e+00 - 25 1.3326916759186308e-01 -3.4383632694371846e-01 6.9190323110048380e-01 - 26 -2.4021815268192853e-01 -6.0890409178886584e-01 5.8556538656853796e-01 - 27 -6.0409904237121848e-01 9.0912495784502378e-01 -1.6654694276801689e+00 - 28 4.3013936095125799e-01 -5.0934391123833689e-01 9.0325113244229294e-01 - 29 2.4601612065863304e-01 -4.3639537669310857e-01 7.9029601422825357e-01 + 1 -5.1741014216891323e-01 5.2164635296042393e-02 -5.2623836978475858e-01 + 2 2.1552846412199758e-01 -2.6145894539516673e-01 3.8545898349593888e-01 + 3 -3.2103138095670579e-02 -8.8506538753520981e-03 -1.1724235859914865e-02 + 4 1.5813145650026225e-01 2.0998621233542170e-02 6.8636593918036282e-02 + 5 1.4611784388196503e-01 7.0495096056163456e-02 9.4173752375773245e-02 + 6 4.5237719221599776e-01 4.3384962277065459e-01 1.4239025998317195e-01 + 7 -2.5220268493187059e-01 -4.1025301686518484e-01 -4.5691379591079689e-01 + 8 -1.8087303830687639e-03 -6.6262053829478962e-01 -3.7229928601228018e-01 + 9 8.6463301781221830e-02 3.4599465684255803e-01 4.9352454715915695e-01 + 10 -7.0489594513774040e-02 1.2334016665695119e-01 8.9996005394677253e-02 + 11 -1.0626421813039562e-01 1.6812158323291176e-01 9.3047478449378010e-02 + 12 4.7577947092697664e-01 -4.7567891996498934e-01 -3.4737046113988318e-01 + 13 -1.5102808666764547e-01 1.3482433032858601e-01 1.4578835246021055e-01 + 14 -1.7348659325588509e-01 1.4502136217343140e-01 1.4242345028236647e-01 + 15 -1.4462864359019598e-01 1.0177061950990737e-01 1.1672337003180658e-01 + 16 -4.6460067645864556e-01 5.1779370227303589e-01 1.3414908637759566e+00 + 17 2.5891386126470772e-01 -4.3950547582143235e-01 -1.5838807201814791e+00 + 18 7.2168370323432252e-01 1.5193447526996697e+00 -2.9098598385970016e+00 + 19 -2.6652787885830853e-01 -7.5811315791679745e-01 1.5730504781450976e+00 + 20 -3.6685665590502275e-01 -6.8322021768929220e-01 1.4768311717846097e+00 + 21 2.1951586762660091e-01 2.7908220235205461e-01 -1.3830824926773011e+00 + 22 -1.1336938042579009e-01 -1.8808601624035179e-02 7.1848935671661196e-01 + 23 -1.3925069633951992e-01 -1.8099007738613887e-01 6.6541382825713724e-01 + 24 1.0040887040966230e-01 9.7605345725512060e-01 -1.0685029118802341e+00 + 25 1.3326911366323715e-01 -3.4383656089990644e-01 5.9534683106490160e-01 + 26 -2.4021835151581941e-01 -6.0890428532016694e-01 4.8900886112265285e-01 + 27 -6.0409926106451306e-01 9.0912468504667421e-01 -1.4723570251596647e+00 + 28 4.3013929998720690e-01 -5.0934380660262080e-01 8.0669499930969779e-01 + 29 2.4601628669087905e-01 -4.3639523607143099e-01 6.9373995347613393e-01 ...