diff --git a/cmake/Modules/Packages/ML-PACE.cmake b/cmake/Modules/Packages/ML-PACE.cmake index 0159f36c34..b64584e5a7 100644 --- a/cmake/Modules/Packages/ML-PACE.cmake +++ b/cmake/Modules/Packages/ML-PACE.cmake @@ -1,6 +1,6 @@ -set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2022.10.15.tar.gz" CACHE STRING "URL for PACE evaluator library sources") +set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.01.3.tar.gz" CACHE STRING "URL for PACE evaluator library sources") -set(PACELIB_MD5 "848ad6a6cc79fa82745927001fb1c9b5" CACHE STRING "MD5 checksum of PACE evaluator library tarball") +set(PACELIB_MD5 "f418d32b60e531063ac4285bf702b468" CACHE STRING "MD5 checksum of PACE evaluator library tarball") mark_as_advanced(PACELIB_URL) mark_as_advanced(PACELIB_MD5) diff --git a/doc/src/fix_pair.rst b/doc/src/fix_pair.rst index abb44718cd..44c91f18ee 100644 --- a/doc/src/fix_pair.rst +++ b/doc/src/fix_pair.rst @@ -1,7 +1,7 @@ .. index:: fix pair fix pair command -======================= +================ Syntax """""" @@ -47,7 +47,12 @@ These are example use cases: The *N* argument determines how often the fix is invoked. The *pstyle* argument is the name of the pair style. It can be a -sub-style used in a :doc:`pair_style hybrid ` command. +sub-style used in a :doc:`pair_style hybrid ` command. If +there are multiple sub-styles using the same pair style, then *pstyle* +should be specified as "style:N", where *N* is the number of the +instance of the pair style you wish monitor (e.g., the first or second). +For example, *pstyle* could be specified as "pace/extrapolation" or +"amoeba" or "eam:1" or "eam:2". One or more *name/flag* pairs of arguments follow. Each *name* is a per-atom quantity which the pair style must recognize as an extraction diff --git a/doc/src/pair_pace.rst b/doc/src/pair_pace.rst index 697a9965b6..01f7df63f6 100644 --- a/doc/src/pair_pace.rst +++ b/doc/src/pair_pace.rst @@ -1,6 +1,7 @@ .. index:: pair_style pace .. index:: pair_style pace/kk .. index:: pair_style pace/extrapolation +.. index:: pair_style pace/extrapolation/kk pair_style pace command ======================= @@ -127,6 +128,9 @@ but not more often than every 20 steps. On all other steps `pair_style pace recursive` will be used. +When using the pair style *pace/extrapolation* with the KOKKOS package on GPUs +product B-basis evaluator is always used and only *linear* ASI is supported. + ---------- See the :doc:`pair_coeff ` page for alternate ways @@ -186,4 +190,4 @@ recursive, chunksize = 4096, .. _Lysogorskiy2022: -**(Lysogorskiy2022)** Lysogorskiy, Bochkarev, Mrovec, Drautz, TBS (2022). +**(Lysogorskiy2022)** Lysogorskiy, Bochkarev, Mrovec, Drautz, arXiv:2212.08716 (2022). diff --git a/lib/pace/Install.py b/lib/pace/Install.py index 9f132a9580..f7f50d4f6d 100644 --- a/lib/pace/Install.py +++ b/lib/pace/Install.py @@ -18,11 +18,11 @@ from install_helpers import fullpath, geturl, checkmd5sum # settings thisdir = fullpath('.') -version ='v.2022.10.15' +version ='v.2023.01.3' # known checksums for different PACE versions. used to validate the download. checksums = { \ - 'v.2022.10.15': '848ad6a6cc79fa82745927001fb1c9b5' + 'v.2023.01.3': 'f418d32b60e531063ac4285bf702b468' } parser = ArgumentParser(prog='Install.py', description="LAMMPS library build wrapper script") diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index a5bf6437fc..77a9932d90 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -320,6 +320,8 @@ action pair_multi_lucy_rx_kokkos.cpp pair_multi_lucy_rx.cpp action pair_multi_lucy_rx_kokkos.h pair_multi_lucy_rx.h action pair_pace_kokkos.cpp pair_pace.cpp action pair_pace_kokkos.h pair_pace.h +action pair_pace_extrapolation_kokkos.cpp pair_pace_extrapolation.cpp +action pair_pace_extrapolation_kokkos.h pair_pace_extrapolation.h action pair_reaxff_kokkos.cpp pair_reaxff.cpp action pair_reaxff_kokkos.h pair_reaxff.h action pair_snap_kokkos.cpp pair_snap.cpp diff --git a/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp b/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp new file mode 100644 index 0000000000..e6ae694cad --- /dev/null +++ b/src/KOKKOS/pair_pace_extrapolation_kokkos.cpp @@ -0,0 +1,1882 @@ +// clang-format off +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + aE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Yury Lysogorskiy (ICAMS) +------------------------------------------------------------------------- */ + +#include "pair_pace_extrapolation_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "math_const.h" +#include "memory_kokkos.h" +#include "neighbor_kokkos.h" +#include "neigh_request.h" + +#include "ace-evaluator/ace_version.h" +#include "ace-evaluator/ace_radial.h" + +#include "ace/ace_b_basis.h" +#include "ace/ace_b_evaluator.h" + +#include + +namespace LAMMPS_NS { + struct ACEALImpl { + ACEALImpl() : basis_set(nullptr), ace(nullptr) {} + + ~ACEALImpl() { + delete basis_set; + delete ace; + } + + ACEBBasisSet *basis_set; + ACEBEvaluator *ace; + }; +} // namespace LAMMPS_NS + +using namespace LAMMPS_NS; +using namespace MathConst; + +enum{FS,FS_SHIFTEDSCALED}; + +/* ---------------------------------------------------------------------- */ + +template +PairPACEExtrapolationKokkos::PairPACEExtrapolationKokkos(LAMMPS *lmp) : PairPACEExtrapolation(lmp) +{ + respa_enable = 0; + + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; + + host_flag = (execution_space == Host); +} + +/* ---------------------------------------------------------------------- + check if allocated, since class can be destructed when incomplete +------------------------------------------------------------------------- */ + +template +PairPACEExtrapolationKokkos::~PairPACEExtrapolationKokkos() +{ + if (copymode) return; + + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + + // deallocate views of views in serial to prevent issues in Kokkos tools + + if (k_splines_gk.h_view.data()) { + for (int i = 0; i < nelements; i++) { + for (int j = 0; j < nelements; j++) { + k_splines_gk.h_view(i, j).deallocate(); + k_splines_rnl.h_view(i, j).deallocate(); + k_splines_hc.h_view(i, j).deallocate(); + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairPACEExtrapolationKokkos::grow(int natom, int maxneigh) +{ + auto basis_set = aceimpl->basis_set; + + if ((int)A.extent(0) < natom) { + + MemKK::realloc_kokkos(A, "pace:A", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(A_rank1, "pace:A_rank1", natom, nelements, nradbase); + + MemKK::realloc_kokkos(A_list, "pace:A_list", natom, idx_ms_combs_max, basis_set->rankmax); + //size is +1 of max to avoid out-of-boundary array access in double-triangular scheme + MemKK::realloc_kokkos(A_forward_prod, "pace:A_forward_prod", natom, idx_ms_combs_max, basis_set->rankmax + 1); + + MemKK::realloc_kokkos(e_atom, "pace:e_atom", natom); + MemKK::realloc_kokkos(rhos, "pace:rhos", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion + MemKK::realloc_kokkos(dF_drho, "pace:dF_drho", natom, basis_set->ndensitymax + 1); // +1 density for core repulsion + + MemKK::realloc_kokkos(weights, "pace:weights", natom, nelements, nradmax + 1, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(weights_rank1, "pace:weights_rank1", natom, nelements, nradbase); + + // hard-core repulsion + MemKK::realloc_kokkos(rho_core, "pace:rho_core", natom); + MemKK::realloc_kokkos(dF_drho_core, "pace:dF_drho_core", natom); + + MemKK::realloc_kokkos(dB_flatten, "pace:dB_flatten", natom, idx_ms_combs_max, basis_set->rankmax); + + //B-projections + MemKK::realloc_kokkos(projections, "pace:projections", natom, total_num_functions_max); // per-atom B-projections + MemKK::realloc_kokkos(d_gamma, "pace:gamma", natom); // per-atom gamma + } + + if (((int)ylm.extent(0) < natom) || ((int)ylm.extent(1) < maxneigh)) { + + // radial functions + MemKK::realloc_kokkos(fr, "pace:fr", natom, maxneigh, nradmax, lmax + 1); + MemKK::realloc_kokkos(dfr, "pace:dfr", natom, maxneigh, nradmax, lmax + 1); + MemKK::realloc_kokkos(gr, "pace:gr", natom, maxneigh, nradbase); + MemKK::realloc_kokkos(dgr, "pace:dgr", natom, maxneigh, nradbase); + const int max_num_functions = MAX(nradbase, nradmax*(lmax + 1)); + MemKK::realloc_kokkos(d_values, "pace:d_values", natom, maxneigh, max_num_functions); + MemKK::realloc_kokkos(d_derivatives, "pace:d_derivatives", natom, maxneigh, max_num_functions); + + // hard-core repulsion + MemKK::realloc_kokkos(cr, "pace:cr", natom, maxneigh); + MemKK::realloc_kokkos(dcr, "pace:dcr", natom, maxneigh); + + // spherical harmonics + MemKK::realloc_kokkos(plm, "pace:plm", natom, maxneigh, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(dplm, "pace:dplm", natom, maxneigh, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(ylm, "pace:ylm", natom, maxneigh, (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(dylm, "pace:dylm", natom, maxneigh, (lmax + 1) * (lmax + 1)); + + // short neigh list + MemKK::realloc_kokkos(d_ncount, "pace:ncount", natom); + MemKK::realloc_kokkos(d_mu, "pace:mu", natom, maxneigh); + MemKK::realloc_kokkos(d_rhats, "pace:rhats", natom, maxneigh); + MemKK::realloc_kokkos(d_rnorms, "pace:rnorms", natom, maxneigh); + MemKK::realloc_kokkos(d_nearest, "pace:nearest", natom, maxneigh); + + MemKK::realloc_kokkos(f_ij, "pace:f_ij", natom, maxneigh); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairPACEExtrapolationKokkos::copy_pertype() +{ + auto basis_set = aceimpl->basis_set; + + MemKK::realloc_kokkos(d_rho_core_cutoff, "pace:rho_core_cutoff", nelements); + MemKK::realloc_kokkos(d_drho_core_cutoff, "pace:drho_core_cutoff", nelements); + MemKK::realloc_kokkos(d_E0vals, "pace:E0vals", nelements); + MemKK::realloc_kokkos(d_ndensity, "pace:ndensity", nelements); + MemKK::realloc_kokkos(d_npoti, "pace:npoti", nelements); + + auto h_rho_core_cutoff = Kokkos::create_mirror_view(d_rho_core_cutoff); + auto h_drho_core_cutoff = Kokkos::create_mirror_view(d_drho_core_cutoff); + auto h_E0vals = Kokkos::create_mirror_view(d_E0vals); + auto h_ndensity = Kokkos::create_mirror_view(d_ndensity); + auto h_npoti = Kokkos::create_mirror_view(d_npoti); + + for (int n = 0; n < nelements; n++) { + h_rho_core_cutoff[n] = basis_set->map_embedding_specifications.at(n).rho_core_cutoff; + h_drho_core_cutoff[n] = basis_set->map_embedding_specifications.at(n).drho_core_cutoff; + + h_E0vals(n) = basis_set->E0vals(n); + + h_ndensity(n) = basis_set->map_embedding_specifications.at(n).ndensity; + + string npoti = basis_set->map_embedding_specifications.at(n).npoti; + if (npoti == "FinnisSinclair") + h_npoti(n) = FS; + else if (npoti == "FinnisSinclairShiftedScaled") + h_npoti(n) = FS_SHIFTEDSCALED; + } + + Kokkos::deep_copy(d_rho_core_cutoff, h_rho_core_cutoff); + Kokkos::deep_copy(d_drho_core_cutoff, h_drho_core_cutoff); + Kokkos::deep_copy(d_E0vals, h_E0vals); + Kokkos::deep_copy(d_ndensity, h_ndensity); + Kokkos::deep_copy(d_npoti, h_npoti); + + MemKK::realloc_kokkos(d_wpre, "pace:wpre", nelements, basis_set->ndensitymax); + MemKK::realloc_kokkos(d_mexp, "pace:mexp", nelements, basis_set->ndensitymax); + + auto h_wpre = Kokkos::create_mirror_view(d_wpre); + auto h_mexp = Kokkos::create_mirror_view(d_mexp); + + for (int n = 0; n < nelements; n++) { + const int ndensity = basis_set->map_embedding_specifications.at(n).ndensity; + for (int p = 0; p < ndensity; p++) { + h_wpre(n, p) = basis_set->map_embedding_specifications.at(n).FS_parameters.at(p * 2 + 0); + h_mexp(n, p) = basis_set->map_embedding_specifications.at(n).FS_parameters.at(p * 2 + 1); + } + } + + Kokkos::deep_copy(d_wpre, h_wpre); + Kokkos::deep_copy(d_mexp, h_mexp); +} + +/* ---------------------------------------------------------------------- */ + +template +void PairPACEExtrapolationKokkos::copy_splines() +{ + auto basis_set = aceimpl->basis_set; + + if (k_splines_gk.d_view.data()) { + for (int i = 0; i < nelements; i++) { + for (int j = 0; j < nelements; j++) { + k_splines_gk.h_view(i, j).deallocate(); + k_splines_rnl.h_view(i, j).deallocate(); + k_splines_hc.h_view(i, j).deallocate(); + } + } + } + + k_splines_gk = Kokkos::DualView("pace:splines_gk", nelements, nelements); + k_splines_rnl = Kokkos::DualView("pace:splines_rnl", nelements, nelements); + k_splines_hc = Kokkos::DualView("pace:splines_hc", nelements, nelements); + + ACERadialFunctions* radial_functions = dynamic_cast(basis_set->radial_functions); + + for (int i = 0; i < nelements; i++) { + for (int j = 0; j < nelements; j++) { + k_splines_gk.h_view(i, j) = radial_functions->splines_gk(i, j); + k_splines_rnl.h_view(i, j) = radial_functions->splines_rnl(i, j); + k_splines_hc.h_view(i, j) = radial_functions->splines_hc(i, j); + } + } + + k_splines_gk.modify_host(); + k_splines_rnl.modify_host(); + k_splines_hc.modify_host(); + + k_splines_gk.sync_device(); + k_splines_rnl.sync_device(); + k_splines_hc.sync_device(); +} + +/* ---------------------------------------------------------------------- */ + +template +void PairPACEExtrapolationKokkos::copy_tilde() +{ + auto basis_set = aceimpl->basis_set; + auto b_evaluator = aceimpl->ace; + + // flatten loops, get per-element count and max + + idx_ms_combs_max = 0; + total_num_functions_max = 0; + + MemKK::realloc_kokkos(d_idx_ms_combs_count, "pace:idx_ms_combs_count", nelements); + MemKK::realloc_kokkos(d_total_basis_size, "pace:total_basis_size", nelements); + auto h_idx_ms_combs_count = Kokkos::create_mirror_view(d_idx_ms_combs_count); + auto h_total_basis_size = Kokkos::create_mirror_view(d_total_basis_size); + + for (int mu = 0; mu < nelements; mu++) { + int idx_ms_combs = 0; + const int total_basis_size_rank1 = basis_set->total_basis_size_rank1[mu]; + const int total_basis_size = basis_set->total_basis_size[mu]; + + ACEBBasisFunction *basis = basis_set->basis[mu]; + + // rank=1 + for (int func_rank1_ind = 0; func_rank1_ind < total_basis_size_rank1; ++func_rank1_ind) + idx_ms_combs++; + + // rank > 1 + for (int func_ind = 0; func_ind < total_basis_size; ++func_ind) { + ACEBBasisFunction *func = &basis[func_ind]; + + // loop over {ms} combinations in sum + for (int ms_ind = 0; ms_ind < func->num_ms_combs; ++ms_ind) + idx_ms_combs++; + } + h_idx_ms_combs_count(mu) = idx_ms_combs; + idx_ms_combs_max = MAX(idx_ms_combs_max, idx_ms_combs); + total_num_functions_max = MAX(total_num_functions_max, total_basis_size_rank1 + total_basis_size); + h_total_basis_size(mu) = total_basis_size_rank1 + total_basis_size; + } + + Kokkos::deep_copy(d_idx_ms_combs_count, h_idx_ms_combs_count); + Kokkos::deep_copy(d_total_basis_size, h_total_basis_size); + + MemKK::realloc_kokkos(d_rank, "pace:rank", nelements, total_num_functions_max); + MemKK::realloc_kokkos(d_num_ms_combs, "pace:num_ms_combs", nelements, total_num_functions_max); + MemKK::realloc_kokkos(d_func_inds, "pace:func_inds", nelements, idx_ms_combs_max); + MemKK::realloc_kokkos(d_mus, "pace:mus", nelements, total_num_functions_max, basis_set->rankmax); + MemKK::realloc_kokkos(d_ns, "pace:ns", nelements, total_num_functions_max, basis_set->rankmax); + MemKK::realloc_kokkos(d_ls, "pace:ls", nelements, total_num_functions_max, basis_set->rankmax); + MemKK::realloc_kokkos(d_ms_combs, "pace:ms_combs", nelements, idx_ms_combs_max, basis_set->rankmax); + MemKK::realloc_kokkos(d_gen_cgs, "pace:gen_cgs", nelements, idx_ms_combs_max); + MemKK::realloc_kokkos(d_coeffs, "pace:coeffs", nelements, total_num_functions_max, basis_set->ndensitymax); + // active set inverted + t_ace_3d d_ASI_temp; + MemKK::realloc_kokkos(d_ASI_temp, "pace:ASI_temp", nelements, total_num_functions_max, total_num_functions_max); + + auto h_rank = Kokkos::create_mirror_view(d_rank); + auto h_num_ms_combs = Kokkos::create_mirror_view(d_num_ms_combs); + auto h_func_inds = Kokkos::create_mirror_view(d_func_inds); + auto h_mus = Kokkos::create_mirror_view(d_mus); + auto h_ns = Kokkos::create_mirror_view(d_ns); + auto h_ls = Kokkos::create_mirror_view(d_ls); + auto h_ms_combs = Kokkos::create_mirror_view(d_ms_combs); + auto h_gen_cgs = Kokkos::create_mirror_view(d_gen_cgs); + auto h_coeffs = Kokkos::create_mirror_view(d_coeffs); + // asi + auto h_ASI = Kokkos::create_mirror_view(d_ASI_temp); + + // copy values on host + + for (int mu = 0; mu < nelements; mu++) { + const int total_basis_size_rank1 = basis_set->total_basis_size_rank1[mu]; + const int total_basis_size = basis_set->total_basis_size[mu]; + + ACEBBasisFunction *basis_rank1 = basis_set->basis_rank1[mu]; + ACEBBasisFunction *basis = basis_set->basis[mu]; + + const int ndensity = basis_set->map_embedding_specifications.at(mu).ndensity; + + int idx_ms_comb = 0; + + // rank=1 + for (int func_ind = 0; func_ind < total_basis_size_rank1; ++func_ind) { + ACEBBasisFunction *func = &basis_rank1[func_ind]; + h_rank(mu, func_ind) = 1; + h_mus(mu, func_ind, 0) = func->mus[0]; + h_ns(mu, func_ind, 0) = func->ns[0]; + + for (int p = 0; p < ndensity; ++p) + h_coeffs(mu, func_ind, p) = func->coeff[p]; + + h_gen_cgs(mu, idx_ms_comb) = func->gen_cgs[0]; + + h_func_inds(mu, idx_ms_comb) = func_ind; + idx_ms_comb++; + } + + // rank > 1 + for (int func_ind = 0; func_ind < total_basis_size; ++func_ind) { + ACEBBasisFunction *func = &basis[func_ind]; + // TODO: check if func->ctildes are zero, then skip + + const int func_ind_through = total_basis_size_rank1 + func_ind; + + const int rank = h_rank(mu, func_ind_through) = func->rank; + h_num_ms_combs(mu, func_ind_through) = func->num_ms_combs; + for (int t = 0; t < rank; t++) { + h_mus(mu, func_ind_through, t) = func->mus[t]; + h_ns(mu, func_ind_through, t) = func->ns[t]; + h_ls(mu, func_ind_through, t) = func->ls[t]; + } + + for (int p = 0; p < ndensity; ++p) + h_coeffs(mu, func_ind_through, p) = func->coeff[p]; + + + // loop over {ms} combinations in sum + for (int ms_ind = 0; ms_ind < func->num_ms_combs; ++ms_ind) { + auto ms = &func->ms_combs[ms_ind * rank]; // current ms-combination (of length = rank) + for (int t = 0; t < rank; t++) + h_ms_combs(mu, idx_ms_comb, t) = ms[t]; + + + h_gen_cgs(mu, idx_ms_comb) = func->gen_cgs[ms_ind]; + + + h_func_inds(mu, idx_ms_comb) = func_ind_through; + idx_ms_comb++; + } + } + + // ASI + const auto &A_as_inv = b_evaluator->A_active_set_inv.at(mu); + for (int i = 0; i < total_basis_size_rank1 + total_basis_size; i++) + for (int j = 0; j < total_basis_size_rank1 + total_basis_size; j++){ + h_ASI(mu,i,j) = A_as_inv(j,i); // transpose back for better performance on GPU + } + } + + Kokkos::deep_copy(d_rank, h_rank); + Kokkos::deep_copy(d_num_ms_combs, h_num_ms_combs); + Kokkos::deep_copy(d_func_inds, h_func_inds); + Kokkos::deep_copy(d_mus, h_mus); + Kokkos::deep_copy(d_ns, h_ns); + Kokkos::deep_copy(d_ls, h_ls); + Kokkos::deep_copy(d_ms_combs, h_ms_combs); + Kokkos::deep_copy(d_gen_cgs, h_gen_cgs); + Kokkos::deep_copy(d_coeffs, h_coeffs); + Kokkos::deep_copy(d_ASI_temp, h_ASI); + d_ASI = d_ASI_temp; // copy from temopary array to const array +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairPACEExtrapolationKokkos::init_style() +{ + if (host_flag) { + if (lmp->kokkos->nthreads > 1) + error->all(FLERR,"Pair style pace/extrapolation/kk can currently only run on a single " + "CPU thread"); + + PairPACEExtrapolation::init_style(); + return; + } + + if (atom->tag_enable == 0) error->all(FLERR, "Pair style PACE requires atom IDs"); + if (force->newton_pair == 0) error->all(FLERR, "Pair style PACE requires newton pair on"); + + // neighbor list request for KOKKOS + + neighflag = lmp->kokkos->neighflag; + + auto request = neighbor->add_request(this, NeighConst::REQ_FULL); + request->set_kokkos_host(std::is_same::value && + !std::is_same::value); + request->set_kokkos_device(std::is_same::value); + if (neighflag == FULL) + error->all(FLERR,"Must use half neighbor list style with pair pace/kk"); + + auto basis_set = aceimpl->basis_set; + + nelements = basis_set->nelements; + lmax = basis_set->lmax; + nradmax = basis_set->nradmax; + nradbase = basis_set->nradbase; + + // spherical harmonics + + MemKK::realloc_kokkos(alm, "pace:alm", (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(blm, "pace:blm", (lmax + 1) * (lmax + 1)); + MemKK::realloc_kokkos(cl, "pace:cl", lmax + 1); + MemKK::realloc_kokkos(dl, "pace:dl", lmax + 1); + + pre_compute_harmonics(lmax); + copy_pertype(); + copy_splines(); + copy_tilde(); +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +template +double PairPACEExtrapolationKokkos::init_one(int i, int j) +{ + double cutone = PairPACEExtrapolation::init_one(i,j); + + k_scale.h_view(i,j) = k_scale.h_view(j,i) = scale[i][j]; + k_scale.template modify(); + + k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone; + k_cutsq.template modify(); + + return cutone; +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +template +void PairPACEExtrapolationKokkos::coeff(int narg, char **arg) +{ + PairPACEExtrapolation::coeff(narg,arg); + + auto b_evaluator = aceimpl->ace; + if (!b_evaluator->get_is_linear_extrapolation_grade()) { + error->all(FLERR,"Must use LINEAR ASI with pair pace/extrapolation/kk"); + } + // Set up element lists + + auto h_map = Kokkos::create_mirror_view(d_map); + + for (int i = 1; i <= atom->ntypes; i++) + h_map(i) = map[i]; + + Kokkos::deep_copy(d_map,h_map); +} + +/* ---------------------------------------------------------------------- */ + +template +void PairPACEExtrapolationKokkos::allocate() +{ + PairPACEExtrapolation::allocate(); + + int n = atom->ntypes + 1; + MemKK::realloc_kokkos(d_map, "pace:map", n); + + MemKK::realloc_kokkos(k_cutsq, "pace:cutsq", n, n); + d_cutsq = k_cutsq.template view(); + + MemKK::realloc_kokkos(k_scale, "pace:scale", n, n); + d_scale = k_scale.template view(); +} + +/* ---------------------------------------------------------------------- */ + +template +struct FindMaxNumNeighs { + typedef DeviceType device_type; + NeighListKokkos k_list; + + FindMaxNumNeighs(NeighListKokkos* nl): k_list(*nl) {} + ~FindMaxNumNeighs() {k_list.copymode = 1;} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& ii, int& maxneigh) const { + const int i = k_list.d_ilist[ii]; + const int num_neighs = k_list.d_numneigh[i]; + if (maxneigh < num_neighs) maxneigh = num_neighs; + } +}; + +/* ---------------------------------------------------------------------- */ + +template +void PairPACEExtrapolationKokkos::compute(int eflag_in, int vflag_in) +{ + if (host_flag) { + atomKK->sync(Host,X_MASK|TYPE_MASK); + PairPACEExtrapolation::compute(eflag_in,vflag_in); + atomKK->modified(Host,F_MASK); + return; + } + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + ev_init(eflag,vflag,0); + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view(); + } + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); + d_vatom = k_vatom.view(); + } + + if (gamma_flag && atom->nlocal > nmax) { + memory->destroy(extrapolation_grade_gamma); + nmax = atom->nlocal; + memory->create(extrapolation_grade_gamma, nmax, "pace/atom:gamma"); + //zeroify array + memset(extrapolation_grade_gamma, 0, nmax * sizeof(*extrapolation_grade_gamma)); + } + + copymode = 1; + if (!force->newton_pair) + error->all(FLERR,"PairPACEExtrapolationKokkos requires 'newton on'"); + + atomKK->sync(execution_space,X_MASK|F_MASK|TYPE_MASK); + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + type = atomKK->k_type.view(); + k_scale.template sync(); + k_cutsq.template sync(); + + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + inum = list->inum; + + need_dup = lmp->kokkos->need_dup(); + if (need_dup) { + dup_f = Kokkos::Experimental::create_scatter_view(f); + dup_vatom = Kokkos::Experimental::create_scatter_view(d_vatom); + } else { + ndup_f = Kokkos::Experimental::create_scatter_view(f); + ndup_vatom = Kokkos::Experimental::create_scatter_view(d_vatom); + } + + maxneigh = 0; + Kokkos::parallel_reduce("pace::find_maxneigh", inum, FindMaxNumNeighs(k_list), Kokkos::Max(maxneigh)); + + int vector_length_default = 1; + int team_size_default = 1; + if (!host_flag) + team_size_default = 32; + + chunk_size = MIN(chunksize,inum); // "chunksize" variable is set by user + chunk_offset = 0; + + + grow(chunk_size, maxneigh); + + EV_FLOAT ev; + + while (chunk_offset < inum) { // chunk up loop to prevent running out of memory + + Kokkos::deep_copy(weights, 0.0); + Kokkos::deep_copy(weights_rank1, 0.0); + Kokkos::deep_copy(A, 0.0); + Kokkos::deep_copy(A_rank1, 0.0); + Kokkos::deep_copy(rhos, 0.0); + + Kokkos::deep_copy(projections, 0.0); + Kokkos::deep_copy(d_gamma, 0.0); + + EV_FLOAT ev_tmp; + + if (chunk_size > inum - chunk_offset) + chunk_size = inum - chunk_offset; + + //Neigh + { + int vector_length = vector_length_default; + int team_size = team_size_default; + check_team_size_for(chunk_size,team_size,vector_length); + int scratch_size = scratch_size_helper(team_size * maxneigh); + typename Kokkos::TeamPolicy policy_neigh(chunk_size,team_size,vector_length); + policy_neigh = policy_neigh.set_scratch_size(0, Kokkos::PerTeam(scratch_size)); + Kokkos::parallel_for("ComputeNeigh",policy_neigh,*this); + } + + //ComputeRadial + { + int vector_length = vector_length_default; + int team_size = team_size_default; + check_team_size_for(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); + typename Kokkos::TeamPolicy policy_radial(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); + Kokkos::parallel_for("ComputeRadial",policy_radial,*this); + } + + //ComputeYlm + { + int vector_length = vector_length_default; + int team_size = 16; + check_team_size_for(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); + typename Kokkos::TeamPolicy policy_ylm(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); + Kokkos::parallel_for("ComputeYlm",policy_ylm,*this); + } + + //ComputeAi + { + int vector_length = vector_length_default; + int team_size = team_size_default; + check_team_size_for(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); + typename Kokkos::TeamPolicy policy_ai(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); + Kokkos::parallel_for("ComputeAi",policy_ai,*this); + } + + //ConjugateAi + { + typename Kokkos::RangePolicy policy_conj_ai(0,chunk_size); + Kokkos::parallel_for("ConjugateAi",policy_conj_ai,*this); + } + + //ComputeRho + { + typename Kokkos::RangePolicy policy_rho(0, chunk_size * idx_ms_combs_max); + Kokkos::parallel_for("ComputeRho",policy_rho,*this); + } + + //ComputeFS + { + typename Kokkos::RangePolicy policy_fs(0,chunk_size); + Kokkos::parallel_for("ComputeFS",policy_fs,*this); + } + + //ComputeGamma + if (gamma_flag) { + typename Kokkos::RangePolicy policy_gamma(0,chunk_size); + Kokkos::parallel_for("ComputeGamma",policy_gamma,*this); + } + + //ComputeWeights + { + typename Kokkos::RangePolicy policy_weights(0, chunk_size * idx_ms_combs_max); + Kokkos::parallel_for("ComputeWeights",policy_weights,*this); + } + + //ComputeDerivative + { + int vector_length = vector_length_default; + int team_size = team_size_default; + check_team_size_for(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); + typename Kokkos::TeamPolicy policy_derivative(((chunk_size+team_size-1)/team_size)*maxneigh,team_size,vector_length); + Kokkos::parallel_for("ComputeDerivative",policy_derivative,*this); + } + + //ComputeForce + { + if (evflag) { + if (neighflag == HALF) { + typename Kokkos::RangePolicy > policy_force(0,chunk_size); + Kokkos::parallel_reduce(policy_force, *this, ev_tmp); + } else if (neighflag == HALFTHREAD) { + typename Kokkos::RangePolicy > policy_force(0,chunk_size); + Kokkos::parallel_reduce("ComputeForce",policy_force, *this, ev_tmp); + } + } else { + if (neighflag == HALF) { + typename Kokkos::RangePolicy > policy_force(0,chunk_size); + Kokkos::parallel_for(policy_force, *this); + } else if (neighflag == HALFTHREAD) { + typename Kokkos::RangePolicy > policy_force(0,chunk_size); + Kokkos::parallel_for("ComputeForce",policy_force, *this); + } + } + } + ev += ev_tmp; + + //if gamma_flag - copy current d_gamma to extrapolation_grade_gamma + if (gamma_flag){ + h_gamma = Kokkos::create_mirror_view(d_gamma); + Kokkos:deep_copy(h_gamma, d_gamma); + memcpy(extrapolation_grade_gamma+chunk_offset, (void *) h_gamma.data(), sizeof(double)*chunk_size); + } + + chunk_offset += chunk_size; + } // end while + + if (need_dup) + Kokkos::Experimental::contribute(f, dup_f); + + if (eflag_global) eng_vdwl += ev.evdwl; + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + if (need_dup) + Kokkos::Experimental::contribute(d_vatom, dup_vatom); + k_vatom.template modify(); + k_vatom.template sync(); + } + + atomKK->modified(execution_space,F_MASK); + + copymode = 0; + + // free duplicated memory + if (need_dup) { + dup_f = decltype(dup_f)(); + dup_vatom = decltype(dup_vatom)(); + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeNeigh,const typename Kokkos::TeamPolicy::member_type& team) const +{ + const int ii = team.league_rank(); + const int i = d_ilist[ii + chunk_offset]; + const int itype = type[i]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int jnum = d_numneigh[i]; + + // get a pointer to scratch memory + // This is used to cache whether or not an atom is within the cutoff + // If it is, inside is assigned to 1, otherwise -1 + const int team_rank = team.team_rank(); + const int scratch_shift = team_rank * maxneigh; // offset into pointer for entire team + int* inside = (int*)team.team_shmem().get_shmem(team.team_size() * maxneigh * sizeof(int), 0) + scratch_shift; + + // loop over list of all neighbors within force cutoff + // distsq[] = distance sq to each + // rlist[] = distance vector to each + // nearest[] = atom indices of neighbors + + int ncount = 0; + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,jnum), + [&] (const int jj, int& count) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + + const int jtype = type(j); + + const F_FLOAT delx = xtmp - x(j,0); + const F_FLOAT dely = ytmp - x(j,1); + const F_FLOAT delz = ztmp - x(j,2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + inside[jj] = -1; + if (rsq < d_cutsq(itype,jtype)) { + inside[jj] = 1; + count++; + } + },ncount); + + d_ncount(ii) = ncount; + + Kokkos::parallel_scan(Kokkos::TeamThreadRange(team,jnum), + [&] (const int jj, int& offset, bool final) { + + if (inside[jj] < 0) return; + + if (final) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + const F_FLOAT delx = xtmp - x(j,0); + const F_FLOAT dely = ytmp - x(j,1); + const F_FLOAT delz = ztmp - x(j,2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const F_FLOAT r = sqrt(rsq); + const F_FLOAT rinv = 1.0/r; + const int mu_j = d_map(type(j)); + d_mu(ii,offset) = mu_j; + d_rnorms(ii,offset) = r; + d_rhats(ii,offset,0) = -delx*rinv; + d_rhats(ii,offset,1) = -dely*rinv; + d_rhats(ii,offset,2) = -delz*rinv; + d_nearest(ii,offset) = j; + } + offset++; + }); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeRadial, const typename Kokkos::TeamPolicy::member_type& team) const +{ + // Extract the atom number + int ii = team.team_rank() + team.team_size() * (team.league_rank() % + ((chunk_size+team.team_size()-1)/team.team_size())); + if (ii >= chunk_size) return; + const int i = d_ilist[ii + chunk_offset]; + + // Extract the neighbor number + const int jj = team.league_rank() / ((chunk_size+team.team_size()-1)/team.team_size()); + const int ncount = d_ncount(ii); + if (jj >= ncount) return; + + const double r_norm = d_rnorms(ii, jj); + const int mu_i = d_map(type(i)); + const int mu_j = d_mu(ii, jj); + + evaluate_splines(ii, jj, r_norm, nradbase, nradmax, mu_i, mu_j); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeYlm, const typename Kokkos::TeamPolicy::member_type& team) const +{ + // Extract the atom number + int ii = team.team_rank() + team.team_size() * (team.league_rank() % + ((chunk_size+team.team_size()-1)/team.team_size())); + if (ii >= chunk_size) return; + + // Extract the neighbor number + const int jj = team.league_rank() / ((chunk_size+team.team_size()-1)/team.team_size()); + const int ncount = d_ncount(ii); + if (jj >= ncount) return; + + const double xn = d_rhats(ii, jj, 0); + const double yn = d_rhats(ii, jj, 1); + const double zn = d_rhats(ii, jj, 2); + compute_ylm(ii,jj,xn,yn,zn,lmax); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeAi, const typename Kokkos::TeamPolicy::member_type& team) const +{ + // Extract the atom number + int ii = team.team_rank() + team.team_size() * (team.league_rank() % + ((chunk_size+team.team_size()-1)/team.team_size())); + if (ii >= chunk_size) return; + + // Extract the neighbor number + const int jj = team.league_rank() / ((chunk_size+team.team_size()-1)/team.team_size()); + const int ncount = d_ncount(ii); + if (jj >= ncount) return; + + const int mu_j = d_mu(ii, jj); + + // rank = 1 + for (int n = 0; n < nradbase; n++) + Kokkos::atomic_add(&A_rank1(ii, mu_j, n), gr(ii, jj, n) * Y00); + + // rank > 1 + for (int n = 0; n < nradmax; n++) { + for (int l = 0; l <= lmax; l++) { + for (int m = 0; m <= l; m++) { + const int idx = l * (l + 1) + m; // (l, m) + Kokkos::atomic_add(&A(ii, mu_j, n, idx).re, fr(ii, jj, n, l) * ylm(ii, jj, idx).re); + Kokkos::atomic_add(&A(ii, mu_j, n, idx).im, fr(ii, jj, n, l) * ylm(ii, jj, idx).im); + } + } + } + + // hard-core repulsion + Kokkos::atomic_add(&rho_core(ii), cr(ii, jj)); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::operator() (TagPairPACEConjugateAi, const int& ii) const +{ + //complex conjugate A's (for NEGATIVE (-m) terms) + // for rank > 1 + for (int mu_j = 0; mu_j < nelements; mu_j++) { + for (int n = 0; n < nradmax; n++) { + for (int l = 0; l <= lmax; l++) { + //fill in -m part in the outer loop using the same m <-> -m symmetry as for Ylm + for (int m = 1; m <= l; m++) { + const int idx = l * (l + 1) + m; // (l, m) + const int idxm = l * (l + 1) - m; // (l, -m) + const int factor = m % 2 == 0 ? 1 : -1; + A(ii, mu_j, n, idxm) = A(ii, mu_j, n, idx).conj() * (double)factor; + } + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeRho, const int& iter) const +{ + const int idx_ms_comb = iter / chunk_size; + const int ii = iter % chunk_size; + + const int i = d_ilist[ii + chunk_offset]; + const int mu_i = d_map(type(i)); + + if (idx_ms_comb >= d_idx_ms_combs_count(mu_i)) return; + + const int ndensity = d_ndensity(mu_i); + + const int func_ind = d_func_inds(mu_i, idx_ms_comb); + const int rank = d_rank(mu_i, func_ind); + const int r = rank - 1; + + // Basis functions B with iterative product and density rho(p) calculation + if (rank == 1) { + const int mu = d_mus(mu_i, func_ind, 0); + const int n = d_ns(mu_i, func_ind, 0); + double A_cur = A_rank1(ii, mu, n - 1); + for (int p = 0; p < ndensity; ++p) { + //for rank=1 (r=0) only 1 ms-combination exists (ms_ind=0), so index of func.ctildes is 0..ndensity-1 + Kokkos::atomic_add(&rhos(ii, p), d_coeffs(mu_i, func_ind, p) * d_gen_cgs(mu_i, idx_ms_comb) * A_cur); + } + + + //gamma_i + if (gamma_flag) + Kokkos::atomic_add(&projections(ii, func_ind), d_gen_cgs(mu_i, idx_ms_comb) * A_cur); + + } else { // rank > 1 + // loop over {ms} combinations in sum + + // loop over m, collect B = product of A with given ms + A_forward_prod(ii, idx_ms_comb, 0) = complex::one(); + + // fill forward A-product triangle + for (int t = 0; t < rank; t++) { + //TODO: optimize ns[t]-1 -> ns[t] during functions construction + const int mu = d_mus(mu_i, func_ind, t); + const int n = d_ns(mu_i, func_ind, t); + const int l = d_ls(mu_i, func_ind, t); + const int m = d_ms_combs(mu_i, idx_ms_comb, t); // current ms-combination (of length = rank) + const int idx = l * (l + 1) + m; // (l, m) + A_list(ii, idx_ms_comb, t) = A(ii, mu, n - 1, idx); + A_forward_prod(ii, idx_ms_comb, t + 1) = A_forward_prod(ii, idx_ms_comb, t) * A_list(ii, idx_ms_comb, t); + } + + complex A_backward_prod = complex::one(); + + // fill backward A-product triangle + for (int t = r; t >= 1; t--) { + const complex dB = A_forward_prod(ii, idx_ms_comb, t) * A_backward_prod; // dB - product of all A's except t-th + dB_flatten(ii, idx_ms_comb, t) = dB; + + A_backward_prod = A_backward_prod * A_list(ii, idx_ms_comb, t); + } + dB_flatten(ii, idx_ms_comb, 0) = A_forward_prod(ii, idx_ms_comb, 0) * A_backward_prod; + + const complex B = A_forward_prod(ii, idx_ms_comb, rank); + + for (int p = 0; p < ndensity; ++p) { + // real-part only multiplication + Kokkos::atomic_add(&rhos(ii, p), B.real_part_product(d_coeffs(mu_i, func_ind, p) * d_gen_cgs(mu_i, idx_ms_comb))); + } + //gamma_i + if (gamma_flag) + Kokkos::atomic_add(&projections(ii, func_ind), B.real_part_product(d_gen_cgs(mu_i, idx_ms_comb))); + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeFS, const int& ii) const +{ + const int i = d_ilist[ii + chunk_offset]; + const int mu_i = d_map(type(i)); + + const double rho_cut = d_rho_core_cutoff(mu_i); + const double drho_cut = d_drho_core_cutoff(mu_i); + const int ndensity = d_ndensity(mu_i); + + double evdwl, fcut, dfcut; + evdwl = fcut = dfcut = 0.0; + + inner_cutoff(rho_core(ii), rho_cut, drho_cut, fcut, dfcut); + FS_values_and_derivatives(ii, evdwl, mu_i); + + dF_drho_core(ii) = evdwl * dfcut + 1; + for (int p = 0; p < ndensity; ++p) + dF_drho(ii, p) *= fcut; + + + // tally energy contribution + if (eflag) { + double evdwl_cut = evdwl * fcut + rho_core(ii); + // E0 shift + evdwl_cut += d_E0vals(mu_i); + e_atom(ii) = evdwl_cut; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeGamma, const int& ii) const +{ + const int i = d_ilist[ii + chunk_offset]; + const int mu_i = d_map(type(i)); + const int basis_size = d_total_basis_size(mu_i); + + double gamma_max = 0; + double current_gamma; + for (int j = 0; j gamma_max) + gamma_max = current_gamma; + } + + // tally energy contribution + d_gamma(ii) = gamma_max; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeWeights, const int& iter) const +{ + const int idx_ms_comb = iter / chunk_size; + const int ii = iter % chunk_size; + + const int i = d_ilist[ii + chunk_offset]; + const int mu_i = d_map(type(i)); + + if (idx_ms_comb >= d_idx_ms_combs_count(mu_i)) return; + + const int ndensity = d_ndensity(mu_i); + + const int func_ind = d_func_inds(mu_i, idx_ms_comb); + const int rank = d_rank(mu_i, func_ind); + + // Weights and theta calculation + + if (rank == 1) { + const int mu = d_mus(mu_i, func_ind, 0); + const int n = d_ns(mu_i, func_ind, 0); + double theta = 0.0; + for (int p = 0; p < ndensity; ++p) { + // for rank=1 (r=0) only 1 ms-combination exists (ms_ind=0), so index of func.ctildes is 0..ndensity-1 + theta += dF_drho(ii, p) * d_coeffs(mu_i, func_ind, p) * d_gen_cgs(mu_i, idx_ms_comb); + } + Kokkos::atomic_add(&weights_rank1(ii, mu, n - 1), theta); + } else { // rank > 1 + double theta = 0.0; + for (int p = 0; p < ndensity; ++p) + theta += dF_drho(ii, p) * d_coeffs(mu_i, func_ind, p) * d_gen_cgs(mu_i, idx_ms_comb); + + theta *= 0.5; // 0.5 factor due to possible double counting ??? + for (int t = 0; t < rank; ++t) { + const int m_t = d_ms_combs(mu_i, idx_ms_comb, t); + const int factor = (m_t % 2 == 0 ? 1 : -1); + const complex dB = dB_flatten(ii, idx_ms_comb, t); + const int mu_t = d_mus(mu_i, func_ind, t); + const int n_t = d_ns(mu_i, func_ind, t); + const int l_t = d_ls(mu_i, func_ind, t); + const int idx = l_t * (l_t + 1) + m_t; // (l, m) + const complex value = theta * dB; + Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idx).re), value.re); + Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idx).im), value.im); + // update -m_t (that could also be positive), because the basis is half_basis + const int idxm = l_t * (l_t + 1) - m_t; // (l, -m) + const complex valuem = theta * dB.conj() * (double)factor; + Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idxm).re), valuem.re); + Kokkos::atomic_add(&(weights(ii, mu_t, n_t - 1, idxm).im), valuem.im); + } + } +} + +/* ---------------------------------------------------------------------- */ +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeDerivative, const typename Kokkos::TeamPolicy::member_type& team) const +{ + // Extract the atom number + int ii = team.team_rank() + team.team_size() * (team.league_rank() % + ((chunk_size+team.team_size()-1)/team.team_size())); + if (ii >= chunk_size) return; + const int i = d_ilist[ii + chunk_offset]; + + // Extract the neighbor number + const int jj = team.league_rank() / ((chunk_size+team.team_size()-1)/team.team_size()); + const int ncount = d_ncount(ii); + if (jj >= ncount) return; + + const int itype = type(i); + const double scale = d_scale(itype,itype); + + const int mu_j = d_mu(ii, jj); + double r_hat[3]; + r_hat[0] = d_rhats(ii, jj, 0); + r_hat[1] = d_rhats(ii, jj, 1); + r_hat[2] = d_rhats(ii, jj, 2); + const double r = d_rnorms(ii, jj); + const double rinv = 1.0/r; + + double f_ji[3]; + f_ji[0] = f_ji[1] = f_ji[2] = 0; + + // for rank = 1 + for (int n = 0; n < nradbase; ++n) { + if (weights_rank1(ii, mu_j, n) == 0) continue; + double &DG = dgr(ii, jj, n); + double DGR = DG * Y00; + DGR *= weights_rank1(ii, mu_j, n); + f_ji[0] += DGR * r_hat[0]; + f_ji[1] += DGR * r_hat[1]; + f_ji[2] += DGR * r_hat[2]; + } + + // for rank > 1 + for (int n = 0; n < nradmax; n++) { + for (int l = 0; l <= lmax; l++) { + const double R_over_r = fr(ii, jj, n, l) * rinv; + const double DR = dfr(ii, jj, n, l); + + // for m >= 0 + for (int m = 0; m <= l; m++) { + const int idx = l * (l + 1) + m; // (l, m) + complex w = weights(ii, mu_j, n, idx); + if (w.re == 0.0 && w.im == 0.0) continue; + // counting for -m cases if m > 0 + if (m > 0) { + w.re *= 2.0; + w.im *= 2.0; + } + + complex DY[3]; + DY[0] = dylm(ii, jj, idx, 0); + DY[1] = dylm(ii, jj, idx, 1); + DY[2] = dylm(ii, jj, idx, 2); + const complex Y_DR = ylm(ii, jj, idx) * DR; + + complex grad_phi_nlm[3]; + grad_phi_nlm[0] = Y_DR * r_hat[0] + DY[0] * R_over_r; + grad_phi_nlm[1] = Y_DR * r_hat[1] + DY[1] * R_over_r; + grad_phi_nlm[2] = Y_DR * r_hat[2] + DY[2] * R_over_r; + // real-part multiplication only + f_ji[0] += w.real_part_product(grad_phi_nlm[0]); + f_ji[1] += w.real_part_product(grad_phi_nlm[1]); + f_ji[2] += w.real_part_product(grad_phi_nlm[2]); + } + } + } + + // hard-core repulsion + const double fpair = dF_drho_core(ii) * dcr(ii,jj); + f_ij(ii, jj, 0) = scale * f_ji[0] + fpair * r_hat[0]; + f_ij(ii, jj, 1) = scale * f_ji[1] + fpair * r_hat[1]; + f_ij(ii, jj, 2) = scale * f_ji[2] + fpair * r_hat[2]; +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeForce, const int& ii, EV_FLOAT& ev) const +{ + // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + const auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); + const auto a_f = v_f.template access::value>(); + + const int i = d_ilist[ii + chunk_offset]; + const int itype = type(i); + const double scale = d_scale(itype,itype); + + const int ncount = d_ncount(ii); + + F_FLOAT fitmp[3] = {0.0,0.0,0.0}; + for (int jj = 0; jj < ncount; jj++) { + int j = d_nearest(ii,jj); + + double r_hat[3]; + r_hat[0] = d_rhats(ii, jj, 0); + r_hat[1] = d_rhats(ii, jj, 1); + r_hat[2] = d_rhats(ii, jj, 2); + const double r = d_rnorms(ii, jj); + const double delx = -r_hat[0]*r; + const double dely = -r_hat[1]*r; + const double delz = -r_hat[2]*r; + + const double fpairx = f_ij(ii, jj, 0); + const double fpairy = f_ij(ii, jj, 1); + const double fpairz = f_ij(ii, jj, 2); + + fitmp[0] += fpairx; + fitmp[1] += fpairy; + fitmp[2] += fpairz; + a_f(j,0) -= fpairx; + a_f(j,1) -= fpairy; + a_f(j,2) -= fpairz; + + // tally per-atom virial contribution + if (EVFLAG && vflag_either) + v_tally_xyz(ev, i, j, fpairx, fpairy, fpairz, delx, dely, delz); + } + + a_f(i,0) += fitmp[0]; + a_f(i,1) += fitmp[1]; + a_f(i,2) += fitmp[2]; + + // tally energy contribution + if (EVFLAG && eflag_either) { + const double evdwl = scale*e_atom(ii); + //ev_tally_full(i, 2.0 * evdwl, 0.0, 0.0, 0.0, 0.0, 0.0); + if (eflag_global) ev.evdwl += evdwl; + if (eflag_atom) d_eatom[i] += evdwl; + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::operator() (TagPairPACEComputeForce,const int& ii) const { + EV_FLOAT ev; + this->template operator()(TagPairPACEComputeForce(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz, + const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const +{ + // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + + auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); + auto a_vatom = v_vatom.template access>(); + + const E_FLOAT v0 = delx*fx; + const E_FLOAT v1 = dely*fy; + const E_FLOAT v2 = delz*fz; + const E_FLOAT v3 = delx*fy; + const E_FLOAT v4 = delx*fz; + const E_FLOAT v5 = dely*fz; + + if (vflag_global) { + ev.v[0] += v0; + ev.v[1] += v1; + ev.v[2] += v2; + ev.v[3] += v3; + ev.v[4] += v4; + ev.v[5] += v5; + } + + if (vflag_atom) { + a_vatom(i,0) += 0.5*v0; + a_vatom(i,1) += 0.5*v1; + a_vatom(i,2) += 0.5*v2; + a_vatom(i,3) += 0.5*v3; + a_vatom(i,4) += 0.5*v4; + a_vatom(i,5) += 0.5*v5; + a_vatom(j,0) += 0.5*v0; + a_vatom(j,1) += 0.5*v1; + a_vatom(j,2) += 0.5*v2; + a_vatom(j,3) += 0.5*v3; + a_vatom(j,4) += 0.5*v4; + a_vatom(j,5) += 0.5*v5; + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairPACEExtrapolationKokkos::pre_compute_harmonics(int lmax) +{ + auto h_alm = Kokkos::create_mirror_view(alm); + auto h_blm = Kokkos::create_mirror_view(blm); + auto h_cl = Kokkos::create_mirror_view(cl); + auto h_dl = Kokkos::create_mirror_view(dl); + + for (int l = 1; l <= lmax; l++) { + const double lsq = l * l; + const double ld = 2 * l; + const double l1 = (4 * lsq - 1); + const double l2 = lsq - ld + 1; + for (int m = 0; m < l - 1; m++) { + const double msq = m * m; + const double a = sqrt((double(l1)) / (double(lsq - msq))); + const double b = -sqrt((double(l2 - msq)) / (double(4 * l2 - 1))); + const int idx = l * (l + 1) + m; // (l, m) + h_alm(idx) = a; + h_blm(idx) = b; + } + } + + for (int l = 1; l <= lmax; l++) { + h_cl(l) = -sqrt(1.0 + 0.5 / (double(l))); + h_dl(l) = sqrt(double(2 * (l - 1) + 3)); + } + + Kokkos::deep_copy(alm, h_alm); + Kokkos::deep_copy(blm, h_blm); + Kokkos::deep_copy(cl, h_cl); + Kokkos::deep_copy(dl, h_dl); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::compute_barplm(int ii, int jj, double rz, int lmax) const +{ + // requires -1 <= rz <= 1 , NO CHECKING IS PERFORMED !!!!!!!!! + // prefactors include 1/sqrt(2) factor compared to reference + + // l=0, m=0 + // plm(ii, jj, 0, 0) = Y00/sq1o4pi; //= sq1o4pi; + plm(ii, jj, 0) = Y00; //= 1; + dplm(ii, jj, 0) = 0.0; + + if (lmax > 0) { + + // l=1, m=0 + plm(ii, jj, 2) = Y00 * sq3 * rz; + dplm(ii, jj, 2) = Y00 * sq3; + + // l=1, m=1 + plm(ii, jj, 3) = -sq3o2 * Y00; + dplm(ii, jj, 3) = 0.0; + + // loop l = 2, lmax + for (int l = 2; l <= lmax; l++) { + for (int m = 0; m < l - 1; m++) { + const int idx = l * (l + 1) + m; // (l, m) + const int idx1 = (l - 1) * l + m; // (l - 1, m) + const int idx2 = (l - 2) * (l - 1) + m; // (l - 2, m) + plm(ii, jj, idx) = alm(idx) * (rz * plm(ii, jj, idx1) + blm(idx) * plm(ii, jj, idx2)); + dplm(ii, jj, idx) = alm(idx) * (plm(ii, jj, idx1) + rz * dplm(ii, jj, idx1) + blm(idx) * dplm(ii, jj, idx2)); + } + const int idx = l * (l + 1) + l; // (l, l) + const int idx1 = l * (l + 1) + l - 1; // (l, l - 1) + const int idx2 = (l - 1) * l + l - 1; // (l - 1, l - 1) + const double t = dl(l) * plm(ii, jj, idx2); + plm(ii, jj, idx1) = t * rz; + dplm(ii, jj, idx1) = t; + plm(ii, jj, idx) = cl(l) * plm(ii, jj, idx2); + dplm(ii, jj, idx) = 0.0; + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::compute_ylm(int ii, int jj, double rx, double ry, double rz, int lmax) const +{ + // requires rx^2 + ry^2 + rz^2 = 1 , NO CHECKING IS PERFORMED !!!!!!!!! + + complex phase; + complex phasem, mphasem1; + complex dyx, dyy, dyz; + complex rdy; + + phase.re = rx; + phase.im = ry; + + // compute barplm + compute_barplm(ii, jj, rz, lmax); + + // m = 0 + for (int l = 0; l <= lmax; l++) { + const int idx = l * (l + 1); + + ylm(ii, jj, idx).re = plm(ii, jj, idx); + ylm(ii, jj, idx).im = 0.0; + + dyz.re = dplm(ii, jj, idx); + rdy.re = dyz.re * rz; + + dylm(ii, jj, idx, 0).re = -rdy.re * rx; + dylm(ii, jj, idx, 0).im = 0.0; + dylm(ii, jj, idx, 1).re = -rdy.re * ry; + dylm(ii, jj, idx, 1).im = 0.0; + dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz; + dylm(ii, jj, idx, 2).im = 0; + } + // m = 1 + for (int l = 1; l <= lmax; l++) { + const int idx = l * (l + 1) + 1; + + ylm(ii, jj, idx) = phase * plm(ii, jj, idx); + + dyx.re = plm(ii, jj, idx); + dyx.im = 0.0; + dyy.re = 0.0; + dyy.im = plm(ii, jj, idx); + dyz.re = phase.re * dplm(ii, jj, idx); + dyz.im = phase.im * dplm(ii, jj, idx); + + rdy.re = rx * dyx.re + +rz * dyz.re; + rdy.im = ry * dyy.im + rz * dyz.im; + + dylm(ii, jj, idx, 0).re = dyx.re - rdy.re * rx; + dylm(ii, jj, idx, 0).im = -rdy.im * rx; + dylm(ii, jj, idx, 1).re = -rdy.re * ry; + dylm(ii, jj, idx, 1).im = dyy.im - rdy.im * ry; + dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz; + dylm(ii, jj, idx, 2).im = dyz.im - rdy.im * rz; + } + + // m > 1 + phasem = phase; + for (int m = 2; m <= lmax; m++) { + + mphasem1.re = phasem.re * double(m); + mphasem1.im = phasem.im * double(m); + phasem = phasem * phase; + + for (int l = m; l <= lmax; l++) { + const int idx = l * (l + 1) + m; + + ylm(ii, jj, idx).re = phasem.re * plm(ii, jj, idx); + ylm(ii, jj, idx).im = phasem.im * plm(ii, jj, idx); + + dyx = mphasem1 * plm(ii, jj, idx); + dyy.re = -dyx.im; + dyy.im = dyx.re; + dyz = phasem * dplm(ii, jj, idx); + + rdy.re = rx * dyx.re + ry * dyy.re + rz * dyz.re; + rdy.im = rx * dyx.im + ry * dyy.im + rz * dyz.im; + + dylm(ii, jj, idx, 0).re = dyx.re - rdy.re * rx; + dylm(ii, jj, idx, 0).im = dyx.im - rdy.im * rx; + dylm(ii, jj, idx, 1).re = dyy.re - rdy.re * ry; + dylm(ii, jj, idx, 1).im = dyy.im - rdy.im * ry; + dylm(ii, jj, idx, 2).re = dyz.re - rdy.re * rz; + dylm(ii, jj, idx, 2).im = dyz.im - rdy.im * rz; + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::cutoff_func_poly(const double r, const double r_in, const double delta_in, double &fc, double &dfc) const +{ + if (r <= r_in-delta_in) { + fc = 1; + dfc = 0; + } else if (r >= r_in ) { + fc = 0; + dfc = 0; + } else { + double x = 1 - 2 * (1 + (r - r_in) / delta_in); + fc = 0.5 + 7.5 / 2. * (x / 4. - pow(x, 3) / 6. + pow(x, 5) / 20.); + dfc = -7.5 / delta_in * (0.25 - x * x / 2.0 + pow(x, 4) / 4.); + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::Fexp(const double x, const double m, double &F, double &DF) const +{ + const double w = 1.e6; + const double eps = 1e-10; + + const double lambda = pow(1.0 / w, m - 1.0); + if (abs(x) > eps) { + double g; + const double a = abs(x); + const double am = pow(a, m); + const double w3x3 = pow(w * a, 3); //// use cube + const double sign_factor = (signbit(x) ? -1 : 1); + if (w3x3 > 30.0) + g = 0.0; + else + g = exp(-w3x3); + + const double omg = 1.0 - g; + F = sign_factor * (omg * am + lambda * g * a); + const double dg = -3.0 * w * w * w * a * a * g; + DF = m * pow(a, m - 1.0) * omg - am * dg + lambda * dg * a + lambda * g; + } else { + F = lambda * x; + DF = lambda; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::FexpShiftedScaled(const double rho, const double mexp, double &F, double &DF) const +{ + const double eps = 1e-10; + + if (abs(mexp - 1.0) < eps) { + F = rho; + DF = 1; + } else { + const double a = abs(rho); + const double exprho = exp(-a); + const double nx = 1. / mexp; + const double xoff = pow(nx, (nx / (1.0 - nx))) * exprho; + const double yoff = pow(nx, (1 / (1.0 - nx))) * exprho; + const double sign_factor = (signbit(rho) ? -1 : 1); + F = sign_factor * (pow(xoff + a, mexp) - yoff); + DF = yoff + mexp * (-xoff + 1.0) * pow(xoff + a, mexp - 1.); + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::inner_cutoff(const double rho_core, const double rho_cut, const double drho_cut, + double &fcut, double &dfcut) const +{ + double rho_low = rho_cut - drho_cut; + if (rho_core >= rho_cut) { + fcut = 0; + dfcut = 0; + } else if (rho_core <= rho_low) { + fcut = 1; + dfcut = 0; + } else { + cutoff_func_poly(rho_core, rho_cut, drho_cut, fcut, dfcut); + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::FS_values_and_derivatives(const int ii, double &evdwl, const int mu_i) const +{ + double F, DF = 0; + int npoti = d_npoti(mu_i); + int ndensity = d_ndensity(mu_i); + for (int p = 0; p < ndensity; p++) { + const double wpre = d_wpre(mu_i, p); + const double mexp = d_mexp(mu_i, p); + + if (npoti == FS) + Fexp(rhos(ii, p), mexp, F, DF); + else if (npoti == FS_SHIFTEDSCALED) + FexpShiftedScaled(rhos(ii, p), mexp, F, DF); + + evdwl += F * wpre; // * weight (wpre) + dF_drho(ii, p) = DF * wpre; // * weight (wpre) + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::evaluate_splines(const int ii, const int jj, double r, + int /*nradbase_c*/, int /*nradial_c*/, + int mu_i, int mu_j) const +{ + auto &spline_gk = k_splines_gk.template view()(mu_i, mu_j); + auto &spline_rnl = k_splines_rnl.template view()(mu_i, mu_j); + auto &spline_hc = k_splines_hc.template view()(mu_i, mu_j); + + spline_gk.calcSplines(ii, jj, r, gr, dgr); + + spline_rnl.calcSplines(ii, jj, r, d_values, d_derivatives); + for (int kk = 0; kk < (int)fr.extent(2); kk++) { + for (int ll = 0; ll < (int)fr.extent(3); ll++) { + const int flatten = kk*fr.extent(3) + ll; + fr(ii, jj, kk, ll) = d_values(ii, jj, flatten); + dfr(ii, jj, kk, ll) = d_derivatives(ii, jj, flatten); + } + } + + spline_hc.calcSplines(ii, jj, r, d_values, d_derivatives); + cr(ii, jj) = d_values(ii, jj, 0); + dcr(ii, jj) = d_derivatives(ii, jj, 0); +} + +/* ---------------------------------------------------------------------- */ +template +void PairPACEExtrapolationKokkos::SplineInterpolatorKokkos::operator=(const SplineInterpolator &spline) { + cutoff = spline.cutoff; + deltaSplineBins = spline.deltaSplineBins; + ntot = spline.ntot; + nlut = spline.nlut; + invrscalelookup = spline.invrscalelookup; + rscalelookup = spline.rscalelookup; + num_of_functions = spline.num_of_functions; + + lookupTable = t_ace_3d4("lookupTable", ntot+1, num_of_functions); + auto h_lookupTable = Kokkos::create_mirror_view(lookupTable); + for (int i = 0; i < ntot+1; i++) + for (int j = 0; j < num_of_functions; j++) + for (int k = 0; k < 4; k++) + h_lookupTable(i, j, k) = spline.lookupTable(i, j, k); + Kokkos::deep_copy(lookupTable, h_lookupTable); +} +/* ---------------------------------------------------------------------- */ +template +KOKKOS_INLINE_FUNCTION +void PairPACEExtrapolationKokkos::SplineInterpolatorKokkos::calcSplines(const int ii, const int jj, const double r, const t_ace_3d &d_values, const t_ace_3d &d_derivatives) const +{ + double wl, wl2, wl3, w2l1, w3l2; + double c[4]; + double x = r * rscalelookup; + int nl = static_cast(floor(x)); + + if (nl <= 0) + Kokkos::abort("Encountered very small distance. Stopping."); + + if (nl < nlut) { + wl = x - double(nl); + wl2 = wl * wl; + wl3 = wl2 * wl; + w2l1 = 2.0 * wl; + w3l2 = 3.0 * wl2; + for (int func_id = 0; func_id < num_of_functions; func_id++) { + for (int idx = 0; idx < 4; idx++) + c[idx] = lookupTable(nl, func_id, idx); + d_values(ii, jj, func_id) = c[0] + c[1] * wl + c[2] * wl2 + c[3] * wl3; + d_derivatives(ii, jj, func_id) = (c[1] + c[2] * w2l1 + c[3] * w3l2) * rscalelookup; + } + } else { // fill with zeroes + for (int func_id = 0; func_id < num_of_functions; func_id++) { + d_values(ii, jj, func_id) = 0.0; + d_derivatives(ii, jj, func_id) = 0.0; + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +template +void PairPACEExtrapolationKokkos::check_team_size_for(int inum, int &team_size, int vector_length) { + int team_size_max; + + team_size_max = Kokkos::TeamPolicy(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag()); + + if (team_size*vector_length > team_size_max) + team_size = team_size_max/vector_length; +} + +/* ---------------------------------------------------------------------- */ + +template +template +void PairPACEExtrapolationKokkos::check_team_size_reduce(int inum, int &team_size, int vector_length) { + int team_size_max; + + team_size_max = Kokkos::TeamPolicy(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelReduceTag()); + + if (team_size*vector_length > team_size_max) + team_size = team_size_max/vector_length; +} + +template +template +int PairPACEExtrapolationKokkos::scratch_size_helper(int values_per_team) { + typedef Kokkos::View > ScratchViewType; + + return ScratchViewType::shmem_size(values_per_team); +} + +/* ---------------------------------------------------------------------- + memory usage of arrays +------------------------------------------------------------------------- */ + +template +double PairPACEExtrapolationKokkos::memory_usage() +{ + double bytes = 0; + + bytes += MemKK::memory_usage(A); + bytes += MemKK::memory_usage(A_rank1); + bytes += MemKK::memory_usage(A_list); + bytes += MemKK::memory_usage(A_forward_prod); + bytes += MemKK::memory_usage(e_atom); + bytes += MemKK::memory_usage(rhos); + bytes += MemKK::memory_usage(dF_drho); + bytes += MemKK::memory_usage(weights); + bytes += MemKK::memory_usage(weights_rank1); + bytes += MemKK::memory_usage(rho_core); + bytes += MemKK::memory_usage(dF_drho_core); + bytes += MemKK::memory_usage(dB_flatten); + bytes += MemKK::memory_usage(fr); + bytes += MemKK::memory_usage(dfr); + bytes += MemKK::memory_usage(gr); + bytes += MemKK::memory_usage(dgr); + bytes += MemKK::memory_usage(d_values); + bytes += MemKK::memory_usage(d_derivatives); + bytes += MemKK::memory_usage(cr); + bytes += MemKK::memory_usage(dcr); + bytes += MemKK::memory_usage(plm); + bytes += MemKK::memory_usage(dplm); + bytes += MemKK::memory_usage(ylm); + bytes += MemKK::memory_usage(dylm); + bytes += MemKK::memory_usage(d_ncount); + bytes += MemKK::memory_usage(d_mu); + bytes += MemKK::memory_usage(d_rhats); + bytes += MemKK::memory_usage(d_rnorms); + bytes += MemKK::memory_usage(d_nearest); + bytes += MemKK::memory_usage(f_ij); + bytes += MemKK::memory_usage(d_rho_core_cutoff); + bytes += MemKK::memory_usage(d_drho_core_cutoff); + bytes += MemKK::memory_usage(d_E0vals); + bytes += MemKK::memory_usage(d_ndensity); + bytes += MemKK::memory_usage(d_npoti); + bytes += MemKK::memory_usage(d_wpre); + bytes += MemKK::memory_usage(d_mexp); + bytes += MemKK::memory_usage(d_idx_ms_combs_count); + bytes += MemKK::memory_usage(d_rank); + bytes += MemKK::memory_usage(d_num_ms_combs); + bytes += MemKK::memory_usage(d_func_inds); + bytes += MemKK::memory_usage(d_mus); + bytes += MemKK::memory_usage(d_ns); + bytes += MemKK::memory_usage(d_ls); + bytes += MemKK::memory_usage(d_ms_combs); + bytes += MemKK::memory_usage(d_gen_cgs); + bytes += MemKK::memory_usage(d_coeffs); + bytes += MemKK::memory_usage(alm); + bytes += MemKK::memory_usage(blm); + bytes += MemKK::memory_usage(cl); + bytes += MemKK::memory_usage(dl); + bytes += MemKK::memory_usage(d_total_basis_size); + bytes += MemKK::memory_usage(d_ASI); + bytes += MemKK::memory_usage(projections); + bytes += MemKK::memory_usage(d_gamma); + + if (k_splines_gk.h_view.data()) { + for (int i = 0; i < nelements; i++) { + for (int j = 0; j < nelements; j++) { + bytes += k_splines_gk.h_view(i, j).memory_usage(); + bytes += k_splines_rnl.h_view(i, j).memory_usage(); + bytes += k_splines_hc.h_view(i, j).memory_usage(); + } + } + } + + return bytes; +} + +/* ---------------------------------------------------------------------- + extract method for extracting value of scale variable + ---------------------------------------------------------------------- */ + +template +void *PairPACEExtrapolationKokkos::extract(const char *str, int &dim) +{ + //check if str=="gamma_flag" then compute extrapolation grades on this iteration + dim = 0; + if (strcmp(str, "gamma_flag") == 0) return (void *) &gamma_flag; + + dim = 2; + if (strcmp(str, "scale") == 0) return (void *) scale; + return nullptr; +} + +/* ---------------------------------------------------------------------- + peratom requests from FixPair + return ptr to requested data + also return ncol = # of quantites per atom + 0 = per-atom vector + 1 or more = # of columns in per-atom array + return NULL if str is not recognized +---------------------------------------------------------------------- */ + +template +void *PairPACEExtrapolationKokkos::extract_peratom(const char *str, int &ncol) +{ + if (strcmp(str, "gamma") == 0) { + ncol = 0; + return (void *) extrapolation_grade_gamma; + } + + return nullptr; +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS { +template class PairPACEExtrapolationKokkos; +#ifdef LMP_KOKKOS_GPU +template class PairPACEExtrapolationKokkos; +#endif +} + diff --git a/src/KOKKOS/pair_pace_extrapolation_kokkos.h b/src/KOKKOS/pair_pace_extrapolation_kokkos.h new file mode 100644 index 0000000000..a65841f8f6 --- /dev/null +++ b/src/KOKKOS/pair_pace_extrapolation_kokkos.h @@ -0,0 +1,339 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(pace/extrapolation/kk,PairPACEExtrapolationKokkos); +PairStyle(pace/extrapolation/kk/device,PairPACEExtrapolationKokkos); +PairStyle(pace/extrapolation/kk/host,PairPACEExtrapolationKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_PAIR_PACE_EXTRAPOLATION_KOKKOS_H +#define LMP_PAIR_PACE_EXTRAPOLATION_KOKKOS_H + +#include "pair_pace_extrapolation.h" +#include "kokkos_type.h" +#include "pair_kokkos.h" + +class SplineInterpolator; + +namespace LAMMPS_NS { + +template +class PairPACEExtrapolationKokkos : public PairPACEExtrapolation { + public: + struct TagPairPACEComputeNeigh{}; + struct TagPairPACEComputeRadial{}; + struct TagPairPACEComputeYlm{}; + struct TagPairPACEComputeAi{}; + struct TagPairPACEConjugateAi{}; + struct TagPairPACEComputeRho{}; + struct TagPairPACEComputeFS{}; + struct TagPairPACEComputeGamma{}; + struct TagPairPACEComputeWeights{}; + struct TagPairPACEComputeDerivative{}; + + template + struct TagPairPACEComputeForce{}; + + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef EV_FLOAT value_type; + using complex = SNAComplex; + + PairPACEExtrapolationKokkos(class LAMMPS *); + ~PairPACEExtrapolationKokkos() override; + + void compute(int, int) override; + void coeff(int, char **) override; + void init_style() override; + double init_one(int, int) override; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairPACEComputeNeigh,const typename Kokkos::TeamPolicy::member_type& team) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairPACEComputeRadial,const typename Kokkos::TeamPolicy::member_type& team) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairPACEComputeYlm,const typename Kokkos::TeamPolicy::member_type& team) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairPACEComputeAi,const typename Kokkos::TeamPolicy::member_type& team) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairPACEConjugateAi,const int& ii) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairPACEComputeRho,const int& iter) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairPACEComputeFS,const int& ii) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairPACEComputeGamma, const int& ii) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairPACEComputeWeights,const int& iter) const; + + KOKKOS_INLINE_FUNCTION + void operator() (TagPairPACEComputeDerivative,const typename Kokkos::TeamPolicy::member_type& team) const; + + template + KOKKOS_INLINE_FUNCTION + void operator() (TagPairPACEComputeForce,const int& ii) const; + + template + KOKKOS_INLINE_FUNCTION + void operator() (TagPairPACEComputeForce,const int& ii, EV_FLOAT&) const; + + + void *extract(const char *str, int &dim); + void *extract_peratom(const char *str, int &ncol); + + protected: + int inum, maxneigh, chunk_size, chunk_offset, idx_ms_combs_max, total_num_functions_max; + int host_flag; + int gamma_flag; + + int eflag, vflag; + + int neighflag, max_ndensity; + int nelements, lmax, nradmax, nradbase; + + typename AT::t_neighbors_2d d_neighbors; + typename AT::t_int_1d_randomread d_ilist; + typename AT::t_int_1d_randomread d_numneigh; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; + + typename AT::t_x_array_randomread x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + + typedef Kokkos::DualView tdual_fparams; + tdual_fparams k_cutsq, k_scale; + typedef Kokkos::View t_fparams; + t_fparams d_cutsq, d_scale; + + typename AT::t_int_1d d_map; + + int need_dup; + + using KKDeviceType = typename KKDevice::value; + + template + using DupScatterView = KKScatterView; + + template + using NonDupScatterView = KKScatterView; + + DupScatterView dup_f; + DupScatterView dup_vatom; + + NonDupScatterView ndup_f; + NonDupScatterView ndup_vatom; + + friend void pair_virial_fdotr_compute(PairPACEExtrapolationKokkos*); + + void grow(int, int); + void copy_pertype(); + void copy_splines(); + void copy_tilde(); + void allocate() override; + void precompute_harmonics(); + double memory_usage() override; + + template + KOKKOS_INLINE_FUNCTION + void v_tally_xyz(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &fx, const F_FLOAT &fy, const F_FLOAT &fz, + const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const; + + KOKKOS_INLINE_FUNCTION + void compute_barplm(int, int, double, int) const; + + KOKKOS_INLINE_FUNCTION + void compute_ylm(int, int, double, double, double, int) const; + + KOKKOS_INLINE_FUNCTION + void cutoff_func_poly(const double, const double, const double, double &, double &) const; + + KOKKOS_INLINE_FUNCTION + void Fexp(const double, const double, double &, double &) const; + + KOKKOS_INLINE_FUNCTION + void FexpShiftedScaled(const double, const double, double &, double &) const; + + KOKKOS_INLINE_FUNCTION + void inner_cutoff(const double, const double, const double, double &, double &) const; + + KOKKOS_INLINE_FUNCTION + void FS_values_and_derivatives(const int, double&, const int) const; + + KOKKOS_INLINE_FUNCTION + void evaluate_splines(const int, const int, double, int, int, int, int) const; + + template + void check_team_size_for(int, int&, int); + + template + void check_team_size_reduce(int, int&, int); + + // Utility routine which wraps computing per-team scratch size requirements for + // ComputeNeigh, ComputeUi, and ComputeFusedDeidrj + template + int scratch_size_helper(int values_per_team); + + typedef Kokkos::View t_ace_1i; + typedef Kokkos::View t_ace_2i; + typedef Kokkos::View t_ace_3i; + typedef Kokkos::View t_ace_4i; + typedef Kokkos::View t_ace_1d; + typedef Kokkos::View t_ace_2d; + typedef Kokkos::View t_ace_2d3; + typedef Kokkos::View t_ace_3d; + typedef Kokkos::View tc_ace_3d; + typedef Kokkos::View t_ace_3d3; + typedef Kokkos::View t_ace_3d4; + typedef Kokkos::View t_ace_4d; + typedef Kokkos::View t_ace_1c; + typedef Kokkos::View t_ace_2c; + typedef Kokkos::View t_ace_3c; + typedef Kokkos::View t_ace_3c3; + typedef Kokkos::View t_ace_4c; + typedef Kokkos::View t_ace_4c3; + + typedef Kokkos::View::HostMirror th_ace_1d; + + t_ace_3d A_rank1; + t_ace_4c A; + + t_ace_3c A_list; + t_ace_3c A_forward_prod; + + t_ace_3d weights_rank1; + t_ace_4c weights; + + t_ace_1d e_atom; + t_ace_2d rhos; + t_ace_2d dF_drho; + + // hard-core repulsion + t_ace_1d rho_core; + t_ace_3c dB_flatten; + t_ace_2d cr; + t_ace_2d dcr; + t_ace_1d dF_drho_core; + + // radial functions + t_ace_4d fr; + t_ace_4d dfr; + t_ace_3d gr; + t_ace_3d dgr; + t_ace_3d d_values; + t_ace_3d d_derivatives; + + // inverted active set + tc_ace_3d d_ASI; + t_ace_2d projections; + t_ace_1d d_gamma; + th_ace_1d h_gamma; + + // Spherical Harmonics + void pre_compute_harmonics(int); + + KOKKOS_INLINE_FUNCTION + void compute_barplm(double rz, int lmaxi); + + KOKKOS_INLINE_FUNCTION + void compute_ylm(double rx, double ry, double rz, int lmaxi); + + t_ace_1d alm; + t_ace_1d blm; + t_ace_1d cl; + t_ace_1d dl; + + t_ace_3d plm; + t_ace_3d dplm; + + t_ace_3c ylm; + t_ace_4c3 dylm; + + // short neigh list + t_ace_1i d_ncount; + t_ace_2d d_mu; + t_ace_2d d_rnorms; + t_ace_3d3 d_rhats; + t_ace_2i d_nearest; + + // per-type + t_ace_1i d_ndensity; + t_ace_1i d_npoti; + t_ace_1d d_rho_core_cutoff; + t_ace_1d d_drho_core_cutoff; + t_ace_1d d_E0vals; + t_ace_2d d_wpre; + t_ace_2d d_mexp; + + // tilde + t_ace_1i d_idx_ms_combs_count; + t_ace_1i d_total_basis_size; + t_ace_2i d_rank; + t_ace_2i d_num_ms_combs; + t_ace_2i d_func_inds; + t_ace_3i d_mus; + t_ace_3i d_ns; + t_ace_3i d_ls; + t_ace_3i d_ms_combs; +// t_ace_3d d_ctildes; + t_ace_2d d_gen_cgs; + t_ace_3d d_coeffs; + + t_ace_3d3 f_ij; + + public: + struct SplineInterpolatorKokkos { + int ntot, nlut, num_of_functions; + double cutoff, deltaSplineBins, invrscalelookup, rscalelookup; + + t_ace_3d4 lookupTable; + + void operator=(const SplineInterpolator &spline); + + void deallocate() { + lookupTable = t_ace_3d4(); + } + + double memory_usage() { + return lookupTable.span() * sizeof(typename decltype(lookupTable)::value_type); + } + + KOKKOS_INLINE_FUNCTION + void calcSplines(const int ii, const int jj, const double r, const t_ace_3d &d_values, const t_ace_3d &d_derivatives) const; + }; + + Kokkos::DualView k_splines_gk; + Kokkos::DualView k_splines_rnl; + Kokkos::DualView k_splines_hc; + +}; +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/KOKKOS/pair_pace_kokkos.cpp b/src/KOKKOS/pair_pace_kokkos.cpp index dd5b97fe32..6f1e3feaf8 100644 --- a/src/KOKKOS/pair_pace_kokkos.cpp +++ b/src/KOKKOS/pair_pace_kokkos.cpp @@ -1574,7 +1574,25 @@ void PairPACEKokkos::evaluate_splines(const int ii, const int jj, do } /* ---------------------------------------------------------------------- */ +template +void PairPACEKokkos::SplineInterpolatorKokkos::operator=(const SplineInterpolator &spline) { + cutoff = spline.cutoff; + deltaSplineBins = spline.deltaSplineBins; + ntot = spline.ntot; + nlut = spline.nlut; + invrscalelookup = spline.invrscalelookup; + rscalelookup = spline.rscalelookup; + num_of_functions = spline.num_of_functions; + lookupTable = t_ace_3d4("lookupTable", ntot+1, num_of_functions); + auto h_lookupTable = Kokkos::create_mirror_view(lookupTable); + for (int i = 0; i < ntot+1; i++) + for (int j = 0; j < num_of_functions; j++) + for (int k = 0; k < 4; k++) + h_lookupTable(i, j, k) = spline.lookupTable(i, j, k); + Kokkos::deep_copy(lookupTable, h_lookupTable); +} +/* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void PairPACEKokkos::SplineInterpolatorKokkos::calcSplines(const int ii, const int jj, const double r, const t_ace_3d &d_values, const t_ace_3d &d_derivatives) const diff --git a/src/KOKKOS/pair_pace_kokkos.h b/src/KOKKOS/pair_pace_kokkos.h index cc2a3cb674..39cfd100f8 100644 --- a/src/KOKKOS/pair_pace_kokkos.h +++ b/src/KOKKOS/pair_pace_kokkos.h @@ -24,10 +24,11 @@ PairStyle(pace/kk/host,PairPACEKokkos); #define LMP_PAIR_PACE_KOKKOS_H #include "pair_pace.h" -#include "ace-evaluator/ace_radial.h" #include "kokkos_type.h" #include "pair_kokkos.h" +class SplineInterpolator; + namespace LAMMPS_NS { template @@ -293,23 +294,7 @@ class PairPACEKokkos : public PairPACE { t_ace_3d4 lookupTable; - void operator=(const SplineInterpolator &spline) { - cutoff = spline.cutoff; - deltaSplineBins = spline.deltaSplineBins; - ntot = spline.ntot; - nlut = spline.nlut; - invrscalelookup = spline.invrscalelookup; - rscalelookup = spline.rscalelookup; - num_of_functions = spline.num_of_functions; - - lookupTable = t_ace_3d4("lookupTable", ntot+1, num_of_functions); - auto h_lookupTable = Kokkos::create_mirror_view(lookupTable); - for (int i = 0; i < ntot+1; i++) - for (int j = 0; j < num_of_functions; j++) - for (int k = 0; k < 4; k++) - h_lookupTable(i, j, k) = spline.lookupTable(i, j, k); - Kokkos::deep_copy(lookupTable, h_lookupTable); - } + void operator=(const SplineInterpolator &spline); void deallocate() { lookupTable = t_ace_3d4(); diff --git a/src/ML-PACE/pair_pace_extrapolation.cpp b/src/ML-PACE/pair_pace_extrapolation.cpp index ec185e75df..8a0116526a 100644 --- a/src/ML-PACE/pair_pace_extrapolation.cpp +++ b/src/ML-PACE/pair_pace_extrapolation.cpp @@ -98,6 +98,8 @@ PairPACEExtrapolation::PairPACEExtrapolation(LAMMPS *lmp) : Pair(lmp) scale = nullptr; flag_compute_extrapolation_grade = 0; extrapolation_grade_gamma = nullptr; + + chunksize = 4096; } /* ---------------------------------------------------------------------- @@ -133,16 +135,12 @@ void PairPACEExtrapolation::compute(int eflag, int vflag) double **x = atom->x; double **f = atom->f; - tagint *tag = atom->tag; int *type = atom->type; // number of atoms in cell int nlocal = atom->nlocal; int newton_pair = force->newton_pair; - // number of atoms including ghost atoms - int nall = nlocal + atom->nghost; - // inum: length of the neighborlists list inum = list->inum; @@ -283,7 +281,20 @@ void PairPACEExtrapolation::allocate() void PairPACEExtrapolation::settings(int narg, char **arg) { - if (narg > 0) error->all(FLERR, "Pair style pace/extrapolation supports no keywords"); +// if (narg > 2) error->all(FLERR, "Pair style pace/extrapolation supports no keywords"); + if (narg > 2) utils::missing_cmd_args(FLERR, "pair_style pace/extrapolation", error); + // ACE potentials are parameterized in metal units + if (strcmp("metal", update->unit_style) != 0) + error->all(FLERR, "ACE potentials require 'metal' units"); + + int iarg = 0; + while (iarg < narg) { + if (strcmp(arg[iarg], "chunksize") == 0) { + chunksize = utils::inumeric(FLERR, arg[iarg + 1], false, lmp); + iarg += 2; + } else + error->all(FLERR, "Unknown pair_style pace keyword: {}", arg[iarg]); + } if (comm->me == 0) utils::logmesg(lmp, "ACE/AL version: {}.{}.{}\n", VERSION_YEAR, VERSION_MONTH, VERSION_DAY); @@ -343,7 +354,6 @@ void PairPACEExtrapolation::coeff(int narg, char **arg) aceimpl->rec_ace->element_type_mapping.init(atom->ntypes + 1); aceimpl->rec_ace->element_type_mapping.fill(-1); //-1 means atom not included into potential - FILE *species_type_file = nullptr; const int n = atom->ntypes; element_names.resize(n); diff --git a/src/ML-PACE/pair_pace_extrapolation.h b/src/ML-PACE/pair_pace_extrapolation.h index c5d9da23db..6f7eeb279e 100644 --- a/src/ML-PACE/pair_pace_extrapolation.h +++ b/src/ML-PACE/pair_pace_extrapolation.h @@ -49,13 +49,15 @@ class PairPACEExtrapolation : public Pair { struct ACEALImpl *aceimpl; int nmax; - void allocate(); + virtual void allocate(); std::vector element_names; // list of elements (used by dump pace/extrapolation) double *extrapolation_grade_gamma; //per-atom gamma value int flag_compute_extrapolation_grade; double **scale; + + int chunksize; }; } // namespace LAMMPS_NS diff --git a/src/fix_pair.cpp b/src/fix_pair.cpp index 5e5ccbf810..59f9dbda4a 100644 --- a/src/fix_pair.cpp +++ b/src/fix_pair.cpp @@ -21,6 +21,7 @@ #include "memory.h" #include "pair.h" #include "update.h" +#include "fmt/format.h" using namespace LAMMPS_NS; using namespace FixConst; @@ -36,7 +37,7 @@ FixPair::FixPair(LAMMPS *lmp, int narg, char **arg) : if (nevery < 1) error->all(FLERR,"Illegal fix pair every value: {}", nevery); pairname = utils::strdup(arg[4]); - pstyle = force->pair_match(pairname,1,0); + query_pstyle(lmp); if (pstyle == nullptr) error->all(FLERR,"Pair style {} for fix pair not found", pairname); nfield = (narg-5) / 2; @@ -130,6 +131,28 @@ FixPair::FixPair(LAMMPS *lmp, int narg, char **arg) : lasttime = -1; } +/* ---------------------------------------------------------------------- */ + +void FixPair::query_pstyle(LAMMPS *lmp) { + char *cptr=nullptr; + int nsub = 0; + if ((cptr = strchr(pairname, ':'))) { + *cptr = '\0'; + nsub = utils::inumeric(FLERR,cptr+1,false,lmp); + } + pstyle = nullptr; + if (lmp->suffix_enable) { + if (lmp->suffix) { + pstyle = force->pair_match(fmt::format("{}/{}", pairname, lmp->suffix), 1, nsub); + if (pstyle == nullptr && (lmp->suffix2)) { + pstyle = force->pair_match(fmt::format("{}/{}", pairname, lmp->suffix2), 1, nsub); + } + } + } + if (pstyle == nullptr) pstyle = force->pair_match(pairname, 1, nsub); +} + + /* ---------------------------------------------------------------------- */ FixPair::~FixPair() @@ -171,7 +194,7 @@ void FixPair::init() { // insure pair style still exists - pstyle = force->pair_match(pairname,1,0); + query_pstyle(lmp); if (pstyle == nullptr) error->all(FLERR,"Pair style {} for fix pair not found", pairname); } diff --git a/src/fix_pair.h b/src/fix_pair.h index 765b261ee5..57fbdf8ecb 100644 --- a/src/fix_pair.h +++ b/src/fix_pair.h @@ -56,6 +56,8 @@ class FixPair : public Fix { class Pair *pstyle; double *vector; double **array; + + void query_pstyle(LAMMPS *lmp); }; } // namespace LAMMPS_NS