diff --git a/doc/src/fix_acks2_reaxff.rst b/doc/src/fix_acks2_reaxff.rst new file mode 100644 index 0000000000..d4b176ad22 --- /dev/null +++ b/doc/src/fix_acks2_reaxff.rst @@ -0,0 +1,97 @@ +.. index:: fix acks2/reaxff + +fix acks2/reaxff command +==================== + +Syntax +"""""" + +.. parsed-literal:: + + fix ID group-ID acks2/reaxff Nevery cutlo cuthi tolerance params args + +* ID, group-ID are documented in :doc:`fix ` command +* acks2/reaxff = style name of this fix command +* Nevery = perform QEq every this many steps +* cutlo,cuthi = lo and hi cutoff for Taper radius +* tolerance = precision to which charges will be equilibrated +* params = reaxff or a filename + +Examples +"""""""" + +.. code-block:: LAMMPS + + fix 1 all acks2/reaxff 1 0.0 10.0 1.0e-6 reaxff + fix 1 all acks2/reaxff 1 0.0 10.0 1.0e-6 param.acks2 + +Description +""""""""""" + +Perform the charge equilibration (QEq) method as described in :ref:`(Verstraelen) `. It is +typically used in conjunction with the ReaxFF force field model as +implemented in the :doc:`pair_style reaxff ` command, but +it can be used with any potential in LAMMPS, so long as it defines and +uses charges on each atom. For more technical details about the +charge equilibration performed by fix acks2/reaxff, see the +:ref:`(O'Hearn) ` paper. + +The QEq method minimizes the electrostatic energy of the system by +adjusting the partial charge on individual atoms based on interactions +with their neighbors. It requires some parameters for each atom type. +If the *params* setting above is the word "reaxff", then these are +extracted from the :doc:`pair_style reaxff ` command and +the ReaxFF force field file it reads in. If a file name is specified +for *params*\ , then the parameters are taken from the specified file +and the file must contain one line for each atom type. The latter +form must be used when performing QeQ with a non-ReaxFF potential. +Each line should be formatted as follows: + +.. parsed-literal:: + + bond_softness + itype chi eta gamma bcut + +where the first line is the global parameter *bond_softness*. The remaining +1 to Ntypes lines include *itype*, the atom type from 1 to Ntypes, *chi*, the +electronegativity in eV, *eta*, the self-Coulomb +potential in eV, *gamma*, the valence orbital +exponent, and *bcut*, the bond cutoff distance. Note that these 4 quantities are also in the ReaxFF +potential file, except that eta is defined here as twice the eta value +in the ReaxFF file. Note that unlike the rest of LAMMPS, the units +of this fix are hard-coded to be A, eV, and electronic charge. + +**Restart, fix_modify, output, run start/stop, minimize info:** + +No information about this fix is written to :doc:`binary restart files `. No global scalar or vector or per-atom +quantities are stored by this fix for access by various :doc:`output commands `. No parameter of this fix can be used +with the *start/stop* keywords of the :doc:`run ` command. + +This fix is invoked during :doc:`energy minimization `. + +Restrictions +"""""""""""" + +This fix is part of the REAXFF package. It is only enabled if +LAMMPS was built with that package. See the :doc:`Build package ` doc page for more info. + +This fix does not correctly handle interactions +involving multiple periodic images of the same atom. Hence, it should not +be used for periodic cell dimensions less than 10 angstroms. + +Related commands +"""""""""""""""" + +:doc:`pair_style reaxff ` + +**Default:** none + +---------- + +.. _O'Hearn: + +**(O'Hearn)** O'Hearn, Alperen, Aktulga, SIAM J. Sci. Comput., 42(1), C1–C22 (2020). + +.. _Verstraelen: + +**(Verstraelen)** Verstraelen, Ayers, Speybroeck, Waroquier, J. Chem. Phys. 138, 074108 (2013). diff --git a/doc/src/pair_reaxff.rst b/doc/src/pair_reaxff.rst index a3a9c81cb8..be49f565c2 100644 --- a/doc/src/pair_reaxff.rst +++ b/doc/src/pair_reaxff.rst @@ -20,7 +20,7 @@ Syntax .. parsed-literal:: keyword = *checkqeq* or *lgvdw* or *safezone* or *mincap* or *minhbonds* - *checkqeq* value = *yes* or *no* = whether or not to require qeq/reaxff fix + *checkqeq* value = *yes* or *no* = whether or not to require qeq/reaxff or acks2/reax fix *enobonds* value = *yes* or *no* = whether or not to tally energy of atoms with no bonds *lgvdw* value = *yes* or *no* = whether or not to use a low gradient vdW correction *safezone* = factor used for array allocation @@ -119,7 +119,8 @@ The ReaxFF parameter files provided were created using a charge equilibration (QEq) model for handling the electrostatic interactions. Therefore, by default, LAMMPS requires that either the :doc:`fix qeq/reaxff ` or the -:doc:`fix qeq/shielded ` command be used with +:doc:`fix qeq/shielded ` or :doc:`fix acks2/reaxff ` +command be used with *pair_style reaxff* when simulating a ReaxFF model, to equilibrate the charges each timestep. @@ -128,7 +129,8 @@ for the QEq fixes, allowing a simulation to be run without charge equilibration. In this case, the static charges you assign to each atom will be used for computing the electrostatic interactions in the system. See the :doc:`fix qeq/reaxff ` or -:doc:`fix qeq/shielded ` command documentation for more details. +:doc:`fix qeq/shielded ` or :doc:`fix acks2/reaxff ` +command documentation for more details. Using the optional keyword *lgvdw* with the value *yes* turns on the low-gradient correction of ReaxFF for long-range London Dispersion, @@ -352,7 +354,8 @@ Related commands """""""""""""""" :doc:`pair_coeff `, :doc:`fix qeq/reaxff `, -:doc:`fix reaxff/bonds `, :doc:`fix reaxff/species ` +:doc:`fix acks2/reax `, :doc:`fix reaxff/bonds `, +:doc:`fix reaxff/species ` Default """"""" diff --git a/src/.gitignore b/src/.gitignore index 6c0a838c1b..2fc68c0e5d 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -613,6 +613,8 @@ /fft3d_wrap.h /fix_accelerate_cos.cpp /fix_accelerate_cos.h +/fix_acks2_reaxff.cpp +/fix_acks2_reaxff.h /fix_adapt_fep.cpp /fix_adapt_fep.h /fix_addtorque.cpp diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index e23dd11500..412cae72a4 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -109,6 +109,8 @@ action domain_kokkos.h action fftdata_kokkos.h fft3d.h action fft3d_kokkos.cpp fft3d.cpp action fft3d_kokkos.h fft3d.h +action fix_acks2_reaxff_kokkos.cpp fix_acks2_reaxff.cpp +action fix_acks2_reaxff_kokkos.h fix_acks2_reaxff.h action fix_deform_kokkos.cpp action fix_deform_kokkos.h action fix_enforce2d_kokkos.cpp diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp new file mode 100644 index 0000000000..c4b6a6b931 --- /dev/null +++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.cpp @@ -0,0 +1,1930 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "fix_acks2_reaxff_kokkos.h" +#include +#include "kokkos.h" +#include "atom.h" +#include "atom_masks.h" +#include "atom_kokkos.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list_kokkos.h" +#include "neigh_request.h" +#include "update.h" +#include "integrate.h" +#include "memory_kokkos.h" +#include "error.h" +#include "pair_reaxff_kokkos.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +#define SMALL 0.0001 +#define EV_TO_KCAL_PER_MOL 14.4 + +/* ---------------------------------------------------------------------- */ + +template +FixACKS2ReaxFFKokkos:: +FixACKS2ReaxFFKokkos(LAMMPS *lmp, int narg, char **arg) : + FixACKS2ReaxFF(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + + datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK | TAG_MASK; + datamask_modify = Q_MASK | X_MASK; + + nmax = m_cap = 0; + allocated_flag = 0; + nprev = 4; + + memory->destroy(s_hist); + memory->destroy(s_hist_X); + memory->destroy(s_hist_last); + grow_arrays(atom->nmax); + memoryKK->create_kokkos(k_s_hist_last,s_hist_last,2,nprev,"acks2/reax:s_hist_last"); + d_s_hist_last = k_s_hist_last.template view(); + + d_mfill_offset = typename AT::t_int_scalar("acks2/kk:mfill_offset"); + + comm_me_0_flag = (comm->me == 0); +} + +/* ---------------------------------------------------------------------- */ + +template +FixACKS2ReaxFFKokkos::~FixACKS2ReaxFFKokkos() +{ + if (copymode) return; + + memoryKK->destroy_kokkos(k_s_hist,s_hist); + memoryKK->destroy_kokkos(k_s_hist_X,s_hist_X); + memoryKK->destroy_kokkos(k_s_hist_last,s_hist_last); + + deallocate_array(); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::init() +{ + atomKK->k_q.modify(); + atomKK->k_q.sync(); + + FixACKS2ReaxFF::init(); + + neighflag = lmp->kokkos->neighflag_qeq; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = std::is_same::value && + !std::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = std::is_same::value; + + if (neighflag == FULL) { + neighbor->requests[irequest]->fix = 1; + neighbor->requests[irequest]->pair = 0; + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + } else { //if (neighflag == HALF || neighflag == HALFTHREAD) + neighbor->requests[irequest]->fix = 1; + neighbor->requests[irequest]->pair = 0; + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + neighbor->requests[irequest]->ghost = 1; + } + + int ntypes = atom->ntypes; + k_params = Kokkos::DualView + ("FixACKS2ReaxFF::params",ntypes+1); + params = k_params.template view(); + + for (int n = 1; n <= ntypes; n++) { + k_params.h_view(n).chi = chi[n]; + k_params.h_view(n).eta = eta[n]; + k_params.h_view(n).gamma = gamma[n]; + k_params.h_view(n).bcut_acks2 = bcut_acks2[n]; + } + k_params.template modify(); + + cutsq = swb * swb; + + init_shielding_k(); + init_hist(); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::init_shielding_k() +{ + int i,j; + int ntypes = atom->ntypes; + + k_shield = DAT::tdual_ffloat_2d("acks2/kk:shield",ntypes+1,ntypes+1); + d_shield = k_shield.template view(); + + for( i = 1; i <= ntypes; ++i ) + for( j = 1; j <= ntypes; ++j ) + k_shield.h_view(i,j) = pow( gamma[i] * gamma[j], -1.5 ); + + k_shield.template modify(); + k_shield.template sync(); + + k_bcut = DAT::tdual_ffloat_2d("acks2/kk:bcut",ntypes+1,ntypes+1); + d_bcut = k_bcut.template view(); + + for( i = 1; i <= ntypes; ++i ) + for( j = 1; j <= ntypes; ++j ) + k_bcut.h_view(i,j) = 0.5*(bcut_acks2[i] + bcut_acks2[j]); + + k_bcut.template modify(); + k_bcut.template sync(); + + k_tap = DAT::tdual_ffloat_1d("acks2/kk:tap",8); + d_tap = k_tap.template view(); + + for (i = 0; i < 8; i ++) + k_tap.h_view(i) = Tap[i]; + + k_tap.template modify(); + k_tap.template sync(); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::init_hist() +{ + k_s_hist.clear_sync_state(); + k_s_hist_X.clear_sync_state(); + k_s_hist_last.clear_sync_state(); + + Kokkos::deep_copy(d_s_hist,0.0); + Kokkos::deep_copy(d_s_hist_X,0.0); + Kokkos::deep_copy(d_s_hist_last,0.0); + + k_s_hist.template modify(); + k_s_hist_X.template modify(); + k_s_hist_last.template modify(); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::setup_pre_force(int vflag) +{ + pre_force(vflag); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::pre_force(int vflag) +{ + if (update->ntimestep % nevery) return; + + atomKK->sync(execution_space,datamask_read); + + x = atomKK->k_x.view(); + v = atomKK->k_v.view(); + f = atomKK->k_f.view(); + q = atomKK->k_q.view(); + tag = atomKK->k_tag.view(); + type = atomKK->k_type.view(); + mask = atomKK->k_mask.view(); + nlocal = atomKK->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + + k_params.template sync(); + k_shield.template sync(); + k_bcut.template sync(); + k_tap.template sync(); + + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + + nn = list->inum; + NN = list->inum + list->gnum; + + copymode = 1; + + // allocate + + allocate_array(); + + // get max number of neighbor + + if (!allocated_flag || last_allocate < neighbor->lastcall) { + allocate_matrix(); + last_allocate = update->ntimestep; + } + + // compute_H + + if (execution_space == Host) { // CPU + if (neighflag == FULL) { + FixACKS2ReaxFFKokkosComputeHFunctor computeH_functor(this); + Kokkos::parallel_scan(nn,computeH_functor); + } else { // HALF and HALFTHREAD are the same + FixACKS2ReaxFFKokkosComputeHFunctor computeH_functor(this); + Kokkos::parallel_scan(nn,computeH_functor); + } + } else { // GPU, use teams + Kokkos::deep_copy(d_mfill_offset,0); + + int vector_length = 32; + int atoms_per_team = 4; + int num_teams = nn / atoms_per_team + (nn % atoms_per_team ? 1 : 0); + + Kokkos::TeamPolicy policy(num_teams, atoms_per_team, + vector_length); + + if (neighflag == FULL) { + FixACKS2ReaxFFKokkosComputeHFunctor computeH_functor( + this, atoms_per_team, vector_length); + Kokkos::parallel_for(policy, computeH_functor); + } else { // HALF and HALFTHREAD are the same + FixACKS2ReaxFFKokkosComputeHFunctor computeH_functor( + this, atoms_per_team, vector_length); + Kokkos::parallel_for(policy, computeH_functor); + } + } + + need_dup = lmp->kokkos->need_dup(1); + + if (need_dup) + dup_X_diag = Kokkos::Experimental::create_scatter_view (d_X_diag); // allocate duplicated memory + else + ndup_X_diag = Kokkos::Experimental::create_scatter_view (d_X_diag); + + // compute_X + + Kokkos::deep_copy(d_X_diag,0.0); + + if (execution_space == Host || 1) { // CPU + if (neighflag == FULL) { + FixACKS2ReaxFFKokkosComputeXFunctor computeX_functor(this); + Kokkos::parallel_scan(nn,computeX_functor); + } else if (neighflag == HALFTHREAD) { + FixACKS2ReaxFFKokkosComputeXFunctor computeX_functor(this); + Kokkos::parallel_scan(nn,computeX_functor); + } else { + FixACKS2ReaxFFKokkosComputeXFunctor computeX_functor(this); + Kokkos::parallel_scan(nn,computeX_functor); + } + } else { // GPU, use teams + Kokkos::deep_copy(d_mfill_offset,0); + + int vector_length = 32; + int atoms_per_team = 4; + int num_teams = nn / atoms_per_team + (nn % atoms_per_team ? 1 : 0); + + Kokkos::TeamPolicy policy(num_teams, atoms_per_team, + vector_length); + if (neighflag == FULL) { + FixACKS2ReaxFFKokkosComputeXFunctor computeX_functor( + this, atoms_per_team, vector_length); + Kokkos::parallel_for(policy, computeX_functor); + } else if (neighflag == HALFTHREAD) { + FixACKS2ReaxFFKokkosComputeXFunctor computeX_functor( + this, atoms_per_team, vector_length); + Kokkos::parallel_for(policy, computeX_functor); + } else { + FixACKS2ReaxFFKokkosComputeXFunctor computeX_functor( + this, atoms_per_team, vector_length); + Kokkos::parallel_for(policy, computeX_functor); + } + } + + if (need_dup) { + Kokkos::Experimental::contribute(d_X_diag, dup_X_diag); + + // free duplicated memory + + dup_X_diag = decltype(dup_X_diag)(); + } + + if (neighflag != FULL) { + pack_flag = 4; + //comm->reverse_comm_fix(this); //Coll_Vector( X_diag ); + k_X_diag.template modify(); + k_X_diag.template sync(); + comm->reverse_comm_fix(this); + k_X_diag.template modify(); + k_X_diag.template sync(); + } + + if (field_flag) + get_chi_field(); + + // init_matvec + + k_s_hist.template sync(); + k_s_hist_X.template sync(); + k_s_hist_last.template sync(); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nn),*this); + + pack_flag = 2; + // comm->forward_comm_fix(this); //Dist_vector( s ); + k_s.template modify(); + k_s.template sync(); + comm->forward_comm_fix(this); + more_forward_comm(k_s.h_view.data()); + k_s.template modify(); + k_s.template sync(); + + // bicgstab solve over b_s, s + + bicgstab_solve(); + + calculate_Q(); + + k_s_hist.template modify(); + k_s_hist_X.template modify(); + k_s_hist_last.template modify(); + + copymode = 0; + + if (!allocated_flag) + allocated_flag = 1; + + atomKK->modified(execution_space,datamask_modify); + k_s.modify(); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::num_neigh_item(int ii, int &maxneigh) const +{ + const int i = d_ilist[ii]; + maxneigh += d_numneigh[i]; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::allocate_matrix() +{ + nmax = atom->nmax; + + // determine the total space for the H matrix + + m_cap = 0; + FixACKS2ReaxFFKokkosNumNeighFunctor neigh_functor(this); + Kokkos::parallel_reduce(nn,neigh_functor,m_cap); + + // H matrix + + d_firstnbr = typename AT::t_int_1d("acks2/kk:firstnbr",nmax); + d_numnbrs = typename AT::t_int_1d("acks2/kk:numnbrs",nmax); + d_jlist = typename AT::t_int_1d("acks2/kk:jlist",m_cap); + d_val = typename AT::t_ffloat_1d("acks2/kk:val",m_cap); + + // X matrix + + d_firstnbr_X = typename AT::t_int_1d("acks2/kk:firstnbr_X",nmax); + d_numnbrs_X = typename AT::t_int_1d("acks2/kk:numnbrs_X",nmax); + d_jlist_X = typename AT::t_int_1d("acks2/kk:jlist_X",m_cap); + d_val_X = typename AT::t_ffloat_1d("acks2/kk:val_X",m_cap); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::allocate_array() +{ + // 0 to nn-1: owned atoms related to H matrix + // nn to NN-1: ghost atoms related to H matrix + // NN to NN+nn-1: owned atoms related to X matrix + // NN+nn to 2*NN-1: ghost atoms related X matrix + // 2*NN to 2*NN+1: last two rows, owned by proc 0 + + if (atom->nmax > nmax) { + nmax = atom->nmax; + int size = nmax*2 + 2; + + d_q = typename AT::t_ffloat_1d("acks2/kk:q",size); + + memoryKK->create_kokkos(k_s,s,size,"acks2/kk:s"); + d_s = k_s.template view(); + + d_b_s = typename AT::t_ffloat_1d("acks2/kk:b_s",size); + + d_Hdia_inv = typename AT::t_ffloat_1d("acks2/kk:Hdia_inv",nmax); + + memoryKK->create_kokkos(k_chi_field,chi_field,nmax,"acks2/kk:chi_field"); + d_chi_field = k_chi_field.template view(); + + memoryKK->create_kokkos(k_X_diag,X_diag,nmax,"acks2/kk:X_diag"); + d_X_diag = k_X_diag.template view(); + + d_Xdia_inv = typename AT::t_ffloat_1d("acks2/kk:Xdia_inv",nmax); + + d_p = typename AT::t_ffloat_1d("acks2/kk:p",size); + d_r = typename AT::t_ffloat_1d("acks2/kk:r",size); + + memoryKK->create_kokkos(k_d,d,size,"acks2/kk:d"); + d_d = k_d.template view(); + + d_g = typename AT::t_ffloat_1d("acks2/kk:g",size); + + memoryKK->create_kokkos(k_q_hat,q_hat,size,"acks2/kk:q_hat"); + d_q_hat = k_q_hat.template view(); + + d_r_hat = typename AT::t_ffloat_1d("acks2/kk:r_hat",size); + + memoryKK->create_kokkos(k_y,y,size,"acks2/kk:y"); + d_y = k_y.template view(); + + memoryKK->create_kokkos(k_z,z,size,"acks2/kk:z"); + d_z = k_z.template view(); + } + + if (field_flag) + get_chi_field(); + + // init_storage + Kokkos::parallel_for(Kokkos::RangePolicy(0,NN),*this); + +} + +/* ---------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::deallocate_array() +{ + memoryKK->destroy_kokkos(k_s,s); + memoryKK->destroy_kokkos(k_chi_field,chi_field); + memoryKK->destroy_kokkos(X_diag); + memoryKK->destroy_kokkos(k_d,d); + memoryKK->destroy_kokkos(k_q_hat,q_hat); + memoryKK->destroy_kokkos(k_y,y); + memoryKK->destroy_kokkos(k_z,z); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2Zero, const int &ii) const +{ + const int i = d_ilist[ii]; + const int itype = type(i); + + if (mask[i] & groupbit) { + d_Hdia_inv[i] = 1.0 / params(itype).eta; + d_b_s[i] = -params(itype).chi - d_chi_field[i]; + d_s[i] = 0.0; + d_p[i] = 0.0; + d_r[i] = 0.0; + d_d[i] = 0.0; + } + +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::compute_h_item(int ii, int &m_fill, const bool &final) const +{ + const int i = d_ilist[ii]; + int j,jj,jtype; + + if (mask[i] & groupbit) { + + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + const tagint itag = tag(i); + const int jnum = d_numneigh[i]; + if (final) + d_firstnbr[i] = m_fill; + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + jtype = type(j); + + const X_FLOAT delx = x(j,0) - xtmp; + const X_FLOAT dely = x(j,1) - ytmp; + const X_FLOAT delz = x(j,2) - ztmp; + + if (NEIGHFLAG != FULL) { + // skip half of the interactions + const tagint jtag = tag(j); + if (j >= nlocal) { + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp && x(j,1) < ytmp) continue; + if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue; + } + } + } + + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + if (rsq > cutsq) continue; + + if (final) { + const F_FLOAT r = sqrt(rsq); + d_jlist(m_fill) = j; + const F_FLOAT shldij = d_shield(itype,jtype); + d_val(m_fill) = calculate_H_k(r,shldij); + } + m_fill++; + } + if (final) + d_numnbrs[i] = m_fill - d_firstnbr[i]; + } +} + +/* ---------------------------------------------------------------------- */ + +// Calculate Qeq matrix H where H is a sparse matrix and H[i][j] represents the electrostatic interaction coefficients on atom-i with atom-j +// d_val - contains the non-zero entries of sparse matrix H +// d_numnbrs - d_numnbrs[i] contains the # of non-zero entries in the i-th row of H (which also represents the # of neighbor atoms with electrostatic interaction coefficients with atom-i) +// d_firstnbr- d_firstnbr[i] contains the beginning index from where the H matrix entries corresponding to row-i is stored in d_val +// d_jlist - contains the column index corresponding to each entry in d_val + +template +template +void FixACKS2ReaxFFKokkos::compute_h_team( + const typename Kokkos::TeamPolicy::member_type &team, + int atoms_per_team, int vector_length) const { + + // scratch space setup + Kokkos::View, + Kokkos::MemoryTraits> + s_ilist(team.team_shmem(), atoms_per_team); + Kokkos::View, + Kokkos::MemoryTraits> + s_numnbrs(team.team_shmem(), atoms_per_team); + Kokkos::View, + Kokkos::MemoryTraits> + s_firstnbr(team.team_shmem(), atoms_per_team); + + Kokkos::View, + Kokkos::MemoryTraits> + s_jtype(team.team_shmem(), atoms_per_team, vector_length); + Kokkos::View, + Kokkos::MemoryTraits> + s_jlist(team.team_shmem(), atoms_per_team, vector_length); + Kokkos::View, + Kokkos::MemoryTraits> + s_r(team.team_shmem(), atoms_per_team, vector_length); + + // team of threads work on atoms with index in [firstatom, lastatom) + int firstatom = team.league_rank() * atoms_per_team; + int lastatom = + (firstatom + atoms_per_team < nn) ? (firstatom + atoms_per_team) : nn; + + // kokkos-thread-0 is used to load info from global memory into scratch space + if (team.team_rank() == 0) { + + // copy atom indices from d_ilist[firstatom:lastatom] to scratch space s_ilist[0:atoms_per_team] + // copy # of neighbor atoms for all the atoms with indices in d_ilist[firstatom:lastatom] from d_numneigh to scratch space s_numneigh[0:atoms_per_team] + // calculate total number of neighbor atoms for all atoms assigned to the current team of threads (Note - Total # of neighbor atoms here provides the + // upper bound space requirement to store the H matrix values corresponding to the atoms with indices in d_ilist[firstatom:lastatom]) + + Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team, atoms_per_team), + [&](const int &idx, int &totalnbrs, bool final) { + int ii = firstatom + idx; + + if (ii < nn) { + const int i = d_ilist[ii]; + int jnum = d_numneigh[i]; + + if (final) { + s_ilist[idx] = i; + s_numnbrs[idx] = jnum; + s_firstnbr[idx] = totalnbrs; + } + totalnbrs += jnum; + } else { + s_numnbrs[idx] = 0; + } + }); + } + + // barrier ensures that the data moved to scratch space is visible to all the + // threads of the corresponding team + team.team_barrier(); + + // calculate the global memory offset from where the H matrix values to be + // calculated by the current team will be stored in d_val + int team_firstnbr_idx = 0; + Kokkos::single(Kokkos::PerTeam(team), + [=](int &val) { + int totalnbrs = s_firstnbr[lastatom - firstatom - 1] + + s_numnbrs[lastatom - firstatom - 1]; + val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs); + }, + team_firstnbr_idx); + + // map the H matrix computation of each atom to kokkos-thread (one atom per + // kokkos-thread) neighbor computation for each atom is assigned to vector + // lanes of the corresponding thread + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, atoms_per_team), [&](const int &idx) { + int ii = firstatom + idx; + + if (ii < nn) { + const int i = s_ilist[idx]; + + if (mask[i] & groupbit) { + const X_FLOAT xtmp = x(i, 0); + const X_FLOAT ytmp = x(i, 1); + const X_FLOAT ztmp = x(i, 2); + const int itype = type(i); + const tagint itag = tag(i); + const int jnum = s_numnbrs[idx]; + + // calculate the write-offset for atom-i's first neighbor + int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; + Kokkos::single(Kokkos::PerThread(team), + [&]() { d_firstnbr[i] = atomi_firstnbr_idx; }); + + // current # of neighbor atoms with non-zero electrostatic + // interaction coefficients with atom-i which represents the # of + // non-zero elements in row-i of H matrix + int atomi_nbrs_inH = 0; + + // calculate H matrix values corresponding to atom-i where neighbors + // are processed in batches and the batch size is vector_length + for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) { + + int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; + + // count the # of neighbor atoms with non-zero electrostatic + // interaction coefficients with atom-i in the current batch + int atomi_nbrs_curbatch = 0; + + // compute rsq, jtype, j and store in scratch space which is + // reused later + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, vector_length), + [&](const int &idx, int &m_fill) { + const int jj = jj_start + idx; + + // initialize: -1 represents no interaction with atom-j + // where j = d_neighbors(i,jj) + s_jlist(team.team_rank(), idx) = -1; + + if (jj < jnum) { + int j = d_neighbors(i, jj); + j &= NEIGHMASK; + const int jtype = type(j); + + const X_FLOAT delx = x(j, 0) - xtmp; + const X_FLOAT dely = x(j, 1) - ytmp; + const X_FLOAT delz = x(j, 2) - ztmp; + + // valid nbr interaction + bool valid = true; + if (NEIGHFLAG != FULL) { + // skip half of the interactions + const tagint jtag = tag(j); + if (j >= nlocal) { + if (itag > jtag) { + if ((itag + jtag) % 2 == 0) + valid = false; + } else if (itag < jtag) { + if ((itag + jtag) % 2 == 1) + valid = false; + } else { + if (x(j, 2) < ztmp) + valid = false; + if (x(j, 2) == ztmp && x(j, 1) < ytmp) + valid = false; + if (x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp) + valid = false; + } + } + } + + const F_FLOAT rsq = + delx * delx + dely * dely + delz * delz; + if (rsq > cutsq) + valid = false; + + if (valid) { + s_jlist(team.team_rank(), idx) = j; + s_jtype(team.team_rank(), idx) = jtype; + s_r(team.team_rank(), idx) = sqrt(rsq); + m_fill++; + } + } + }, + atomi_nbrs_curbatch); + + // write non-zero entries of H to global memory + Kokkos::parallel_scan( + Kokkos::ThreadVectorRange(team, vector_length), + [&](const int &idx, int &m_fill, bool final) { + int j = s_jlist(team.team_rank(), idx); + if (final) { + if (j != -1) { + const int jtype = s_jtype(team.team_rank(), idx); + const F_FLOAT r = s_r(team.team_rank(), idx); + const F_FLOAT shldij = d_shield(itype, jtype); + + d_jlist[atomi_nbr_writeIdx + m_fill] = j; + d_val[atomi_nbr_writeIdx + m_fill] = + calculate_H_k(r, shldij); + } + } + + if (j != -1) { + m_fill++; + } + }); + atomi_nbrs_inH += atomi_nbrs_curbatch; + } + + Kokkos::single(Kokkos::PerThread(team), + [&]() { d_numnbrs[i] = atomi_nbrs_inH; }); + } + } + }); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixACKS2ReaxFFKokkos::calculate_H_k(const F_FLOAT &r, const F_FLOAT &shld) const +{ + F_FLOAT taper, denom; + + taper = d_tap[7] * r + d_tap[6]; + taper = taper * r + d_tap[5]; + taper = taper * r + d_tap[4]; + taper = taper * r + d_tap[3]; + taper = taper * r + d_tap[2]; + taper = taper * r + d_tap[1]; + taper = taper * r + d_tap[0]; + + denom = r * r * r + shld; + denom = pow(denom,0.3333333333333); + + return taper * EV_TO_KCAL_PER_MOL / denom; +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::compute_x_item(int ii, int &m_fill, const bool &final) const +{ + // The X_diag array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + auto v_X_diag = ScatterViewHelper::value,decltype(dup_X_diag),decltype(ndup_X_diag)>::get(dup_X_diag,ndup_X_diag); + auto a_X_diag = v_X_diag.template access::value>(); + + const int i = d_ilist[ii]; + int j,jj,jtype; + F_FLOAT tmp = 0.0; + + if (mask[i] & groupbit) { + + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + const tagint itag = tag(i); + const int jnum = d_numneigh[i]; + if (final) + d_firstnbr_X[i] = m_fill; + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + jtype = type(j); + + const X_FLOAT delx = x(j,0) - xtmp; + const X_FLOAT dely = x(j,1) - ytmp; + const X_FLOAT delz = x(j,2) - ztmp; + + if (NEIGHFLAG != FULL) { + // skip half of the interactions + const tagint jtag = tag(j); + if (j >= nlocal) { + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp && x(j,1) < ytmp) continue; + if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue; + } + } + } + + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + if (rsq > cutsq) continue; + + const F_FLOAT bcutoff = d_bcut(itype,jtype); + const F_FLOAT bcutoff2 = bcutoff*bcutoff; + if (rsq > bcutoff2) continue; + + if (final) { + const F_FLOAT r = sqrt(rsq); + d_jlist_X(m_fill) = j; + const F_FLOAT X_val = calculate_X_k(r,bcutoff); + d_val_X(m_fill) = X_val; + tmp -= X_val; + if (NEIGHFLAG != FULL) + a_X_diag[j] -= X_val; + } + m_fill++; + } + if (final) { + a_X_diag[i] += tmp; + d_numnbrs_X[i] = m_fill - d_firstnbr_X[i]; + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +template +void FixACKS2ReaxFFKokkos::compute_x_team( + const typename Kokkos::TeamPolicy::member_type &team, + int atoms_per_team, int vector_length) const { + + // The X_diag array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + auto v_X_diag = ScatterViewHelper::value,decltype(dup_X_diag),decltype(ndup_X_diag)>::get(dup_X_diag,ndup_X_diag); + auto a_X_diag = v_X_diag.template access::value>(); + + // scratch space setup + Kokkos::View, + Kokkos::MemoryTraits> + s_ilist(team.team_shmem(), atoms_per_team); + Kokkos::View, + Kokkos::MemoryTraits> + s_numnbrs(team.team_shmem(), atoms_per_team); + Kokkos::View, + Kokkos::MemoryTraits> + s_firstnbr(team.team_shmem(), atoms_per_team); + + Kokkos::View, + Kokkos::MemoryTraits> + s_jtype(team.team_shmem(), atoms_per_team, vector_length); + Kokkos::View, + Kokkos::MemoryTraits> + s_jlist(team.team_shmem(), atoms_per_team, vector_length); + Kokkos::View, + Kokkos::MemoryTraits> + s_r(team.team_shmem(), atoms_per_team, vector_length); + + // team of threads work on atoms with index in [firstatom, lastatom) + int firstatom = team.league_rank() * atoms_per_team; + int lastatom = + (firstatom + atoms_per_team < nn) ? (firstatom + atoms_per_team) : nn; + + // kokkos-thread-0 is used to load info from global memory into scratch space + if (team.team_rank() == 0) { + + // copy atom indices from d_ilist[firstatom:lastatom] to scratch space s_ilist[0:atoms_per_team] + // copy # of neighbor atoms for all the atoms with indices in d_ilist[firstatom:lastatom] from d_numneigh to scratch space s_numneigh[0:atoms_per_team] + // calculate total number of neighbor atoms for all atoms assigned to the current team of threads (Note - Total # of neighbor atoms here provides the + // upper bound space requirement to store the H matrix values corresponding to the atoms with indices in d_ilist[firstatom:lastatom]) + + Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team, atoms_per_team), + [&](const int &idx, int &totalnbrs, bool final) { + int ii = firstatom + idx; + + if (ii < nn) { + const int i = d_ilist[ii]; + int jnum = d_numneigh[i]; + + if (final) { + s_ilist[idx] = i; + s_numnbrs[idx] = jnum; + s_firstnbr[idx] = totalnbrs; + } + totalnbrs += jnum; + } else { + s_numnbrs[idx] = 0; + } + }); + } + + // barrier ensures that the data moved to scratch space is visible to all the + // threads of the corresponding team + team.team_barrier(); + + // calculate the global memory offset from where the H matrix values to be + // calculated by the current team will be stored in d_val_X + int team_firstnbr_idx = 0; + Kokkos::single(Kokkos::PerTeam(team), + [=](int &val) { + int totalnbrs = s_firstnbr[lastatom - firstatom - 1] + + s_numnbrs[lastatom - firstatom - 1]; + val = Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs); + }, + team_firstnbr_idx); + + // map the H matrix computation of each atom to kokkos-thread (one atom per + // kokkos-thread) neighbor computation for each atom is assigned to vector + // lanes of the corresponding thread + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, atoms_per_team), [&](const int &idx) { + int ii = firstatom + idx; + + if (ii < nn) { + const int i = s_ilist[idx]; + + if (mask[i] & groupbit) { + const X_FLOAT xtmp = x(i, 0); + const X_FLOAT ytmp = x(i, 1); + const X_FLOAT ztmp = x(i, 2); + const int itype = type(i); + const tagint itag = tag(i); + const int jnum = s_numnbrs[idx]; + + // calculate the write-offset for atom-i's first neighbor + int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; + Kokkos::single(Kokkos::PerThread(team), + [&]() { d_firstnbr_X[i] = atomi_firstnbr_idx; }); + + // current # of neighbor atoms with non-zero electrostatic + // interaction coefficients with atom-i which represents the # of + // non-zero elements in row-i of H matrix + int atomi_nbrs_inH = 0; + + // calculate H matrix values corresponding to atom-i where neighbors + // are processed in batches and the batch size is vector_length + for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) { + + int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; + + // count the # of neighbor atoms with non-zero electrostatic + // interaction coefficients with atom-i in the current batch + int atomi_nbrs_curbatch = 0; + + // compute rsq, jtype, j and store in scratch space which is + // reused later + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, vector_length), + [&](const int &idx, int &m_fill) { + const int jj = jj_start + idx; + + // initialize: -1 represents no interaction with atom-j + // where j = d_neighbors(i,jj) + s_jlist(team.team_rank(), idx) = -1; + + if (jj < jnum) { + int j = d_neighbors(i, jj); + j &= NEIGHMASK; + const int jtype = type(j); + + const X_FLOAT delx = x(j, 0) - xtmp; + const X_FLOAT dely = x(j, 1) - ytmp; + const X_FLOAT delz = x(j, 2) - ztmp; + + // valid nbr interaction + bool valid = true; + if (NEIGHFLAG != FULL) { + // skip half of the interactions + const tagint jtag = tag(j); + if (j >= nlocal) { + if (itag > jtag) { + if ((itag + jtag) % 2 == 0) + valid = false; + } else if (itag < jtag) { + if ((itag + jtag) % 2 == 1) + valid = false; + } else { + if (x(j, 2) < ztmp) + valid = false; + if (x(j, 2) == ztmp && x(j, 1) < ytmp) + valid = false; + if (x(j, 2) == ztmp && x(j, 1) == ytmp && + x(j, 0) < xtmp) + valid = false; + } + } + } + + const F_FLOAT rsq = + delx * delx + dely * dely + delz * delz; + if (rsq > cutsq) + valid = false; + + const F_FLOAT bcutoff = d_bcut(itype,jtype); + const F_FLOAT bcutoff2 = bcutoff*bcutoff; + if (rsq > bcutoff2) + valid = false; + + if (valid) { + s_jlist(team.team_rank(), idx) = j; + s_jtype(team.team_rank(), idx) = jtype; + s_r(team.team_rank(), idx) = sqrt(rsq); + m_fill++; + } + } + }, + atomi_nbrs_curbatch); + + // write non-zero entries of H to global memory + Kokkos::parallel_scan( + Kokkos::ThreadVectorRange(team, vector_length), + [&](const int &idx, int &m_fill, bool final) { + int j = s_jlist(team.team_rank(), idx); + if (final) { + if (j != -1) { + const int jtype = s_jtype(team.team_rank(), idx); + const F_FLOAT r = s_r(team.team_rank(), idx); + const F_FLOAT bcutoff = d_bcut(itype, jtype); + + d_jlist_X[atomi_nbr_writeIdx + m_fill] = j; + const F_FLOAT X_val = calculate_X_k(r, bcutoff); + d_val_X[atomi_nbr_writeIdx + m_fill] = + X_val; + a_X_diag[i] -= X_val; + if (NEIGHFLAG != FULL) + a_X_diag[j] -= X_val; + } + } + + if (j != -1) { + m_fill++; + } + }); + atomi_nbrs_inH += atomi_nbrs_curbatch; + } + + Kokkos::single(Kokkos::PerThread(team), + [&]() { d_numnbrs_X[i] = atomi_nbrs_inH; }); + } + } + }); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double FixACKS2ReaxFFKokkos::calculate_X_k( const double &r, const double &bcut) const +{ + const F_FLOAT d = r/bcut; + const F_FLOAT d3 = d*d*d; + const F_FLOAT omd = 1.0 - d; + const F_FLOAT omd2 = omd*omd; + const F_FLOAT omd6 = omd2*omd2*omd2; + + return bond_softness*d3*omd6; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2InitMatvec, const int &ii) const +{ + if (d_X_diag[ii] == 0.0) + d_Xdia_inv[ii] = 1.0; + else + d_Xdia_inv[ii] = 1.0 / d_X_diag[ii]; + + const int i = d_ilist[ii]; + const int itype = type(i); + + if (mask[i] & groupbit) { + d_Hdia_inv[i] = 1.0 / params(itype).eta; + d_b_s[i] = -params(itype).chi - d_chi_field[i]; + d_b_s[NN+i] = 0.0; + + d_s[i] = 4*(d_s_hist(i,0)+d_s_hist(i,2))-(6*d_s_hist(i,1)+d_s_hist(i,3)); + d_s[NN+i] = 4*(d_s_hist_X(i,0)+d_s_hist_X(i,2))-(6*d_s_hist_X(i,1)+d_s_hist_X(i,3)); + } + + // last two rows + if (comm_me_0_flag && ii == 0) { + for (int k = 0; k < 2; k++) { + d_b_s[2*NN+i] = 0.0; + d_s[2*NN+k] = 4*(d_s_hist_last(k,0)+d_s_hist_last(k,2))-(6*d_s_hist_last(k,1)+d_s_hist_last(k,3)); + } + } + +} + +/* ---------------------------------------------------------------------- */ + +template +int FixACKS2ReaxFFKokkos::bicgstab_solve() +{ + int i; + F_FLOAT my_norm,norm_sqr,my_dot,dot_sqr; + double tmp, sigma, rho, rho_old, rnorm, bnorm; + + // sparse_matvec( &H, &X, x, d ); + sparse_matvec_acks2(d_s, d_d); + + pack_flag = 1; + k_d.template modify(); + k_d.template sync(); + if (neighflag != FULL) + comm->reverse_comm_fix(this); //Coll_vector( d ); + more_reverse_comm(k_d.h_view.data()); + k_d.template modify(); + k_d.template sync(); + + // vector_sum( r , 1., b, -1., d, nn ); + // bnorm = parallel_norm( b, nn ); + my_norm = 0.0; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nn),*this,my_norm); + norm_sqr = 0.0; + MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); + bnorm = sqrt(norm_sqr); + + // rnorm = parallel_norm( r, nn); + my_norm = 0.0; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nn),*this,my_norm); + norm_sqr = 0.0; + MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); + rnorm = sqrt(norm_sqr); + + if (bnorm == 0.0 ) bnorm = 1.0; + deep_copy(d_r_hat,d_r); + omega = 1.0; + rho = 1.0; + + for (i = 1; i < imax && rnorm / bnorm > tolerance; ++i) { + // rho = parallel_dot( r_hat, r, nn); + my_dot = 0.0; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nn),*this,my_dot); + dot_sqr = 0.0; + MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); + rho = dot_sqr; + if (rho == 0.0) break; + + if (i > 1) { + beta = (rho / rho_old) * (alpha / omega); + + // vector_sum( p , 1., r, beta, q, nn); + // vector_sum( q , 1., p, -omega, z, nn); + // pre-conditioning + Kokkos::parallel_for(Kokkos::RangePolicy(0,nn),*this); + } else { + + // vector_copy( p , r nn); + // pre-conditioning + Kokkos::parallel_for(Kokkos::RangePolicy(0,nn),*this); + } + + pack_flag = 1; + // comm->forward_comm_fix(this); //Dist_vector( d ); + k_d.template modify(); + k_d.template sync(); + comm->forward_comm_fix(this); + more_forward_comm(k_d.h_view.data()); + k_d.template modify(); + k_d.template sync(); + + // sparse_matvec( &H, &X, d, z ); + sparse_matvec_acks2(d_d, d_z); + + pack_flag = 2; + k_z.template modify(); + k_z.template sync(); + if (neighflag != FULL) + comm->reverse_comm_fix(this); //Coll_vector( z ); + more_reverse_comm(k_z.h_view.data()); + k_z.template modify(); + k_z.template sync(); + + // tmp = parallel_dot( r_hat, z, nn); + my_dot = dot_sqr = 0.0; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nn),*this,my_dot); + MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); + tmp = dot_sqr; + alpha = rho / tmp; + + // vector_sum( q, 1., r, -alpha, z, nn); + // tmp = parallel_dot( q, q, nn); + my_dot = dot_sqr = 0.0; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nn),*this,my_dot); + MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); + tmp = dot_sqr; + + // early convergence check + if (tmp < tolerance) { + // vector_add( x, alpha, d, nn); + Kokkos::parallel_for(Kokkos::RangePolicy(0,nn),*this); + break; + } + + // pre-conditioning + Kokkos::parallel_for(Kokkos::RangePolicy(0,nn),*this); + + // sparse_matvec( &H, &X, q_hat, y ); + pack_flag = 3; + // comm->forward_comm_fix(this); //Dist_vector( q_hat ); + k_q_hat.template modify(); + k_q_hat.template sync(); + comm->forward_comm_fix(this); + more_forward_comm(k_q_hat.h_view.data()); + k_q_hat.template modify(); + k_q_hat.template sync(); + + sparse_matvec_acks2(d_q_hat, d_y); + + pack_flag = 3; + k_y.template modify(); + k_y.template sync(); + if (neighflag != FULL) + comm->reverse_comm_fix(this); //Coll_vector( y ); + more_reverse_comm(k_y.h_view.data()); + k_y.template modify(); + k_y.template sync(); + + // sigma = parallel_dot( y, q, nn); + my_dot = dot_sqr = 0.0; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nn),*this,my_dot); + MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); + sigma = dot_sqr; + + // tmp = parallel_dot( y, y, nn); + my_dot = dot_sqr = 0.0; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nn),*this,my_dot); + MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); + tmp = dot_sqr; + + omega = sigma / tmp; + + // vector_sum( g , alpha, d, omega, q_hat, nn); + // vector_add( x, 1., g, nn); + // vector_sum( r , 1., q, -omega, y, nn); + // rnorm = parallel_norm( r, nn); + my_dot = dot_sqr = 0.0; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nn),*this,my_dot); + MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); + rnorm = sqrt(dot_sqr); + + if (omega == 0) break; + rho_old = rho; + } + + if (comm->me == 0) { + if (omega == 0 || rho == 0) { + char str[128]; + sprintf(str,"Fix acks2/reax/kk BiCGStab numerical breakdown, omega = %g, rho = %g",omega,rho); + error->warning(FLERR,str); + } else if (i >= imax) { + char str[128]; + sprintf(str,"Fix acks2/reax/kk BiCGStab convergence failed after %d iterations " + "at " BIGINT_FORMAT " step",i,update->ntimestep); + error->warning(FLERR,str); + } + } + + return i; +} + + +/* ---------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::calculate_Q() +{ + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nn),*this); + + pack_flag = 2; + //comm->forward_comm_fix( this ); //Dist_vector( s ); + k_s.modify(); + k_s.sync(); + comm->forward_comm_fix(this); + k_s.modify(); + k_s.sync(); + + Kokkos::parallel_for(Kokkos::RangePolicy(0,NN),*this); + +} + +/* ---------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::sparse_matvec_acks2(typename AT::t_ffloat_1d &d_xx_in, typename AT::t_ffloat_1d &d_bb_in) +{ + d_xx = d_xx_in; + d_bb = d_bb_in; + + if (need_dup) + dup_bb = Kokkos::Experimental::create_scatter_view (d_bb); // allocate duplicated memory + else + ndup_bb = Kokkos::Experimental::create_scatter_view (d_bb); + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nn),*this); + + if (neighflag != FULL) + Kokkos::parallel_for(Kokkos::RangePolicy(nn,NN),*this); + + if (neighflag == FULL) { + int teamsize; + if (execution_space == Host) teamsize = 1; + else teamsize = 128; + + Kokkos::parallel_for(Kokkos::TeamPolicy(nn,teamsize),*this); + } else if (neighflag == HALFTHREAD) + Kokkos::parallel_for(Kokkos::RangePolicy >(0,nn),*this); + else if (neighflag == HALF) + Kokkos::parallel_for(Kokkos::RangePolicy >(0,nn),*this); + + if (need_dup) { + Kokkos::Experimental::contribute(d_bb, dup_bb); + + // free duplicated memory + + dup_bb = decltype(dup_bb)(); + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2SparseMatvec1, const int &ii) const +{ + const int i = d_ilist[ii]; + const int itype = type(i); + if (mask[i] & groupbit) { + d_bb[i] = params(itype).eta * d_xx[i]; + d_bb[NN + i] = d_X_diag[i] * d_xx[NN + i]; + } + + // last two rows + if (ii == 0) { + d_bb[2*NN] = 0.0; + d_bb[2*NN + 1] = 0.0; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2SparseMatvec2, const int &ii) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + d_bb[i] = 0.0; + d_bb[NN + i] = 0.0; + } +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2SparseMatvec3_Half, const int &ii) const +{ + // The bb array is duplicated for OpenMP, atomic for CUDA, and neither for Serial + auto v_bb = ScatterViewHelper::value,decltype(dup_bb),decltype(ndup_bb)>::get(dup_bb,ndup_bb); + auto a_bb = v_bb.template access::value>(); + + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + F_FLOAT tmp = 0.0; + + // H Matrix + for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) { + const int j = d_jlist(jj); + tmp += d_val(jj) * d_xx[j]; + a_bb[j] += d_val(jj) * d_xx[i]; + } + a_bb[i] += tmp; + + // X Matrix + tmp = 0.0; + for(int jj = d_firstnbr_X[i]; jj < d_firstnbr_X[i] + d_numnbrs_X[i]; jj++) { + const int j = d_jlist_X(jj); + tmp += d_val_X(jj) * d_xx[NN + j]; + a_bb[NN + j] += d_val_X(jj) * d_xx[NN + i]; + } + a_bb[NN + i] += tmp; + + // Identity Matrix + a_bb[NN + i] += d_xx[i]; + a_bb[i] += d_xx[NN + i]; + + // Second-to-last row/column + a_bb[2*NN] += d_xx[NN + i]; + a_bb[NN + i] += d_xx[2*NN]; + + // Last row/column + a_bb[2*NN + 1] += d_xx[i]; + a_bb[i] += d_xx[2*NN + 1]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2SparseMatvec3_Full, const membertype &team) const +{ + const int i = d_ilist[team.league_rank()]; + if (mask[i] & groupbit) { + F_FLOAT sum; + F_FLOAT sum2; + + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr[i], d_firstnbr[i] + d_numnbrs[i]), [&] (const int &jj, F_FLOAT &sum) { + const int j = d_jlist(jj); + sum += d_val(jj) * d_xx[j]; + }, sum); + team.team_barrier(); + + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team, d_firstnbr_X[i], d_firstnbr_X[i] + d_numnbrs_X[i]), [&] (const int &jj, F_FLOAT &sum2) { + const int j = d_jlist_X(jj); + sum2 += d_val_X(jj) * d_xx[NN + j]; + }, sum2); + + Kokkos::single(Kokkos::PerTeam(team), [&] () { + d_bb[i] += sum; + d_bb[NN + i] += sum2; + + // Identity Matrix + d_bb[NN + i] += d_xx[i]; + d_bb[i] += d_xx[NN + i]; + + // Second-to-last row/column + Kokkos::atomic_add(&(d_bb[2*NN]),d_xx[NN + i]); + d_bb[NN + i] += d_xx[2*NN]; + + // Last row/column + Kokkos::atomic_add(&(d_bb[2*NN + 1]),d_xx[i]); + d_bb[i] += d_xx[2*NN + 1]; + }); + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2Norm1, const int &ii, double &lsum) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + d_r[i] = d_b_s[i] - d_d[i]; + d_r[NN+i] = d_b_s[NN+i] - d_d[NN+i]; + + lsum += d_b_s[i] * d_b_s[i]; + lsum += d_b_s[NN+i] * d_b_s[NN+i]; + } + + // last two rows + if (comm_me_0_flag && ii == 0) { + d_r[2*NN] = d_b_s[2*NN] - d_d[2*NN]; + d_r[2*NN + 1] = d_b_s[2*NN + 1] - d_d[2*NN + 1]; + + lsum += d_b_s[2*NN] * d_b_s[2*NN]; + lsum += d_b_s[2*NN + 1] * d_b_s[2*NN + 1]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2Norm2, const int &ii, double &lsum) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + lsum += d_r[i] * d_r[i]; + lsum += d_r[NN + i] * d_r[NN + i]; + } + + // last two rows + if (comm_me_0_flag && ii == 0) { + lsum += d_r[2*NN] * d_r[2*NN]; + lsum += d_r[2*NN + 1] * d_r[2*NN + 1]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2Dot1, const int &ii, double &lsum) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + lsum += d_r_hat[i] * d_r[i]; + lsum += d_r_hat[NN+i] * d_r[NN+i]; + } + + // last two rows + if (comm_me_0_flag && ii == 0) { + lsum += d_r_hat[2*NN] * d_r[2*NN]; + lsum += d_r_hat[2*NN + 1] * d_r[2*NN + 1]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2Precon1A, const int &ii) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + d_q[i] = d_p[i] - omega*d_z[i]; + d_q[NN+i] = d_p[NN+i] - omega*d_z[NN+i]; + + d_p[i] = d_r[i] + beta*d_q[i]; + d_p[NN+i] = d_r[NN+i] + beta*d_q[NN+i]; + + d_d[i] = d_p[i]*d_Hdia_inv[i]; + d_d[NN+i] = d_p[NN+i]*d_Xdia_inv[i]; + } + + // last two rows + if (comm_me_0_flag && ii == 0) { + d_q[2*NN] = d_p[2*NN] - omega*d_z[2*NN]; + d_q[2*NN + 1] = d_p[2*NN + 1] - omega*d_z[2*NN + 1]; + + d_p[2*NN] = d_r[2*NN] + beta*d_q[2*NN]; + d_p[2*NN + 1] = d_r[2*NN + 1] + beta*d_q[2*NN + 1]; + + d_d[2*NN] = d_p[2*NN]; + d_d[2*NN + 1] = d_p[2*NN + 1]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2Precon1B, const int &ii) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + d_p[i] = d_r[i] ; + d_p[NN+i] = d_r[NN+i]; + + d_d[i] = d_p[i]*d_Hdia_inv[i]; + d_d[NN+i] = d_p[NN+i]*d_Xdia_inv[i]; + } + + // last two rows + if (comm_me_0_flag && ii == 0) { + d_p[2*NN] = d_r[2*NN]; + d_p[2*NN + 1] = d_r[2*NN + 1]; + + d_d[2*NN] = d_p[2*NN]; + d_d[2*NN + 1] = d_p[2*NN + 1]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2Dot2, const int &ii, double &lsum) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + lsum += d_r_hat[i] * d_z[i]; + lsum += d_r_hat[NN+i] * d_z[NN+i]; + } + + // last two rows + if (comm_me_0_flag && ii == 0) { + lsum += d_r_hat[2*NN] * d_z[2*NN]; + lsum += d_r_hat[2*NN + 1] * d_z[2*NN + 1]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2Dot3, const int &ii, double &lsum) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + d_q[i] = d_r[i] - alpha*d_z[i]; + d_q[NN+i] = d_r[NN+i] - alpha*d_z[NN+i]; + + lsum += d_q[i] * d_q[i]; + lsum += d_q[NN+i] * d_q[NN+i]; + } + + // last two rows + if (comm_me_0_flag && ii == 0) { + d_q[2*NN] = d_r[2*NN] - alpha*d_z[2*NN]; + d_q[2*NN + 1] = d_r[2*NN + 1] - alpha*d_z[2*NN + 1]; + + lsum += d_q[2*NN] * d_q[2*NN]; + lsum += d_q[2*NN + 1] * d_q[2*NN + 1]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2Dot4, const int &ii, double &lsum) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + lsum += d_y[i] * d_q[i]; + lsum += d_y[NN+i] * d_q[NN+i]; + } + + // last two rows + if (comm_me_0_flag && ii == 0) { + lsum += d_y[2*NN] * d_q[2*NN]; + lsum += d_y[2*NN + 1] * d_q[2*NN + 1]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2Dot5, const int &ii, double &lsum) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + lsum += d_y[i] * d_y[i]; + lsum += d_y[NN+i] * d_y[NN+i]; + } + + // last two rows + if (comm_me_0_flag && ii == 0) { + lsum += d_y[2*NN] * d_y[2*NN]; + lsum += d_y[2*NN + 1] * d_y[2*NN + 1]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2Add, const int &ii) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + d_s[i] += alpha * d_d[i]; + d_s[NN+i] += alpha * d_d[NN+i]; + } + + // last two rows + if (comm_me_0_flag && ii == 0) { + d_s[2*NN] += alpha*d_d[2*NN]; + d_s[2*NN + 1] += alpha*d_d[2*NN + 1]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2Precon2, const int &ii) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + d_q_hat[i] = d_q[i]*d_Hdia_inv[i]; + d_q_hat[NN+i] = d_q[NN+i]*d_Xdia_inv[i]; + } + + // last two rows + if (comm_me_0_flag && ii == 0) { + d_q_hat[2*NN] = d_q[2*NN]; + d_q_hat[2*NN + 1] = d_q[2*NN + 1]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2Norm3, const int &ii, double &lsum) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + d_g[i] = alpha*d_d[i] + omega*d_q_hat[i]; + d_g[NN+i] = alpha*d_d[NN+i] + omega*d_q_hat[NN+i]; + + d_s[i] += d_g[i]; + d_s[NN+i] += d_g[NN+i]; + + d_r[i] = d_q[i] - omega*d_y[i]; + d_r[NN+i] = d_q[NN+i] - omega*d_y[NN+i]; + + lsum += d_r[i] * d_r[i]; + lsum += d_r[NN+i] * d_r[NN+i]; + } + + // last two rows + if (comm_me_0_flag && ii == 0) { + d_g[2*NN] = alpha*d_d[2*NN] + omega*d_q_hat[2*NN]; + d_g[2*NN + 1] = alpha*d_d[2*NN + 1] + omega*d_q_hat[2*NN + 1]; + + d_s[2*NN] += d_g[2*NN]; + d_s[2*NN + 1] += d_g[2*NN + 1]; + + d_r[2*NN] = d_q[2*NN] - omega*d_y[2*NN]; + d_r[2*NN + 1] = d_q[2*NN + 1] - omega*d_y[2*NN + 1]; + + lsum += d_r[2*NN] * d_r[2*NN]; + lsum += d_r[2*NN + 1] * d_r[2*NN + 1]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2CalculateQ1, const int &ii) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) { + + /* backup s */ + for (int k = nprev-1; k > 0; --k) { + d_s_hist(i,k) = d_s_hist(i,k-1); + d_s_hist_X(i,k) = d_s_hist_X(i,k-1); + } + d_s_hist(i,0) = d_s[i]; + d_s_hist_X(i,0) = d_s[NN+i]; + } + + // last two rows + if (comm_me_0_flag && ii == 0) { + for (int i = 0; i < 2; ++i) { + for (int k = nprev-1; k > 0; --k) + d_s_hist_last(i,k) = d_s_hist_last(i,k-1); + d_s_hist_last(i,0) = d_s[2*NN+i]; + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixACKS2ReaxFFKokkos::operator() (TagACKS2CalculateQ2, const int &ii) const +{ + const int i = d_ilist[ii]; + if (mask[i] & groupbit) + q(i) = d_s(i); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::cleanup_copy() +{ + id = style = NULL; +} + +/* ---------------------------------------------------------------------- + memory usage of local atom-based arrays +------------------------------------------------------------------------- */ + +template +double FixACKS2ReaxFFKokkos::memory_usage() +{ + double bytes; + + int size = 2*nmax + 2; + + bytes = size*nprev * sizeof(double); // s_hist + bytes += nmax*4 * sizeof(double); // storage + bytes += size*11 * sizeof(double); // storage + bytes += n_cap*4 * sizeof(int); // matrix... + bytes += m_cap*2 * sizeof(int); + bytes += m_cap*2 * sizeof(double); + + return bytes; +} + +/* ---------------------------------------------------------------------- + allocate fictitious charge arrays +------------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::grow_arrays(int nmax) +{ + k_s_hist.template sync(); + k_s_hist_X.template sync(); + + k_s_hist.template modify(); // force reallocation on host + k_s_hist_X.template modify(); + + memoryKK->grow_kokkos(k_s_hist,s_hist,nmax,nprev,"acks2:s_hist"); + memoryKK->grow_kokkos(k_s_hist_X,s_hist_X,nmax,nprev,"acks2:s_hist_X"); + + d_s_hist = k_s_hist.template view(); + d_s_hist_X = k_s_hist_X.template view(); + + k_s_hist.template modify(); + k_s_hist_X.template modify(); +} + +/* ---------------------------------------------------------------------- + copy values within fictitious charge arrays +------------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::copy_arrays(int i, int j, int delflag) +{ + k_s_hist.template sync(); + k_s_hist_X.template sync(); + + FixACKS2ReaxFF::copy_arrays(i,j,delflag); + + k_s_hist.template modify(); + k_s_hist_X.template modify(); +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based array for exchange with another proc +------------------------------------------------------------------------- */ + +template +int FixACKS2ReaxFFKokkos::pack_exchange(int i, double *buf) +{ + k_s_hist.template sync(); + k_s_hist_X.template sync(); + + return FixACKS2ReaxFF::pack_exchange(i,buf); +} + +/* ---------------------------------------------------------------------- + unpack values in local atom-based array from exchange with another proc +------------------------------------------------------------------------- */ + +template +int FixACKS2ReaxFFKokkos::unpack_exchange(int nlocal, double *buf) +{ + k_s_hist.template sync(); + k_s_hist_X.template sync(); + + int n = FixACKS2ReaxFF::unpack_exchange(nlocal,buf); + + k_s_hist.template modify(); + k_s_hist_X.template modify(); + + return n; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixACKS2ReaxFFKokkos::get_chi_field() +{ + FixQEqReaxFF::get_chi_field(); + k_chi_field.modify_host(); + k_chi_field.sync_device(); +} + +/* ---------------------------------------------------------------------- */ + +namespace LAMMPS_NS { +template class FixACKS2ReaxFFKokkos; +#ifdef KOKKOS_ENABLE_CUDA +template class FixACKS2ReaxFFKokkos; +#endif +} diff --git a/src/KOKKOS/fix_acks2_reaxff_kokkos.h b/src/KOKKOS/fix_acks2_reaxff_kokkos.h new file mode 100644 index 0000000000..2fbeafc6b3 --- /dev/null +++ b/src/KOKKOS/fix_acks2_reaxff_kokkos.h @@ -0,0 +1,368 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(acks2/reaxff/kk,FixACKS2ReaxFFKokkos) +FixStyle(acks2/reaxff/kk/device,FixACKS2ReaxFFKokkos) +FixStyle(acks2/reaxff/kk/host,FixACKS2ReaxFFKokkos) + +#else + +#ifndef LMP_FIX_ACKS2_REAXFF_KOKKOS_H +#define LMP_FIX_ACKS2_REAXFF_KOKKOS_H + +#include "fix_acks2_reaxff.h" +#include "kokkos_type.h" +#include "neigh_list.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +struct TagACKS2Zero{}; +struct TagACKS2InitMatvec{}; +struct TagACKS2SparseMatvec1{}; +struct TagACKS2SparseMatvec2{}; + +template +struct TagACKS2SparseMatvec3_Half{}; + +struct TagACKS2SparseMatvec3_Full{}; +struct TagACKS2Norm1{}; +struct TagACKS2Norm2{}; +struct TagACKS2Norm3{}; +struct TagACKS2Dot1{}; +struct TagACKS2Dot2{}; +struct TagACKS2Dot3{}; +struct TagACKS2Dot4{}; +struct TagACKS2Dot5{}; +struct TagACKS2Precon1A{}; +struct TagACKS2Precon1B{}; +struct TagACKS2Precon2{}; +struct TagACKS2Add{}; +struct TagACKS2ZeroQGhosts{}; +struct TagACKS2CalculateQ1{}; +struct TagACKS2CalculateQ2{}; + +template +class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF { + public: + typedef DeviceType device_type; + typedef double value_type; + typedef ArrayTypes AT; + FixACKS2ReaxFFKokkos(class LAMMPS *, int, char **); + ~FixACKS2ReaxFFKokkos(); + + void cleanup_copy(); + void init(); + void setup_pre_force(int); + void pre_force(int); + + DAT::tdual_ffloat_1d get_s() {return k_s;} + + KOKKOS_INLINE_FUNCTION + void num_neigh_item(int, int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2Zero, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2InitMatvec, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void compute_h_item(int, int &, const bool &) const; + + template + KOKKOS_INLINE_FUNCTION + void compute_h_team(const typename Kokkos::TeamPolicy ::member_type &team, int, int) const; + + template + KOKKOS_INLINE_FUNCTION + void compute_x_item(int, int &, const bool &) const; + + template + KOKKOS_INLINE_FUNCTION + void compute_x_team(const typename Kokkos::TeamPolicy ::member_type &team, int, int) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2SparseMatvec1, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2SparseMatvec2, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2SparseMatvec3_Half, const int&) const; + + typedef typename Kokkos::TeamPolicy::member_type membertype; + KOKKOS_INLINE_FUNCTION + void operator() (TagACKS2SparseMatvec3_Full, const membertype &team) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2Norm1, const int&, double&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2Norm2, const int&, double&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2Norm3, const int&, double&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2Dot1, const int&, double&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2Dot2, const int&, double&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2Dot3, const int&, double&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2Dot4, const int&, double&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2Dot5, const int&, double&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2Precon1A, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2Precon1B, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2Precon2, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2Add, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2ZeroQGhosts, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2CalculateQ1, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagACKS2CalculateQ2, const int&) const; + + KOKKOS_INLINE_FUNCTION + double calculate_H_k(const F_FLOAT &r, const F_FLOAT &shld) const; + + KOKKOS_INLINE_FUNCTION + double calculate_X_k(const F_FLOAT &r, const F_FLOAT &bcut) const; + + struct params_acks2{ + KOKKOS_INLINE_FUNCTION + params_acks2(){chi=0;eta=0;gamma=0;bcut_acks2=0;}; + KOKKOS_INLINE_FUNCTION + params_acks2(int i){chi=0;eta=0;gamma=0;bcut_acks2=0;}; + F_FLOAT chi, eta, gamma, bcut_acks2; + }; + + private: + int inum; + int allocated_flag, last_allocate; + int need_dup,comm_me_0_flag; + + typename AT::t_int_scalar d_mfill_offset; + + typedef Kokkos::DualView tdual_int_1d; + Kokkos::DualView k_params; + typename Kokkos::DualView::t_dev_const params; + + typename ArrayTypes::t_x_array x; + typename ArrayTypes::t_v_array v; + typename ArrayTypes::t_f_array_const f; + typename ArrayTypes::t_ffloat_1d_randomread mass; + typename ArrayTypes::t_ffloat_1d q; + typename ArrayTypes::t_int_1d type, mask; + typename ArrayTypes::t_tagint_1d tag; + + typename ArrayTypes::t_neighbors_2d d_neighbors; + typename ArrayTypes::t_int_1d_randomread d_ilist, d_numneigh; + + DAT::tdual_ffloat_1d k_tap; + typename AT::t_ffloat_1d d_tap; + + DAT::tdual_ffloat_2d k_bcut; + typename AT::t_ffloat_2d d_bcut; + + typename AT::t_int_1d d_firstnbr; + typename AT::t_int_1d d_numnbrs; + typename AT::t_int_1d d_jlist; + typename AT::t_ffloat_1d d_val; + + typename AT::t_int_1d d_firstnbr_X; + typename AT::t_int_1d d_numnbrs_X; + typename AT::t_int_1d d_jlist_X; + typename AT::t_ffloat_1d d_val_X; + + DAT::tdual_ffloat_1d k_s, k_X_diag, k_chi_field; + typename AT::t_ffloat_1d d_Hdia_inv,d_Xdia_inv, d_X_diag, d_chi_field, d_b_s, d_s; + typename AT::t_ffloat_1d_randomread r_b_s, r_s; + + DAT::tdual_ffloat_1d k_d, k_q_hat, k_z, k_y; + typename AT::t_ffloat_1d d_p, d_q, d_r, d_d, d_g, d_q_hat, d_r_hat, d_y, d_z, d_bb, d_xx; + typename AT::t_ffloat_1d_randomread r_p, r_r, r_d; + + DAT::tdual_ffloat_2d k_shield, k_s_hist, k_s_hist_X, k_s_hist_last; + typename AT::t_ffloat_2d d_shield, d_s_hist, d_s_hist_X, d_s_hist_last; + typename AT::t_ffloat_2d_randomread r_s_hist, r_s_hist_X, r_s_hist_last; + + Kokkos::Experimental::ScatterView::value, Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated> dup_X_diag; + Kokkos::Experimental::ScatterView::value, Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated> ndup_X_diag; + + Kokkos::Experimental::ScatterView::value, Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated> dup_bb; + Kokkos::Experimental::ScatterView::value, Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated> ndup_bb; + + void init_shielding_k(); + void init_hist(); + void allocate_matrix(); + void allocate_array(); + void deallocate_array(); + int bicgstab_solve(); + void calculate_Q(); + + int neighflag; + int nlocal,nall,nmax,newton_pair; + int count, isuccess; + double alpha, beta, omega, cutsq; + + int iswap; + int first; + typename AT::t_int_2d d_sendlist; + typename AT::t_xfloat_1d_um v_buf; + + void grow_arrays(int); + void copy_arrays(int, int, int); + int pack_exchange(int, double *); + int unpack_exchange(int, double *); + void get_chi_field(); + double memory_usage(); + + void sparse_matvec_acks2(typename AT::t_ffloat_1d &, typename AT::t_ffloat_1d &); +}; + +template +struct FixACKS2ReaxFFKokkosNumNeighFunctor { + typedef DeviceType device_type; + typedef int value_type; + FixACKS2ReaxFFKokkos c; + FixACKS2ReaxFFKokkosNumNeighFunctor(FixACKS2ReaxFFKokkos* c_ptr):c(*c_ptr) { + c.cleanup_copy(); + }; + KOKKOS_INLINE_FUNCTION + void operator()(const int ii, int &maxneigh) const { + c.num_neigh_item(ii, maxneigh); + } +}; + +template +struct FixACKS2ReaxFFKokkosComputeHFunctor { + int atoms_per_team, vector_length; + typedef int value_type; + typedef Kokkos::ScratchMemorySpace scratch_space; + FixACKS2ReaxFFKokkos c; + + FixACKS2ReaxFFKokkosComputeHFunctor(FixACKS2ReaxFFKokkos* c_ptr):c(*c_ptr) { + c.cleanup_copy(); + }; + + FixACKS2ReaxFFKokkosComputeHFunctor(FixACKS2ReaxFFKokkos *c_ptr, + int _atoms_per_team, int _vector_length) + : c(*c_ptr), atoms_per_team(_atoms_per_team), + vector_length(_vector_length) { + c.cleanup_copy(); + }; + + KOKKOS_INLINE_FUNCTION + void operator()(const int ii, int &m_fill, const bool &final) const { + c.template compute_h_item(ii,m_fill,final); + } + + KOKKOS_INLINE_FUNCTION + void operator()( + const typename Kokkos::TeamPolicy::member_type &team) const { + c.template compute_h_team(team, atoms_per_team, vector_length); + } + + size_t team_shmem_size(int team_size) const { + size_t shmem_size = + Kokkos::View::shmem_size( + atoms_per_team) + // s_ilist + Kokkos::View::shmem_size( + atoms_per_team) + // s_numnbrs + Kokkos::View::shmem_size( + atoms_per_team) + // s_firstnbr + Kokkos::View:: + shmem_size(atoms_per_team, vector_length) + // s_jtype + Kokkos::View:: + shmem_size(atoms_per_team, vector_length) + // s_j + Kokkos::View::shmem_size(atoms_per_team, + vector_length); // s_r + return shmem_size; + } +}; + +template +struct FixACKS2ReaxFFKokkosComputeXFunctor { + int atoms_per_team, vector_length; + typedef int value_type; + typedef Kokkos::ScratchMemorySpace scratch_space; + FixACKS2ReaxFFKokkos c; + + FixACKS2ReaxFFKokkosComputeXFunctor(FixACKS2ReaxFFKokkos* c_ptr):c(*c_ptr) { + c.cleanup_copy(); + }; + + FixACKS2ReaxFFKokkosComputeXFunctor(FixACKS2ReaxFFKokkos *c_ptr, + int _atoms_per_team, int _vector_length) + : c(*c_ptr), atoms_per_team(_atoms_per_team), + vector_length(_vector_length) { + c.cleanup_copy(); + }; + + KOKKOS_INLINE_FUNCTION + void operator()(const int ii, int &m_fill, const bool &final) const { + c.template compute_x_item(ii,m_fill,final); + } + + KOKKOS_INLINE_FUNCTION + void operator()( + const typename Kokkos::TeamPolicy::member_type &team) const { + c.template compute_x_team(team, atoms_per_team, vector_length); + } + + size_t team_shmem_size(int team_size) const { + size_t shmem_size = + Kokkos::View::shmem_size( + atoms_per_team) + // s_ilist + Kokkos::View::shmem_size( + atoms_per_team) + // s_numnbrs + Kokkos::View::shmem_size( + atoms_per_team) + // s_firstnbr + Kokkos::View:: + shmem_size(atoms_per_team, vector_length) + // s_jtype + Kokkos::View:: + shmem_size(atoms_per_team, vector_length) + // s_j + Kokkos::View::shmem_size(atoms_per_team, + vector_length); // s_r + return shmem_size; + } +}; + +} + +#endif +#endif diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp index 6761ab9b0e..0aab94071f 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.cpp @@ -75,6 +75,7 @@ FixQEqReaxFFKokkos::~FixQEqReaxFFKokkos() memoryKK->destroy_kokkos(k_s_hist,s_hist); memoryKK->destroy_kokkos(k_t_hist,t_hist); + memoryKK->destroy_kokkos(k_chi_field,chi_field); } /* ---------------------------------------------------------------------- */ @@ -256,6 +257,9 @@ void FixQEqReaxFFKokkos::pre_force(int /*vflag*/) // init_matvec + if (field_flag) + get_chi_field(); + k_s_hist.template sync(); k_t_hist.template sync(); FixQEqReaxFFKokkosMatVecFunctor matvec_functor(this); @@ -373,10 +377,16 @@ void FixQEqReaxFFKokkos::allocate_array() k_d = DAT::tdual_ffloat_1d("qeq/kk:d",nmax); d_d = k_d.template view(); h_d = k_d.h_view; + + memoryKK->create_kokkos(k_chi_field,chi_field,nmax,"acks2/kk:chi_field"); + d_chi_field = k_chi_field.template view(); } // init_storage + if (field_flag) + get_chi_field(); + FixQEqReaxFFKokkosZeroFunctor zero_functor(this); Kokkos::parallel_for(ignum,zero_functor); @@ -392,7 +402,7 @@ void FixQEqReaxFFKokkos::zero_item(int ii) const if (mask[i] & groupbit) { d_Hdia_inv[i] = 1.0 / params(itype).eta; - d_b_s[i] = -params(itype).chi; + d_b_s[i] = -params(itype).chi - d_chi_field[i]; d_b_t[i] = -1.0; d_s[i] = 0.0; d_t[i] = 0.0; @@ -711,7 +721,7 @@ void FixQEqReaxFFKokkos::matvec_item(int ii) const if (mask[i] & groupbit) { d_Hdia_inv[i] = 1.0 / params(itype).eta; - d_b_s[i] = -params(itype).chi; + d_b_s[i] = -params(itype).chi - d_chi_field[i]; d_b_t[i] = -1.0; d_t[i] = d_t_hist(i,2) + 3*(d_t_hist(i,0) - d_t_hist(i,1)); d_s[i] = 4*(d_s_hist(i,0)+d_s_hist(i,2))-(6*d_s_hist(i,1)+d_s_hist(i,3)); @@ -1575,6 +1585,16 @@ int FixQEqReaxFFKokkos::unpack_exchange(int nlocal, double *buf) /* ---------------------------------------------------------------------- */ +template +void FixQEqReaxFFKokkos::get_chi_field() +{ + FixQEqReaxFF::get_chi_field(); + k_chi_field.modify_host(); + k_chi_field.sync_device(); +} + +/* ---------------------------------------------------------------------- */ + namespace LAMMPS_NS { template class FixQEqReaxFFKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/fix_qeq_reaxff_kokkos.h b/src/KOKKOS/fix_qeq_reaxff_kokkos.h index 3965770236..3256e56aef 100644 --- a/src/KOKKOS/fix_qeq_reaxff_kokkos.h +++ b/src/KOKKOS/fix_qeq_reaxff_kokkos.h @@ -201,8 +201,8 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { typename AT::t_int_1d d_jlist; typename AT::t_ffloat_1d d_val; - DAT::tdual_ffloat_1d k_t, k_s; - typename AT::t_ffloat_1d d_Hdia_inv, d_b_s, d_b_t, d_t, d_s; + DAT::tdual_ffloat_1d k_t, k_s, k_chi_field; + typename AT::t_ffloat_1d d_Hdia_inv, d_b_s, d_b_t, d_t, d_s, d_chi_field; HAT::t_ffloat_1d h_t, h_s; typename AT::t_ffloat_1d_randomread r_b_s, r_b_t, r_t, r_s; @@ -241,6 +241,7 @@ class FixQEqReaxFFKokkos : public FixQEqReaxFF, public KokkosBase { void copy_arrays(int, int, int); int pack_exchange(int, double *); int unpack_exchange(int, double *); + void get_chi_field(); }; template diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 65990544ad..fa5ff42a44 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -61,9 +61,11 @@ class KokkosLMP : protected Pointers { int neigh_count(int); template - int need_dup() + int need_dup(int qeq_flag = 0) { int value = 0; + int neighflag = this->neighflag; + if (qeq_flag) neighflag = this->neighflag_qeq; if (neighflag == HALFTHREAD) value = std::is_same::value,Kokkos::Experimental::ScatterDuplicated>::value; diff --git a/src/KOKKOS/pair_reaxff_kokkos.cpp b/src/KOKKOS/pair_reaxff_kokkos.cpp index aac0229f87..5e5b439eea 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.cpp +++ b/src/KOKKOS/pair_reaxff_kokkos.cpp @@ -23,6 +23,7 @@ #include "comm.h" #include "error.h" #include "force.h" +#include "fix_acks2_reaxff_kokkos.h" #include "kokkos.h" #include "math_const.h" #include "math_special.h" @@ -141,6 +142,25 @@ void PairReaxFFKokkos::init_style() if (fix_reaxff) modify->delete_fix(fix_id); // not needed in the Kokkos version fix_reaxff = nullptr; + acks2_flag = api->system->acks2_flag; + if (acks2_flag) { + int ifix = modify->find_fix_by_style("^acks2/reax"); + Fix* fix = modify->fix[ifix]; + if (!fix->kokkosable) + error->all(FLERR,"Must use Kokkos version of acks2/reax with pair reaxc/kk"); + if (fix->execution_space == Host) { + FixACKS2ReaxFFKokkos* acks2_fix = (FixACKS2ReaxFFKokkos*) modify->fix[ifix]; + auto k_s = acks2_fix->get_s(); + k_s.sync(); + d_s = k_s.view(); + } else { + FixACKS2ReaxFFKokkos* acks2_fix = (FixACKS2ReaxFFKokkos*) modify->fix[ifix]; + auto k_s = acks2_fix->get_s(); + k_s.sync(); + d_s = k_s.view(); + } + } + // irequest = neigh request made by parent class neighflag = lmp->kokkos->neighflag; @@ -230,6 +250,9 @@ void PairReaxFFKokkos::setup() // hydrogen bond k_params_sing.h_view(i).p_hbond = api->system->reax_param.sbp[map[i]].p_hbond; + // acks2 + k_params_sing.h_view(i).bcut_acks2 = api->system->reax_param.sbp[map[i]].bcut_acks2; + for (j = 1; j <= n; j++) { if (map[j] == -1) continue; @@ -690,9 +713,11 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) tag = atomKK->k_tag.view(); type = atomKK->k_type.view(); nlocal = atomKK->nlocal; - nall = atom->nlocal + atom->nghost; newton_pair = force->newton_pair; + nn = list->inum; + NN = list->inum + list->gnum; + const int inum = list->inum; const int ignum = inum + list->gnum; NeighListKokkos* k_list = static_cast*>(list); @@ -700,6 +725,22 @@ void PairReaxFFKokkos::compute(int eflag_in, int vflag_in) d_neighbors = k_list->d_neighbors; d_ilist = k_list->d_ilist; + if (acks2_flag) { + int ifix = modify->find_fix_by_style("^acks2/reax"); + Fix* fix = modify->fix[ifix]; + if (fix->execution_space == Host) { + FixACKS2ReaxFFKokkos* acks2_fix = (FixACKS2ReaxFFKokkos*) modify->fix[ifix]; + auto k_s = acks2_fix->get_s(); + k_s.sync(); + d_s = k_s.view(); + } else { + FixACKS2ReaxFFKokkos* acks2_fix = (FixACKS2ReaxFFKokkos*) modify->fix[ifix]; + auto k_s = acks2_fix->get_s(); + k_s.sync(); + d_s = k_s.view(); + } + } + need_dup = lmp->kokkos->need_dup(); // allocate duplicated memory @@ -1057,7 +1098,12 @@ void PairReaxFFKokkos::operator()(PairReaxFFComputePolartemplate ev_tally(ev,i,i,epol,0.0,0.0,0.0,0.0); if (eflag_atom) this->template e_tally_single(ev,i,epol); @@ -1195,8 +1241,47 @@ void PairReaxFFKokkos::operator()(PairReaxFFComputeLJCoulomb 0.0) { + /* Coulombic energy contribution */ + const F_FLOAT effpot_diff = d_s[NN + i] + - d_s[NN + j]; + const F_FLOAT e_ele = -0.5 * KCALpMOL_to_EV * bond_softness + * SQR( effpot_diff ); + + ecoul += e_ele; + + /* forces contribution */ + F_FLOAT d_bond_softness; + d_bond_softness = gp[34] + * 3.0 / xcut * pow( d, 2.0 ) + * pow( 1.0 - d, 5.0 ) * (1.0 - 3.0 * d); + d_bond_softness = -0.5 * d_bond_softness + * SQR( effpot_diff ); + d_bond_softness = KCALpMOL_to_EV * d_bond_softness + / rij; + + fcoul += d_bond_softness; + } + } + } + const F_FLOAT ftotal = fvdwl + fcoul; fxtmp += delx*ftotal; @@ -1296,15 +1381,53 @@ void PairReaxFFKokkos::operator()(PairReaxFFComputeTabulatedLJCoulom const F_FLOAT evdwl = ((vdW.d*dif + vdW.c)*dif + vdW.b)*dif + vdW.a; - const F_FLOAT ecoul = (((ele.d*dif + ele.c)*dif + ele.b)*dif + + F_FLOAT ecoul = (((ele.d*dif + ele.c)*dif + ele.b)*dif + ele.a)*qi*qj; const F_FLOAT fvdwl = ((CEvd.d*dif + CEvd.c)*dif + CEvd.b)*dif + CEvd.a; - const F_FLOAT fcoul = (((CEclmb.d*dif+CEclmb.c)*dif+CEclmb.b)*dif + + F_FLOAT fcoul = (((CEclmb.d*dif+CEclmb.c)*dif+CEclmb.b)*dif + CEclmb.a)*qi*qj; + /* contribution to energy and gradients (atoms and cell) + * due to geometry-dependent terms in the ACKS2 + * kinetic energy */ + if (acks2_flag) { + + /* kinetic energy terms */ + double xcut = 0.5 * (paramssing(itype).bcut_acks2 + + paramssing(jtype).bcut_acks2); + + if (rij <= xcut) { + const F_FLOAT d = rij / xcut; + const F_FLOAT bond_softness = gp[34] * pow( d, 3.0 ) + * pow( 1.0 - d, 6.0 ); + + if (bond_softness > 0.0) { + /* Coulombic energy contribution */ + const F_FLOAT effpot_diff = d_s[NN + i] + - d_s[NN + j]; + const F_FLOAT e_ele = -0.5 * KCALpMOL_to_EV * bond_softness + * SQR( effpot_diff ); + + ecoul += e_ele; + + /* forces contribution */ + F_FLOAT d_bond_softness; + d_bond_softness = gp[34] + * 3.0 / xcut * pow( d, 2.0 ) + * pow( 1.0 - d, 5.0 ) * (1.0 - 3.0 * d); + d_bond_softness = -0.5 * d_bond_softness + * SQR( effpot_diff ); + d_bond_softness = KCALpMOL_to_EV * d_bond_softness + / rij; + + fcoul += d_bond_softness; + } + } + } + const F_FLOAT ftotal = fvdwl + fcoul; fxtmp += delx*ftotal; fytmp += dely*ftotal; diff --git a/src/KOKKOS/pair_reaxff_kokkos.h b/src/KOKKOS/pair_reaxff_kokkos.h index 5ed89bf62f..a88248a717 100644 --- a/src/KOKKOS/pair_reaxff_kokkos.h +++ b/src/KOKKOS/pair_reaxff_kokkos.h @@ -252,12 +252,12 @@ class PairReaxFFKokkos : public PairReaxFF { struct params_sing{ KOKKOS_INLINE_FUNCTION params_sing() {mass=0;chi=0;eta=0;r_s=0;r_pi=0;r_pi2=0;valency=0;valency_val=0;valency_e=0;valency_boc=0;nlp_opt=0; - p_lp2=0;p_ovun2=0;p_ovun5=0;p_val3=0;p_val5=0;p_hbond=0;}; + p_lp2=0;p_ovun2=0;p_ovun5=0;p_val3=0;p_val5=0;p_hbond=0;bcut_acks2=0;}; KOKKOS_INLINE_FUNCTION params_sing(int /*i*/) {mass=0;chi=0;eta=0;r_s=0;r_pi=0;r_pi2=0;valency=0;valency_val=0;valency_e=0;valency_boc=0;nlp_opt=0; - p_lp2=0;p_ovun2=0;p_ovun5=0;p_val3=0;p_val5=0;p_hbond=0;}; + p_lp2=0;p_ovun2=0;p_ovun5=0;p_val3=0;p_val5=0;p_hbond=0;bcut_acks2=0;}; F_FLOAT mass,chi,eta,r_s,r_pi,r_pi2,valency,valency_val,valency_e,valency_boc,nlp_opt, - p_lp2,p_ovun2,p_ovun5, p_val3, p_val5, p_hbond; + p_lp2,p_ovun2,p_ovun5, p_val3, p_val5, p_hbond, bcut_acks2; }; struct params_twbp{ @@ -377,7 +377,7 @@ class PairReaxFFKokkos : public PairReaxFF { typename AT::t_float_1d d_tap; HAT::t_float_1d h_tap; - typename AT::t_float_1d d_bo_rij, d_hb_rsq, d_Deltap, d_Deltap_boc, d_total_bo; + typename AT::t_float_1d d_bo_rij, d_hb_rsq, d_Deltap, d_Deltap_boc, d_total_bo, d_s; typename AT::t_float_1d d_Delta, d_Delta_boc, d_Delta_lp, d_dDelta_lp, d_Delta_lp_temp, d_CdDelta; typename AT::t_ffloat_2d_dl d_BO, d_BO_s, d_BO_pi, d_BO_pi2, d_dBOp; typename AT::t_ffloat_2d_dl d_dln_BOp_pix, d_dln_BOp_piy, d_dln_BOp_piz; @@ -426,7 +426,7 @@ class PairReaxFFKokkos : public PairReaxFF { typename AT::t_ffloat_2d_dl d_dBOpx, d_dBOpy, d_dBOpz; int neighflag, newton_pair, maxnumneigh, maxhb, maxbo; - int nlocal,nall,eflag,vflag; + int nlocal,nn,NN,eflag,vflag,acks2_flag; F_FLOAT cut_nbsq, cut_hbsq, cut_bosq, bo_cut, thb_cut, thb_cutsq; F_FLOAT bo_cut_bond; diff --git a/src/OPENMP/pair_reaxff_omp.cpp b/src/OPENMP/pair_reaxff_omp.cpp index 6cf3e90575..d39672f09b 100644 --- a/src/OPENMP/pair_reaxff_omp.cpp +++ b/src/OPENMP/pair_reaxff_omp.cpp @@ -101,13 +101,17 @@ PairReaxFFOMP::~PairReaxFFOMP() void PairReaxFFOMP::init_style() { - if (!atom->q_flag) - error->all(FLERR,"Pair style reaxff/omp requires atom attribute q"); - bool have_qeq = ((modify->find_fix_by_style("^qeq/reax") != -1) - || (modify->find_fix_by_style("^qeq/shielded") != -1)); + || (modify->find_fix_by_style("^qeq/shielded") != -1) + || (modify->find_fix_by_style("^acks2/reax") != -1)); if (!have_qeq && qeqflag == 1) - error->all(FLERR,"Pair reaxff/omp requires use of fix qeq/reaxff or qeq/shielded"); + error->all(FLERR,"Pair reaxff/omp requires use of fix qeq/reaxff or qeq/shielded" + " or fix acks2/reaxff"); + + int have_acks2 = (modify->find_fix_by_style("^acks2/reax") != -1); + api->system->acks2_flag = have_acks2; + if (api->system->acks2_flag) + error->all(FLERR,"Cannot (yet) use ACKS2 with OPENMP ReaxFF"); api->system->n = atom->nlocal; // my atoms api->system->N = atom->nlocal + atom->nghost; // mine + ghosts diff --git a/src/OPENMP/reaxff_nonbonded_omp.cpp b/src/OPENMP/reaxff_nonbonded_omp.cpp index a57542f550..a2888e1993 100644 --- a/src/OPENMP/reaxff_nonbonded_omp.cpp +++ b/src/OPENMP/reaxff_nonbonded_omp.cpp @@ -221,7 +221,7 @@ namespace ReaxFF { data->my_en.e_vdW = total_EvdW; data->my_en.e_ele = total_Eele; - Compute_Polarization_Energy(system, data); + Compute_Polarization_Energy(system, data, workspace); } /* ---------------------------------------------------------------------- */ @@ -343,6 +343,6 @@ namespace ReaxFF { data->my_en.e_vdW = total_EvdW; data->my_en.e_ele = total_Eele; - Compute_Polarization_Energy(system, data); + Compute_Polarization_Energy(system, data, workspace); } } diff --git a/src/REAXFF/fix_acks2_reaxff.cpp b/src/REAXFF/fix_acks2_reaxff.cpp new file mode 100644 index 0000000000..9240bb1cee --- /dev/null +++ b/src/REAXFF/fix_acks2_reaxff.cpp @@ -0,0 +1,1091 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Stan Moore (Sandia) +------------------------------------------------------------------------- */ + +#include "fix_acks2_reaxff.h" +#include +#include +#include +#include "pair_reaxff.h" +#include "atom.h" +#include "comm.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "update.h" +#include "force.h" +#include "group.h" +#include "pair.h" +#include "respa.h" +#include "memory.h" +#include "citeme.h" +#include "error.h" +#include "fix_efield.h" +#include "utils.h" +#include "reaxff_api.h" + +#include "text_file_reader.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +class parser_error : public std::exception { + std::string message; +public: + parser_error(const std::string &mesg) { message = mesg; } + const char *what() const noexcept { return message.c_str(); } +}; + +static const char cite_fix_acks2_reax[] = + "fix acks2/reaxff command:\n\n" + "@Article{O'Hearn2020,\n" + " author = {K. A. O'Hearn, A. Alperen, and H. M. Aktulga},\n" + " title = {Fast Solvers for Charge Distribution Models on Shared Memory Platforms},\n" + " journal = {SIAM J. Sci. Comput.},\n" + " year = 2020,\n" + " volume = 42,\n" + " pages = {1--22}\n" + "}\n\n"; + +/* ---------------------------------------------------------------------- */ + +FixACKS2ReaxFF::FixACKS2ReaxFF(LAMMPS *lmp, int narg, char **arg) : + FixQEqReaxFF(lmp, narg, arg) +{ + bcut = NULL; + + X_diag = NULL; + Xdia_inv = NULL; + + // BiCGStab + g = NULL; + q_hat = NULL; + r_hat = NULL; + y = NULL; + z = NULL; + + // X matrix + X.firstnbr = NULL; + X.numnbrs = NULL; + X.jlist = NULL; + X.val = NULL; + + // Update comm sizes for this fix + comm_forward = comm_reverse = 2; + + s_hist_X = s_hist_last = NULL; +} + +/* ---------------------------------------------------------------------- */ + +FixACKS2ReaxFF::~FixACKS2ReaxFF() +{ + if (copymode) return; + + memory->destroy(bcut); + + if (!reaxflag) + memory->destroy(bcut_acks2); + + memory->destroy(s_hist_X); + memory->destroy(s_hist_last); + + deallocate_storage(); + deallocate_matrix(); +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::post_constructor() +{ + if (lmp->citeme) lmp->citeme->add(cite_fix_acks2_reax); + + memory->create(s_hist_last,2,nprev,"acks2/reax:s_hist_last"); + for (int i = 0; i < 2; i++) + for (int j = 0; j < nprev; ++j) + s_hist_last[i][j] = 0.0; + + grow_arrays(atom->nmax); + for (int i = 0; i < atom->nmax; i++) + for (int j = 0; j < nprev; ++j) + s_hist[i][j] = s_hist_X[i][j] = 0.0; + + pertype_parameters(pertype_option); + if (dual_enabled) + error->all(FLERR,"Dual keyword only supported with fix qeq/reax/omp"); +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::pertype_parameters(char *arg) +{ + // match either new keyword "reaxff" or old keyword "reax/c" + if (utils::strmatch(arg,"^reax..$")) { + reaxflag = 1; + Pair *pair = force->pair_match("^reax..",0); + if (!pair) error->all(FLERR,"No reaxff pair style for fix qeq/reaxff"); + + int tmp; + chi = (double *) pair->extract("chi",tmp); + eta = (double *) pair->extract("eta",tmp); + gamma = (double *) pair->extract("gamma",tmp); + bcut_acks2 = (double *) pair->extract("bcut_acks2",tmp); + double* bond_softness_ptr = (double *) pair->extract("bond_softness",tmp); + + if (chi == NULL || eta == NULL || gamma == NULL || + bcut_acks2 == NULL || bond_softness_ptr == NULL) + error->all(FLERR, + "Fix acks2/reaxff could not extract params from pair reaxff"); + bond_softness = *bond_softness_ptr; + return; + } + + reaxflag = 0; + const int ntypes = atom->ntypes; + + memory->create(chi,ntypes+1,"acks2/reaxff:chi"); + memory->create(eta,ntypes+1,"acks2/reaxff:eta"); + memory->create(gamma,ntypes+1,"acks2/reaxff:gamma"); + memory->create(bcut_acks2,ntypes+1,"acks2/reaxff:bcut_acks2"); + + if (comm->me == 0) { + bond_softness = chi[0] = eta[0] = gamma[0] = 0.0; + try { + TextFileReader reader(arg,"acks2/reaxff parameter"); + reader.ignore_comments = false; + + const char *line = reader.next_line(); + if (!line) + throw parser_error("Invalid param file for fix acks2/reaxff"); + ValueTokenizer values(line); + + if (values.count() != 1) + throw parser_error("Fix acks2/reaxff: Incorrect format of param file"); + + bond_softness = values.next_double(); + + for (int i = 1; i <= ntypes; i++) { + const char *line = reader.next_line(); + if (!line) + throw parser_error("Invalid param file for fix acks2/reaxff"); + ValueTokenizer values(line); + + if (values.count() != 5) + throw parser_error("Fix acks2/reaxff: Incorrect format of param file"); + + int itype = values.next_int(); + if ((itype < 1) || (itype > ntypes)) + throw parser_error("Fix acks2/reaxff: invalid atom type in param file"); + + chi[itype] = values.next_double(); + eta[itype] = values.next_double(); + gamma[itype] = values.next_double(); + bcut_acks2[itype] = values.next_double(); + } + } catch (std::exception &e) { + error->one(FLERR,e.what()); + } + } + + MPI_Bcast(chi,ntypes+1,MPI_DOUBLE,0,world); + MPI_Bcast(eta,ntypes+1,MPI_DOUBLE,0,world); + MPI_Bcast(gamma,ntypes+1,MPI_DOUBLE,0,world); + MPI_Bcast(bcut_acks2,ntypes+1,MPI_DOUBLE,0,world); +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::allocate_storage() +{ + nmax = atom->nmax; + int size = nmax*2 + 2; + + // 0 to nn-1: owned atoms related to H matrix + // nn to NN-1: ghost atoms related to H matrix + // NN to NN+nn-1: owned atoms related to X matrix + // NN+nn to 2*NN-1: ghost atoms related X matrix + // 2*NN to 2*NN+1: last two rows, owned by proc 0 + + memory->create(s,size,"acks2:s"); + memory->create(b_s,size,"acks2:b_s"); + + memory->create(Hdia_inv,nmax,"acks2:Hdia_inv"); + memory->create(chi_field,nmax,"acks2:chi_field"); + + memory->create(X_diag,nmax,"acks2:X_diag"); + memory->create(Xdia_inv,nmax,"acks2:Xdia_inv"); + + memory->create(p,size,"acks2:p"); + memory->create(q,size,"acks2:q"); + memory->create(r,size,"acks2:r"); + memory->create(d,size,"acks2:d"); + + memory->create(g,size,"acks2:g"); + memory->create(q_hat,size,"acks2:q_hat"); + memory->create(r_hat,size,"acks2:r_hat"); + memory->create(y,size,"acks2:y"); + memory->create(z,size,"acks2:z"); +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::deallocate_storage() +{ + FixQEqReaxFF::deallocate_storage(); + + memory->destroy(X_diag); + memory->destroy(Xdia_inv); + + memory->destroy(g); + memory->destroy(q_hat); + memory->destroy(r_hat); + memory->destroy(y); + memory->destroy(z); +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::allocate_matrix() +{ + FixQEqReaxFF::allocate_matrix(); + + X.n = n_cap; + X.m = m_cap; + memory->create(X.firstnbr,n_cap,"acks2:X.firstnbr"); + memory->create(X.numnbrs,n_cap,"acks2:X.numnbrs"); + memory->create(X.jlist,m_cap,"acks2:X.jlist"); + memory->create(X.val,m_cap,"acks2:X.val"); +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::deallocate_matrix() +{ + FixQEqReaxFF::deallocate_matrix(); + + memory->destroy(X.firstnbr); + memory->destroy(X.numnbrs); + memory->destroy(X.jlist); + memory->destroy(X.val); +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::init() +{ + FixQEqReaxFF::init(); + + init_bondcut(); +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::init_bondcut() +{ + int i,j; + int ntypes; + + ntypes = atom->ntypes; + if (bcut == NULL) + memory->create(bcut,ntypes+1,ntypes+1,"acks2:bondcut"); + + for (i = 1; i <= ntypes; ++i) + for (j = 1; j <= ntypes; ++j) { + bcut[i][j] = 0.5*(bcut_acks2[i] + bcut_acks2[j]); + } +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::init_storage() +{ + if (field_flag) + get_chi_field(); + + for (int ii = 0; ii < NN; ii++) { + int i = ilist[ii]; + if (atom->mask[i] & groupbit) { + b_s[i] = -chi[atom->type[i]]; + if (field_flag) + b_s[i] -= chi_field[i]; + b_s[NN + i] = 0.0; + s[i] = 0.0; + s[NN + i] = 0.0; + } + } + + for (int i = 0; i < 2; i++) { + b_s[2*NN + i] = 0.0; + s[2*NN + i] = 0.0; + } +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::pre_force(int /*vflag*/) +{ + double t_start, t_end; + + if (update->ntimestep % nevery) return; + if (comm->me == 0) t_start = MPI_Wtime(); + + int n = atom->nlocal; + + if (reaxff) { + nn = reaxff->list->inum; + NN = reaxff->list->inum + reaxff->list->gnum; + ilist = reaxff->list->ilist; + numneigh = reaxff->list->numneigh; + firstneigh = reaxff->list->firstneigh; + } else { + nn = list->inum; + NN = list->inum + list->gnum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + } + + // grow arrays if necessary + // need to be atom->nmax in length + + if (atom->nmax > nmax) reallocate_storage(); + if (n > n_cap*DANGER_ZONE || m_fill > m_cap*DANGER_ZONE) + reallocate_matrix(); + + if (field_flag) + get_chi_field(); + + init_matvec(); + + matvecs = BiCGStab(b_s, s); // BiCGStab on s - parallel + + calculate_Q(); +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::init_matvec() +{ + /* fill-in H matrix */ + compute_H(); + + /* fill-in X matrix */ + compute_X(); + pack_flag = 4; + comm->reverse_comm_fix(this); //Coll_Vector(X_diag); + + int ii, i; + + for (ii = 0; ii < nn; ++ii) { + if (X_diag[ii] == 0.0) + Xdia_inv[ii] = 1.0; + else + Xdia_inv[ii] = 1.0 / X_diag[ii]; + + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + + /* init pre-conditioner for H and init solution vectors */ + Hdia_inv[i] = 1. / eta[atom->type[i]]; + b_s[i] = -chi[atom->type[i]]; + if (field_flag) + b_s[i] -= chi_field[i]; + b_s[NN+i] = 0.0; + + /* cubic extrapolation for s from previous solutions */ + s[i] = 4*(s_hist[i][0]+s_hist[i][2])-(6*s_hist[i][1]+s_hist[i][3]); + s[NN+i] = 4*(s_hist_X[i][0]+s_hist_X[i][2])-(6*s_hist_X[i][1]+s_hist_X[i][3]); + } + } + + // last two rows + if (comm->me == 0) { + for (i = 0; i < 2; i++) { + b_s[2*NN+i] = 0.0; + s[2*NN+i] = 4*(s_hist_last[i][0]+s_hist_last[i][2])-(6*s_hist_last[i][1]+s_hist_last[i][3]); + } + } + + pack_flag = 2; + comm->forward_comm_fix(this); //Dist_vector(s); + more_forward_comm(s); +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::compute_X() +{ + int jnum; + int i, j, ii, jj, flag; + double dx, dy, dz, r_sqr; + const double SMALL = 0.0001; + + int *type = atom->type; + tagint *tag = atom->tag; + double **x = atom->x; + int *mask = atom->mask; + + memset(X_diag,0.0,atom->nmax*sizeof(double)); + + // fill in the X matrix + m_fill = 0; + r_sqr = 0; + for (ii = 0; ii < nn; ii++) { + i = ilist[ii]; + if (mask[i] & groupbit) { + jlist = firstneigh[i]; + jnum = numneigh[i]; + X.firstnbr[i] = m_fill; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + + dx = x[j][0] - x[i][0]; + dy = x[j][1] - x[i][1]; + dz = x[j][2] - x[i][2]; + r_sqr = SQR(dx) + SQR(dy) + SQR(dz); + + flag = 0; + if (r_sqr <= SQR(swb)) { + if (j < atom->nlocal) flag = 1; + else if (tag[i] < tag[j]) flag = 1; + else if (tag[i] == tag[j]) { + if (dz > SMALL) flag = 1; + else if (fabs(dz) < SMALL) { + if (dy > SMALL) flag = 1; + else if (fabs(dy) < SMALL && dx > SMALL) + flag = 1; + } + } + } + + if (flag) { + double bcutoff = bcut[type[i]][type[j]]; + double bcutoff2 = bcutoff*bcutoff; + if (r_sqr <= bcutoff2) { + X.jlist[m_fill] = j; + double X_val = calculate_X(sqrt(r_sqr), bcutoff); + X.val[m_fill] = X_val; + X_diag[i] -= X_val; + X_diag[j] -= X_val; + m_fill++; + } + } + } + + X.numnbrs[i] = m_fill - X.firstnbr[i]; + } + } + + if (m_fill >= X.m) { + char str[128]; + sprintf(str,"X matrix size has been exceeded: m_fill=%d X.m=%d\n", + m_fill, X.m); + error->warning(FLERR,str); + error->all(FLERR,"Fix acks2/reaxff has insufficient ACKS2 matrix size"); + } +} + +/* ---------------------------------------------------------------------- */ + +double FixACKS2ReaxFF::calculate_X(double r, double bcut) +{ + double d = r/bcut; + double d3 = d*d*d; + double omd = 1.0 - d; + double omd2 = omd*omd; + double omd6 = omd2*omd2*omd2; + + return bond_softness*d3*omd6; +} + +/* ---------------------------------------------------------------------- */ + +int FixACKS2ReaxFF::BiCGStab(double *b, double *x) +{ + int i, j; + double tmp, alpha, beta, omega, sigma, rho, rho_old, rnorm, bnorm; + + int jj; + + sparse_matvec_acks2(&H, &X, x, d); + pack_flag = 1; + comm->reverse_comm_fix(this); //Coll_Vector(d); + more_reverse_comm(d); + + vector_sum(r , 1., b, -1., d, nn); + bnorm = parallel_norm(b, nn); + rnorm = parallel_norm(r, nn); + + if (bnorm == 0.0) bnorm = 1.0; + vector_copy(r_hat, r, nn); + omega = 1.0; + rho = 1.0; + + for (i = 1; i < imax && rnorm / bnorm > tolerance; ++i) { + rho = parallel_dot(r_hat, r, nn); + if (rho == 0.0) break; + + if (i > 1) { + beta = (rho / rho_old) * (alpha / omega); + vector_sum(q , 1., p, -omega, z, nn); + vector_sum(p , 1., r, beta, q, nn); + } else { + vector_copy(p, r, nn); + } + + // pre-conditioning + for (jj = 0; jj < nn; ++jj) { + j = ilist[jj]; + if (atom->mask[j] & groupbit) { + d[j] = p[j] * Hdia_inv[j]; + d[NN+j] = p[NN+j] * Xdia_inv[j]; + } + } + // last two rows + if (comm->me == 0) { + d[2*NN] = p[2*NN]; + d[2*NN + 1] = p[2*NN + 1]; + } + + pack_flag = 1; + comm->forward_comm_fix(this); //Dist_vector(d); + more_forward_comm(d); + sparse_matvec_acks2(&H, &X, d, z); + pack_flag = 2; + comm->reverse_comm_fix(this); //Coll_vector(z); + more_reverse_comm(z); + + tmp = parallel_dot(r_hat, z, nn); + alpha = rho / tmp; + + vector_sum(q , 1., r, -alpha, z, nn); + + tmp = parallel_dot(q, q, nn); + + // early convergence check + if (tmp < tolerance) { + vector_add(x, alpha, d, nn); + break; + } + + // pre-conditioning + for (jj = 0; jj < nn; ++jj) { + j = ilist[jj]; + if (atom->mask[j] & groupbit) { + q_hat[j] = q[j] * Hdia_inv[j]; + q_hat[NN+j] = q[NN+j] * Xdia_inv[j]; + } + } + // last two rows + if (comm->me == 0) { + q_hat[2*NN] = q[2*NN]; + q_hat[2*NN + 1] = q[2*NN + 1]; + } + + pack_flag = 3; + comm->forward_comm_fix(this); //Dist_vector(q_hat); + more_forward_comm(q_hat); + sparse_matvec_acks2(&H, &X, q_hat, y); + pack_flag = 3; + comm->reverse_comm_fix(this); //Dist_vector(y); + more_reverse_comm(y); + + sigma = parallel_dot(y, q, nn); + tmp = parallel_dot(y, y, nn); + omega = sigma / tmp; + + vector_sum(g , alpha, d, omega, q_hat, nn); + vector_add(x, 1., g, nn); + vector_sum(r , 1., q, -omega, y, nn); + + rnorm = parallel_norm(r, nn); + if (omega == 0) break; + rho_old = rho; + } + + if (comm->me == 0) { + if (omega == 0 || rho == 0) { + char str[128]; + sprintf(str,"Fix acks2/reaxff BiCGStab numerical breakdown, omega = %g, rho = %g",omega,rho); + error->warning(FLERR,str); + } else if (i >= imax) { + char str[128]; + sprintf(str,"Fix acks2/reaxff BiCGStab convergence failed after %d iterations " + "at " BIGINT_FORMAT " step",i,update->ntimestep); + error->warning(FLERR,str); + } + } + + return i; +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::sparse_matvec_acks2(sparse_matrix *H, sparse_matrix *X, double *x, double *b) +{ + int i, j, itr_j; + int ii; + + for (ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + b[i] = eta[atom->type[i]] * x[i]; + b[NN + i] = X_diag[i] * x[NN + i]; + } + } + + for (ii = nn; ii < NN; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) + b[i] = 0; + b[NN + i] = 0; + } + // last two rows + b[2*NN] = 0; + b[2*NN + 1] = 0; + + for (ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + // H Matrix + for (itr_j=H->firstnbr[i]; itr_jfirstnbr[i]+H->numnbrs[i]; itr_j++) { + j = H->jlist[itr_j]; + b[i] += H->val[itr_j] * x[j]; + b[j] += H->val[itr_j] * x[i]; + } + + // X Matrix + for (itr_j=X->firstnbr[i]; itr_jfirstnbr[i]+X->numnbrs[i]; itr_j++) { + j = X->jlist[itr_j]; + b[NN + i] += X->val[itr_j] * x[NN + j]; + b[NN + j] += X->val[itr_j] * x[NN + i]; + } + + // Identity Matrix + b[NN + i] += x[i]; + b[i] += x[NN + i]; + + // Second-to-last row/column + b[2*NN] += x[NN + i]; + b[NN + i] += x[2*NN]; + + // Last row/column + b[2*NN + 1] += x[i]; + b[i] += x[2*NN + 1]; + } + } + +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::calculate_Q() +{ + int i, k; + + for (int ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + + /* backup s */ + for (k = nprev-1; k > 0; --k) { + s_hist[i][k] = s_hist[i][k-1]; + s_hist_X[i][k] = s_hist_X[i][k-1]; + } + s_hist[i][0] = s[i]; + s_hist_X[i][0] = s[NN+i]; + } + } + // last two rows + if (comm->me == 0) { + for (int i = 0; i < 2; ++i) { + for (k = nprev-1; k > 0; --k) + s_hist_last[i][k] = s_hist_last[i][k-1]; + s_hist_last[i][0] = s[2*NN+i]; + } + } + + pack_flag = 2; + comm->forward_comm_fix(this); //Dist_vector(s); + + for (int ii = 0; ii < NN; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) + atom->q[i] = s[i]; + } +} + +/* ---------------------------------------------------------------------- */ + +int FixACKS2ReaxFF::pack_forward_comm(int n, int *list, double *buf, + int /*pbc_flag*/, int * /*pbc*/) +{ + int m = 0; + + if (pack_flag == 1) { + for(int i = 0; i < n; i++) { + int j = list[i]; + buf[m++] = d[j]; + buf[m++] = d[NN+j]; + } + } else if (pack_flag == 2) { + for(int i = 0; i < n; i++) { + int j = list[i]; + buf[m++] = s[j]; + buf[m++] = s[NN+j]; + } + } else if (pack_flag == 3) { + for(int i = 0; i < n; i++) { + int j = list[i]; + buf[m++] = q_hat[j]; + buf[m++] = q_hat[NN+j]; + } + } + return m; +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::unpack_forward_comm(int n, int first, double *buf) +{ + int i, m; + + int last = first + n; + m = 0; + + if (pack_flag == 1) { + for(i = first; i < last; i++) { + d[i] = buf[m++]; + d[NN+i] = buf[m++]; + } + } else if (pack_flag == 2) { + for(i = first; i < last; i++) { + s[i] = buf[m++]; + s[NN+i] = buf[m++]; + } + } else if (pack_flag == 3) { + for(i = first; i < last; i++) { + q_hat[i] = buf[m++]; + q_hat[NN+i] = buf[m++]; + } + } +} + +/* ---------------------------------------------------------------------- */ + +int FixACKS2ReaxFF::pack_reverse_comm(int n, int first, double *buf) +{ + int i, m; + m = 0; + int last = first + n; + + if (pack_flag == 1) { + for(i = first; i < last; i++) { + buf[m++] = d[i]; + buf[m++] = d[NN+i]; + } + } else if (pack_flag == 2) { + for(i = first; i < last; i++) { + buf[m++] = z[i]; + buf[m++] = z[NN+i]; + } + } else if (pack_flag == 3) { + for(i = first; i < last; i++) { + buf[m++] = y[i]; + buf[m++] = y[NN+i]; + } + } else if (pack_flag == 4) { + for(i = first; i < last; i++) + buf[m++] = X_diag[i]; + } + + return m; +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::unpack_reverse_comm(int n, int *list, double *buf) +{ + int j; + int m = 0; + if (pack_flag == 1) { + for(int i = 0; i < n; i++) { + j = list[i]; + d[j] += buf[m++]; + d[NN+j] += buf[m++]; + } + } else if (pack_flag == 2) { + for(int i = 0; i < n; i++) { + j = list[i]; + z[j] += buf[m++]; + z[NN+j] += buf[m++]; + } + } else if (pack_flag == 3) { + for(int i = 0; i < n; i++) { + j = list[i]; + y[j] += buf[m++]; + y[NN+j] += buf[m++]; + } + } else if (pack_flag == 4) { + for(int i = 0; i < n; i++) { + j = list[i]; + X_diag[j] += buf[m++]; + } + } +} + +/* ---------------------------------------------------------------------- + proc 0 broadcasts last two rows of vector to everyone else +------------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::more_forward_comm(double *vec) +{ + MPI_Bcast(&vec[2*NN],2,MPI_DOUBLE,0,world); +} + +/* ---------------------------------------------------------------------- + reduce last two rows of vector and give to proc 0 +------------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::more_reverse_comm(double *vec) +{ + if (comm->me == 0) + MPI_Reduce(MPI_IN_PLACE,&vec[2*NN],2,MPI_DOUBLE,MPI_SUM,0,world); + else + MPI_Reduce(&vec[2*NN],NULL,2,MPI_DOUBLE,MPI_SUM,0,world); +} + +/* ---------------------------------------------------------------------- + memory usage of local atom-based arrays +------------------------------------------------------------------------- */ + +double FixACKS2ReaxFF::memory_usage() +{ + double bytes; + + int size = 2*nmax + 2; + + bytes = size*nprev * sizeof(double); // s_hist + bytes += nmax*4 * sizeof(double); // storage + bytes += size*11 * sizeof(double); // storage + bytes += n_cap*4 * sizeof(int); // matrix... + bytes += m_cap*2 * sizeof(int); + bytes += m_cap*2 * sizeof(double); + + return bytes; +} + +/* ---------------------------------------------------------------------- + allocate solution history array +------------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::grow_arrays(int nmax) +{ + memory->grow(s_hist,nmax,nprev,"acks2:s_hist"); + memory->grow(s_hist_X,nmax,nprev,"acks2:s_hist_X"); +} + +/* ---------------------------------------------------------------------- + copy values within solution history array +------------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::copy_arrays(int i, int j, int /*delflag*/) +{ + for (int m = 0; m < nprev; m++) { + s_hist[j][m] = s_hist[i][m]; + s_hist_X[j][m] = s_hist_X[i][m]; + } +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based array for exchange with another proc +------------------------------------------------------------------------- */ + +int FixACKS2ReaxFF::pack_exchange(int i, double *buf) +{ + for (int m = 0; m < nprev; m++) buf[m] = s_hist[i][m]; + for (int m = 0; m < nprev; m++) buf[nprev+m] = s_hist_X[i][m]; + return nprev*2; +} + +/* ---------------------------------------------------------------------- + unpack values in local atom-based array from exchange with another proc +------------------------------------------------------------------------- */ + +int FixACKS2ReaxFF::unpack_exchange(int nlocal, double *buf) +{ + for (int m = 0; m < nprev; m++) s_hist[nlocal][m] = buf[m]; + for (int m = 0; m < nprev; m++) s_hist_X[nlocal][m] = buf[nprev+m]; + return nprev*2; +} + +/* ---------------------------------------------------------------------- */ + +double FixACKS2ReaxFF::parallel_norm(double *v, int n) +{ + int i; + double my_sum, norm_sqr; + + int ii; + + my_sum = 0.0; + norm_sqr = 0.0; + for (ii = 0; ii < n; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + my_sum += SQR(v[i]); + my_sum += SQR(v[NN+i]); + } + } + + // last two rows + if (comm->me == 0) { + my_sum += SQR(v[2*NN]); + my_sum += SQR(v[2*NN + 1]); + } + + MPI_Allreduce(&my_sum, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world); + + return sqrt(norm_sqr); +} + +/* ---------------------------------------------------------------------- */ + +double FixACKS2ReaxFF::parallel_dot(double *v1, double *v2, int n) +{ + int i; + double my_dot, res; + + int ii; + + my_dot = 0.0; + res = 0.0; + for (ii = 0; ii < n; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + my_dot += v1[i] * v2[i]; + my_dot += v1[NN+i] * v2[NN+i]; + } + } + + // last two rows + if (comm->me == 0) { + my_dot += v1[2*NN] * v2[2*NN]; + my_dot += v1[2*NN + 1] * v2[2*NN + 1]; + } + + MPI_Allreduce(&my_dot, &res, 1, MPI_DOUBLE, MPI_SUM, world); + + return res; +} + +/* ---------------------------------------------------------------------- */ + +double FixACKS2ReaxFF::parallel_vector_acc(double *v, int n) +{ + int i; + double my_acc, res; + + int ii; + + my_acc = 0.0; + res = 0.0; + for (ii = 0; ii < n; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + my_acc += v[i]; + my_acc += v[NN+i]; + } + } + + // last two rows + if (comm->me == 0) { + my_acc += v[2*NN]; + my_acc += v[2*NN + 1]; + } + + MPI_Allreduce(&my_acc, &res, 1, MPI_DOUBLE, MPI_SUM, world); + + return res; +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::vector_sum(double* dest, double c, double* v, + double d, double* y, int k) +{ + int kk; + + for (--k; k>=0; --k) { + kk = ilist[k]; + if (atom->mask[kk] & groupbit) { + dest[kk] = c * v[kk] + d * y[kk]; + dest[NN + kk] = c * v[NN + kk] + d * y[NN + kk]; + } + } + + // last two rows + if (comm->me == 0) { + dest[2*NN] = c * v[2*NN] + d * y[2*NN]; + dest[2*NN + 1] = c * v[2*NN + 1] + d * y[2*NN + 1]; + } +} + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::vector_add(double* dest, double c, double* v, int k) +{ + int kk; + + for (--k; k>=0; --k) { + kk = ilist[k]; + if (atom->mask[kk] & groupbit) { + dest[kk] += c * v[kk]; + dest[NN + kk] += c * v[NN + kk]; + } + } + + // last two rows + if (comm->me == 0) { + dest[2*NN] += c * v[2*NN]; + dest[2*NN + 1] += c * v[2*NN + 1]; + } +} + + +/* ---------------------------------------------------------------------- */ + +void FixACKS2ReaxFF::vector_copy(double* dest, double* v, int k) +{ + int kk; + + for (--k; k>=0; --k) { + kk = ilist[k]; + if (atom->mask[kk] & groupbit) { + dest[kk] = v[kk]; + dest[NN + kk] = v[NN + kk]; + } + } + + // last two rows + if (comm->me == 0) { + dest[2*NN] = v[2*NN]; + dest[2*NN + 1] = v[2*NN + 1]; + } +} + diff --git a/src/REAXFF/fix_acks2_reaxff.h b/src/REAXFF/fix_acks2_reaxff.h new file mode 100644 index 0000000000..221c751bb2 --- /dev/null +++ b/src/REAXFF/fix_acks2_reaxff.h @@ -0,0 +1,90 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(acks2/reax,FixACKS2ReaxFF) +FixStyle(acks2/reaxff,FixACKS2ReaxFF) + +#else + +#ifndef LMP_FIX_ACKS2_REAXFF_H +#define LMP_FIX_ACKS2_REAXFF_H + +#include "fix_qeq_reaxff.h" + +namespace LAMMPS_NS { + +class FixACKS2ReaxFF : public FixQEqReaxFF { + public: + FixACKS2ReaxFF(class LAMMPS *, int, char **); + virtual ~FixACKS2ReaxFF(); + void post_constructor(); + virtual void init(); + void init_storage(); + virtual void pre_force(int); + + double* get_s() {return s;} + + protected: + double **s_hist_X,**s_hist_last; + + double *bcut_acks2,bond_softness,**bcut; // acks2 parameters + + sparse_matrix X; + double *Xdia_inv; + double *X_diag; + + //BiCGStab storage + double *g, *q_hat, *r_hat, *y, *z; + + void pertype_parameters(char*); + void init_bondcut(); + void allocate_storage(); + void deallocate_storage(); + void allocate_matrix(); + void deallocate_matrix(); + + void init_matvec(); + void compute_X(); + double calculate_X(double,double); + void calculate_Q(); + + int BiCGStab(double*,double*); + void sparse_matvec_acks2(sparse_matrix*,sparse_matrix*,double*,double*); + + int pack_forward_comm(int, int *, double *, int, int *); + void unpack_forward_comm(int, int, double *); + int pack_reverse_comm(int, int, double *); + void unpack_reverse_comm(int, int *, double *); + void more_forward_comm(double *); + void more_reverse_comm(double *); + double memory_usage(); + virtual void grow_arrays(int); + virtual void copy_arrays(int, int, int); + virtual int pack_exchange(int, double *); + virtual int unpack_exchange(int, double *); + + double parallel_norm( double*, int ); + double parallel_dot( double*, double*, int ); + double parallel_vector_acc( double*, int ); + + void vector_sum(double*,double,double*,double,double*,int); + void vector_add(double*, double, double*,int); + void vector_copy(double*, double*,int); +}; + +} + +#endif +#endif diff --git a/src/REAXFF/fix_qeq_reaxff.cpp b/src/REAXFF/fix_qeq_reaxff.cpp index 06c9414fbd..55d7958e07 100644 --- a/src/REAXFF/fix_qeq_reaxff.cpp +++ b/src/REAXFF/fix_qeq_reaxff.cpp @@ -25,6 +25,7 @@ #include "citeme.h" #include "comm.h" #include "error.h" +#include "fix_efield.h" #include "force.h" #include "group.h" #include "memory.h" @@ -97,10 +98,10 @@ FixQEqReaxFF::FixQEqReaxFF(LAMMPS *lmp, int narg, char **arg) : else if (strcmp(arg[iarg],"nowarn") == 0) maxwarn = 0; else if (strcmp(arg[iarg],"maxiter") == 0) { if (iarg+1 > narg-1) - error->all(FLERR,"Illegal fix qeq/reaxff command"); + error->all(FLERR,fmt::format("Illegal fix {} command", style)); imax = utils::numeric(FLERR,arg[iarg+1],false,lmp); iarg++; - } else error->all(FLERR,"Illegal fix qeq/reaxff command"); + } else error->all(FLERR,fmt::format("Illegal fix {} command", style)); iarg++; } shld = nullptr; @@ -115,6 +116,7 @@ FixQEqReaxFF::FixQEqReaxFF(LAMMPS *lmp, int narg, char **arg) : Hdia_inv = nullptr; b_s = nullptr; + chi_field = nullptr; b_t = nullptr; b_prc = nullptr; b_prm = nullptr; @@ -271,6 +273,7 @@ void FixQEqReaxFF::allocate_storage() memory->create(Hdia_inv,nmax,"qeq:Hdia_inv"); memory->create(b_s,nmax,"qeq:b_s"); + memory->create(chi_field,nmax,"qeq:chi_field"); memory->create(b_t,nmax,"qeq:b_t"); memory->create(b_prc,nmax,"qeq:b_prc"); memory->create(b_prm,nmax,"qeq:b_prm"); @@ -297,6 +300,7 @@ void FixQEqReaxFF::deallocate_storage() memory->destroy(b_t); memory->destroy(b_prc); memory->destroy(b_prm); + memory->destroy(chi_field); memory->destroy(p); memory->destroy(q); @@ -378,6 +382,11 @@ void FixQEqReaxFF::init() if (group->count(igroup) == 0) error->all(FLERR,"Fix qeq/reaxff group has no atoms"); + field_flag = 0; + for (int n = 0; n < modify->nfix; n++) + if (utils::strmatch(modify->fix[n]->style,"^efield")) + field_flag = 1; + // need a half neighbor list w/ Newton off and ghost neighbors // built whenever re-neighboring occurs @@ -502,22 +511,16 @@ void FixQEqReaxFF::min_setup_pre_force(int vflag) void FixQEqReaxFF::init_storage() { - int NN; - int *ilist; - - if (reaxff) { - NN = reaxff->list->inum + reaxff->list->gnum; - ilist = reaxff->list->ilist; - } else { - NN = list->inum + list->gnum; - ilist = list->ilist; - } + if (field_flag) + get_chi_field(); for (int ii = 0; ii < NN; ii++) { int i = ilist[ii]; if (atom->mask[i] & groupbit) { Hdia_inv[i] = 1. / eta[atom->type[i]]; b_s[i] = -chi[atom->type[i]]; + if (field_flag) + b_s[i] -= chi_field[i]; b_t[i] = -1.0; b_prc[i] = 0; b_prm[i] = 0; @@ -555,6 +558,9 @@ void FixQEqReaxFF::pre_force(int /*vflag*/) if (n > n_cap*DANGER_ZONE || m_fill > m_cap*DANGER_ZONE) reallocate_matrix(); + if (field_flag) + get_chi_field(); + init_matvec(); matvecs_s = CG(b_s, s); // CG on s - parallel @@ -594,6 +600,8 @@ void FixQEqReaxFF::init_matvec() /* init pre-conditioner for H and init solution vectors */ Hdia_inv[i] = 1. / eta[atom->type[i]]; b_s[i] = -chi[atom->type[i]]; + if (field_flag) + b_s[i] -= chi_field[i]; b_t[i] = -1.0; /* quadratic extrapolation for s & t from previous solutions */ @@ -1060,3 +1068,29 @@ void FixQEqReaxFF::vector_add(double* dest, double c, double* v, int k) } } +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxFF::get_chi_field() +{ + int nlocal = atom->nlocal; + + memset(&chi_field[0],0.0,atom->nmax*sizeof(double)); + + if (!(strcmp(update->unit_style,"real") == 0)) + error->all(FLERR,"Must use unit_style real with fix qeq/reax and external fields"); + + double factor = 1.0/force->qe2f; + + // loop over all fixes, find fix efield + + for (int n = 0; n < modify->nfix; n++) { + if (utils::strmatch(modify->fix[n]->style,"^efield")) { + + FixEfield* fix_efield = (FixEfield*) modify->fix[n]; + double* field_energy = fix_efield->get_energy(); // Real units of kcal/mol/angstrom, need to convert to eV + + for (int i = 0; i < nlocal; i++) + chi_field[i] += field_energy[i]*factor; + } + } +} diff --git a/src/REAXFF/fix_qeq_reaxff.h b/src/REAXFF/fix_qeq_reaxff.h index ab037aea6b..cc5bd09791 100644 --- a/src/REAXFF/fix_qeq_reaxff.h +++ b/src/REAXFF/fix_qeq_reaxff.h @@ -45,7 +45,7 @@ class FixQEqReaxFF : public Fix { virtual void init(); void init_list(int, class NeighList *); virtual void init_storage(); - void setup_pre_force(int); + virtual void setup_pre_force(int); virtual void pre_force(int); void setup_pre_force_respa(int, int); @@ -92,6 +92,7 @@ class FixQEqReaxFF : public Fix { double *Hdia_inv; double *b_s, *b_t; double *b_prc, *b_prm; + double *chi_field; //CG storage double *p, *q, *r, *d; @@ -105,7 +106,7 @@ class FixQEqReaxFF : public Fix { virtual void deallocate_storage(); void reallocate_storage(); virtual void allocate_matrix(); - void deallocate_matrix(); + virtual void deallocate_matrix(); void reallocate_matrix(); virtual void init_matvec(); @@ -134,9 +135,13 @@ class FixQEqReaxFF : public Fix { virtual void vector_sum(double *, double, double *, double, double *, int); virtual void vector_add(double *, double, double *, int); + virtual void get_chi_field(); + // dual CG support int dual_enabled; // 0: Original, separate s & t optimization; 1: dual optimization int matvecs_s, matvecs_t; // Iteration count for each system + + int field_flag; // 1: field enabled }; } // namespace LAMMPS_NS diff --git a/src/REAXFF/pair_reaxff.cpp b/src/REAXFF/pair_reaxff.cpp index 758ee70ab7..27c85ffb1c 100644 --- a/src/REAXFF/pair_reaxff.cpp +++ b/src/REAXFF/pair_reaxff.cpp @@ -36,6 +36,7 @@ #include "neigh_request.h" #include "neighbor.h" #include "update.h" +#include "fix_acks2_reaxff.h" #include #include @@ -155,6 +156,7 @@ PairReaxFF::~PairReaxFF() delete [] chi; delete [] eta; delete [] gamma; + delete [] bcut_acks2; } memory->destroy(tmpid); @@ -179,6 +181,7 @@ void PairReaxFF::allocate() chi = new double[n+1]; eta = new double[n+1]; gamma = new double[n+1]; + bcut_acks2 = new double[n+1]; } /* ---------------------------------------------------------------------- */ @@ -337,9 +340,19 @@ void PairReaxFF::init_style() error->all(FLERR,"Pair style reaxff requires atom attribute q"); bool have_qeq = ((modify->find_fix_by_style("^qeq/reax") != -1) - || (modify->find_fix_by_style("^qeq/shielded") != -1)); + || (modify->find_fix_by_style("^qeq/shielded") != -1) + || (modify->find_fix_by_style("^acks2/reax") != -1)); if (!have_qeq && qeqflag == 1) - error->all(FLERR,"Pair reaxff requires use of fix qeq/reaxff or qeq/shielded"); + error->all(FLERR,"Pair reax/c requires use of fix qeq/reax or qeq/shielded" + " or fix acks2/reax"); + + int have_acks2 = (modify->find_fix_by_style("^acks2/reax") != -1); + api->system->acks2_flag = have_acks2; + if (api->system->acks2_flag) { + int ifix = modify->find_fix_by_style("^acks2/reax"); + FixACKS2ReaxFF* acks2_fix = (FixACKS2ReaxFF*) modify->fix[ifix]; + api->workspace->s = acks2_fix->get_s(); + } api->system->n = atom->nlocal; // my atoms api->system->N = atom->nlocal + atom->nghost; // mine + ghosts @@ -466,6 +479,12 @@ void PairReaxFF::compute(int eflag, int vflag) api->system->N = atom->nlocal + atom->nghost; // mine + ghosts api->system->bigN = static_cast (atom->natoms); // all atoms in the system + if (api->system->acks2_flag) { + int ifix = modify->find_fix_by_style("^acks2/reax"); + FixACKS2ReaxFF* acks2_fix = (FixACKS2ReaxFF*) modify->fix[ifix]; + api->workspace->s = acks2_fix->get_s(); + } + // setup data structures setup(); @@ -731,6 +750,16 @@ void *PairReaxFF::extract(const char *str, int &dim) if (map[i] >= 0) gamma[i] = api->system->reax_param.sbp[map[i]].gamma; else gamma[i] = 0.0; return (void *) gamma; + } + if (strcmp(str,"bcut_acks2") == 0 && bcut_acks2) { + for (int i = 1; i <= atom->ntypes; i++) + if (map[i] >= 0) bcut_acks2[i] = api->system->reax_param.sbp[map[i]].bcut_acks2; + else bcut_acks2[i] = 0.0; + return (void *) bcut_acks2; + } + if (strcmp(str,"bond_softness") == 0) { + double* bond_softness = &api->system->reax_param.gp.l[34]; + return (void *) bond_softness; } return nullptr; } diff --git a/src/REAXFF/pair_reaxff.h b/src/REAXFF/pair_reaxff.h index 846d4e8408..23b2ae894a 100644 --- a/src/REAXFF/pair_reaxff.h +++ b/src/REAXFF/pair_reaxff.h @@ -63,7 +63,7 @@ class PairReaxFF : public Pair { double cutmax; class FixReaxFF *fix_reaxff; - double *chi, *eta, *gamma; + double *chi, *eta, *gamma, *bcut_acks2; int qeqflag; int setup_flag; int firstwarn; diff --git a/src/REAXFF/reaxff_api.h b/src/REAXFF/reaxff_api.h index 68eb7d25a2..5c3221c188 100644 --- a/src/REAXFF/reaxff_api.h +++ b/src/REAXFF/reaxff_api.h @@ -125,7 +125,7 @@ extern void Atom_Energy(reax_system *, control_params *, simulation_data *, stor // nonbonded -extern void Compute_Polarization_Energy(reax_system *, simulation_data *); +extern void Compute_Polarization_Energy(reax_system *, simulation_data *, storage *); extern void vdW_Coulomb_Energy(reax_system *, control_params *, simulation_data *, storage *, reax_list **); extern void Tabulated_vdW_Coulomb_Energy(reax_system *, control_params *, simulation_data *, diff --git a/src/REAXFF/reaxff_ffield.cpp b/src/REAXFF/reaxff_ffield.cpp index f43ab440ff..4c3068a089 100644 --- a/src/REAXFF/reaxff_ffield.cpp +++ b/src/REAXFF/reaxff_ffield.cpp @@ -204,6 +204,7 @@ namespace ReaxFF { sbp[i].b_o_131 = values.next_double(); sbp[i].b_o_132 = values.next_double(); sbp[i].b_o_133 = values.next_double(); + sbp[i].bcut_acks2 = values.next_double(); // line four diff --git a/src/REAXFF/reaxff_nonbonded.cpp b/src/REAXFF/reaxff_nonbonded.cpp index 8a1e41da29..f7ca9e750d 100644 --- a/src/REAXFF/reaxff_nonbonded.cpp +++ b/src/REAXFF/reaxff_nonbonded.cpp @@ -32,7 +32,7 @@ #include namespace ReaxFF { - void Compute_Polarization_Energy(reax_system *system, simulation_data *data) + void Compute_Polarization_Energy(reax_system *system, simulation_data *data, storage *workspace) { int i, type_i; double q, en_tmp; @@ -45,6 +45,12 @@ namespace ReaxFF { en_tmp = KCALpMOL_to_EV * (system->reax_param.sbp[type_i].chi * q + (system->reax_param.sbp[type_i].eta / 2.) * SQR(q)); + + if (system->acks2_flag) { + /* energy due to coupling with kinetic energy potential */ + en_tmp += KCALpMOL_to_EV * q * workspace->s[ system->N + i ]; + } + data->my_en.e_pol += en_tmp; /* tally energy into global or per-atom energy accumulators */ @@ -67,6 +73,7 @@ namespace ReaxFF { double dr3gamij_1, dr3gamij_3; double e_ele, e_vdW, e_core, SMALL = 0.0001; double e_lg, de_lg, r_ij5, r_ij6, re6; + double xcut, bond_softness, d_bond_softness, d, effpot_diff; two_body_parameters *twbp; far_neighbor_data *nbr_pj; reax_list *far_nbrs; @@ -207,7 +214,83 @@ namespace ReaxFF { } } - Compute_Polarization_Energy(system, data); + /* contribution to energy and gradients (atoms and cell) + * due to geometry-dependent terms in the ACKS2 + * kinetic energy */ + if (system->acks2_flag) + for( i = 0; i < natoms; ++i ) { + if (system->my_atoms[i].type < 0) continue; + start_i = Start_Index(i, far_nbrs); + end_i = End_Index(i, far_nbrs); + orig_i = system->my_atoms[i].orig_id; + + for( pj = start_i; pj < end_i; ++pj ) { + nbr_pj = &(far_nbrs->select.far_nbr_list[pj]); + j = nbr_pj->nbr; + if (system->my_atoms[j].type < 0) continue; + orig_j = system->my_atoms[j].orig_id; + + flag = 0; + + /* kinetic energy terms */ + double xcut = 0.5 * ( system->reax_param.sbp[ system->my_atoms[i].type ].bcut_acks2 + + system->reax_param.sbp[ system->my_atoms[j].type ].bcut_acks2 ); + + if(nbr_pj->d <= xcut) { + if (j < natoms) flag = 1; + else if (orig_i < orig_j) flag = 1; + else if (orig_i == orig_j) { + if (nbr_pj->dvec[2] > SMALL) flag = 1; + else if (fabs(nbr_pj->dvec[2]) < SMALL) { + if (nbr_pj->dvec[1] > SMALL) flag = 1; + else if (fabs(nbr_pj->dvec[1]) < SMALL && nbr_pj->dvec[0] > SMALL) + flag = 1; + } + } + } + + if (flag) { + + d = nbr_pj->d / xcut; + bond_softness = system->reax_param.gp.l[34] * pow( d, 3.0 ) + * pow( 1.0 - d, 6.0 ); + + if ( bond_softness > 0.0 ) + { + /* Coulombic energy contribution */ + effpot_diff = workspace->s[system->N + i] + - workspace->s[system->N + j]; + e_ele = -0.5 * KCALpMOL_to_EV * bond_softness + * SQR( effpot_diff ); + + data->my_en.e_ele += e_ele; + + /* forces contribution */ + d_bond_softness = system->reax_param.gp.l[34] + * 3.0 / xcut * pow( d, 2.0 ) + * pow( 1.0 - d, 5.0 ) * (1.0 - 3.0 * d); + d_bond_softness = -0.5 * d_bond_softness + * SQR( effpot_diff ); + d_bond_softness = KCALpMOL_to_EV * d_bond_softness + / nbr_pj->d; + + /* tally into per-atom energy */ + if (system->pair_ptr->evflag || system->pair_ptr->vflag_atom) { + rvec_ScaledSum( delij, 1., system->my_atoms[i].x, + -1., system->my_atoms[j].x ); + f_tmp = -d_bond_softness; + system->pair_ptr->ev_tally(i,j,natoms,1,0.0,e_ele, + f_tmp,delij[0],delij[1],delij[2]); + } + + rvec_ScaledAdd( workspace->f[i], -d_bond_softness, nbr_pj->dvec ); + rvec_ScaledAdd( workspace->f[j], d_bond_softness, nbr_pj->dvec ); + } + } + } + } + + Compute_Polarization_Energy( system, data, workspace ); } void Tabulated_vdW_Coulomb_Energy(reax_system *system, control_params *control, @@ -222,6 +305,7 @@ namespace ReaxFF { double e_vdW, e_ele; double CEvd, CEclmb, SMALL = 0.0001; double f_tmp, delij[3]; + double xcut, bond_softness, d_bond_softness, d, effpot_diff; far_neighbor_data *nbr_pj; reax_list *far_nbrs; @@ -306,7 +390,83 @@ namespace ReaxFF { } } - Compute_Polarization_Energy(system, data); + /* contribution to energy and gradients (atoms and cell) + * due to geometry-dependent terms in the ACKS2 + * kinetic energy */ + if (system->acks2_flag) + for( i = 0; i < natoms; ++i ) { + if (system->my_atoms[i].type < 0) continue; + start_i = Start_Index(i, far_nbrs); + end_i = End_Index(i, far_nbrs); + orig_i = system->my_atoms[i].orig_id; + + for( pj = start_i; pj < end_i; ++pj ) { + nbr_pj = &(far_nbrs->select.far_nbr_list[pj]); + j = nbr_pj->nbr; + if (system->my_atoms[j].type < 0) continue; + orig_j = system->my_atoms[j].orig_id; + + flag = 0; + + /* kinetic energy terms */ + xcut = 0.5 * ( system->reax_param.sbp[ system->my_atoms[i].type ].bcut_acks2 + + system->reax_param.sbp[ system->my_atoms[j].type ].bcut_acks2 ); + + if(nbr_pj->d <= xcut) { + if (j < natoms) flag = 1; + else if (orig_i < orig_j) flag = 1; + else if (orig_i == orig_j) { + if (nbr_pj->dvec[2] > SMALL) flag = 1; + else if (fabs(nbr_pj->dvec[2]) < SMALL) { + if (nbr_pj->dvec[1] > SMALL) flag = 1; + else if (fabs(nbr_pj->dvec[1]) < SMALL && nbr_pj->dvec[0] > SMALL) + flag = 1; + } + } + } + + if (flag) { + + d = nbr_pj->d / xcut; + bond_softness = system->reax_param.gp.l[34] * pow( d, 3.0 ) + * pow( 1.0 - d, 6.0 ); + + if ( bond_softness > 0.0 ) + { + /* Coulombic energy contribution */ + effpot_diff = workspace->s[system->N + i] + - workspace->s[system->N + j]; + e_ele = -0.5 * KCALpMOL_to_EV * bond_softness + * SQR( effpot_diff ); + + data->my_en.e_ele += e_ele; + + /* forces contribution */ + d_bond_softness = system->reax_param.gp.l[34] + * 3.0 / xcut * pow( d, 2.0 ) + * pow( 1.0 - d, 5.0 ) * (1.0 - 3.0 * d); + d_bond_softness = -0.5 * d_bond_softness + * SQR( effpot_diff ); + d_bond_softness = KCALpMOL_to_EV * d_bond_softness + / nbr_pj->d; + + /* tally into per-atom energy */ + if (system->pair_ptr->evflag || system->pair_ptr->vflag_atom) { + rvec_ScaledSum( delij, 1., system->my_atoms[i].x, + -1., system->my_atoms[j].x ); + f_tmp = -d_bond_softness; + system->pair_ptr->ev_tally(i,j,natoms,1,0.0,e_ele, + f_tmp,delij[0],delij[1],delij[2]); + } + + rvec_ScaledAdd( workspace->f[i], -d_bond_softness, nbr_pj->dvec ); + rvec_ScaledAdd( workspace->f[j], d_bond_softness, nbr_pj->dvec ); + } + } + } + } + + Compute_Polarization_Energy(system, data, workspace); } void LR_vdW_Coulomb(reax_system *system, storage *workspace, diff --git a/src/REAXFF/reaxff_types.h b/src/REAXFF/reaxff_types.h index 6bd752f58a..57df7fe975 100644 --- a/src/REAXFF/reaxff_types.h +++ b/src/REAXFF/reaxff_types.h @@ -77,6 +77,7 @@ struct single_body_parameters { double b_o_131; double b_o_132; double b_o_133; + double bcut_acks2; // ACKS2 bond cutoff /* Line four in the field file */ double p_ovun2; @@ -212,6 +213,7 @@ struct reax_system { LR_lookup_table **LR; int omp_active; + int acks2_flag; }; /* system control parameters */ @@ -340,6 +342,9 @@ struct storage { double *CdDeltaReduction; int *valence_angle_atom_myoffset; + /* acks2 */ + double *s; + reallocate_data realloc; }; diff --git a/src/fix_efield.cpp b/src/fix_efield.cpp index 98cecd8a01..4009e3de3d 100644 --- a/src/fix_efield.cpp +++ b/src/fix_efield.cpp @@ -42,7 +42,7 @@ enum{NONE,CONSTANT,EQUAL,ATOM}; FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), xstr(nullptr), ystr(nullptr), zstr(nullptr), - estr(nullptr), idregion(nullptr), efield(nullptr) + estr(nullptr), idregion(nullptr), efield(nullptr), energy(nullptr) { if (narg < 6) error->all(FLERR,"Illegal fix efield command"); @@ -111,6 +111,8 @@ FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) : maxatom = atom->nmax; memory->create(efield,maxatom,4,"efield:efield"); + + maxatom_energy = 0; } /* ---------------------------------------------------------------------- */ @@ -123,6 +125,7 @@ FixEfield::~FixEfield() delete [] estr; delete [] idregion; memory->destroy(efield); + memory->destroy(energy); } /* ---------------------------------------------------------------------- */ @@ -449,3 +452,71 @@ double FixEfield::compute_vector(int n) } return fsum_all[n+1]; } + +/* ---------------------------------------------------------------------- + get E +------------------------------------------------------------------------- */ + +double* FixEfield::get_energy() +{ + double **x = atom->x; + double *q = atom->q; + int *mask = atom->mask; + imageint *image = atom->image; + + int nlocal = atom->nlocal; + + // reallocate energy array if necessary + + if (atom->nmax > maxatom_energy) { + maxatom_energy = atom->nmax; + memory->destroy(energy); + memory->create(energy,maxatom_energy,"efield:energy"); + } + memset(&energy[0],0.0,maxatom_energy*sizeof(double)); + + // update region if necessary + + Region *region = NULL; + if (iregion >= 0) { + region = domain->regions[iregion]; + region->prematch(); + } + + int warn_flag_local = 0; + + // constant efield + + if (varflag == CONSTANT) { + double unwrap[3]; + + // charge interactions + // force = qE, potential energy = F dot x in unwrapped coords + + if (qflag) { + for (int i = 0; i < nlocal; i++) { + if (mask[i] & groupbit) { + if (region && !region->match(x[i][0],x[i][1],x[i][2])) continue; + const double fx = ex; + const double fy = ey; + const double fz = ez; + + domain->unmap(x[i],image[i],unwrap); + energy[i] -= fx*unwrap[0] + fy*unwrap[1] + fz*unwrap[2]; + if (fabs(fx*(x[i][0]-unwrap[0])) + fabs(fy*(x[i][1]-unwrap[1])) + + fabs(fz*(x[i][2]-unwrap[2])) > 0.0) + warn_flag_local = 1; + } + } + } + } else { + error->all(FLERR,"Cannot yet use fix qeq/reaxff with variable efield"); + } + + int warn_flag; + MPI_Allreduce(&warn_flag_local,&warn_flag,1,MPI_INT,MPI_SUM,world); + if (warn_flag && comm->me == 0) + error->warning(FLERR,"Using non-zero image flags in field direction with fix qeq/reaxff"); + + return energy; +} diff --git a/src/fix_efield.h b/src/fix_efield.h index c9496e29d2..86ccdae7e5 100644 --- a/src/fix_efield.h +++ b/src/fix_efield.h @@ -38,6 +38,7 @@ class FixEfield : public Fix { double memory_usage(); double compute_scalar(); double compute_vector(int); + double* get_energy(); private: double ex, ey, ez; @@ -49,8 +50,8 @@ class FixEfield : public Fix { double qe2f; int qflag, muflag; - int maxatom; - double **efield; + int maxatom,maxatom_energy; + double **efield,*energy; int force_flag; double fsum[4], fsum_all[4];