From bee5afcd0832cba5468696c8a8a5fa5cc7c4ba1f Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Fri, 21 Apr 2023 00:22:36 -0500 Subject: [PATCH 001/107] Added yukawa/colloid/kk --- src/KOKKOS/Install.sh | 2 + src/KOKKOS/pair_yukawa_colloid_kokkos.cpp | 271 ++++++++++++++++++++++ src/KOKKOS/pair_yukawa_colloid_kokkos.h | 121 ++++++++++ 3 files changed, 394 insertions(+) create mode 100644 src/KOKKOS/pair_yukawa_colloid_kokkos.cpp create mode 100644 src/KOKKOS/pair_yukawa_colloid_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index ede766cbf8..766daaff19 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -361,6 +361,8 @@ action pair_vashishta_kokkos.cpp pair_vashishta.cpp action pair_vashishta_kokkos.h pair_vashishta.h action pair_yukawa_kokkos.cpp action pair_yukawa_kokkos.h +action pair_yukawa_colloid_kokkos.cpp pair_yukawa_colloid.cpp +action pair_yukawa_colloid_kokkos.h pair_yukawa_colloid.h action pair_zbl_kokkos.cpp action pair_zbl_kokkos.h action pppm_kokkos.cpp pppm.cpp diff --git a/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp b/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp new file mode 100644 index 0000000000..ca491a3800 --- /dev/null +++ b/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp @@ -0,0 +1,271 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Trung Nguyen (U Chicago) +------------------------------------------------------------------------- */ + +#include "pair_yukawa_colloid_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "memory_kokkos.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "neighbor.h" +#include "respa.h" +#include "update.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +PairYukawaColloidKokkos::PairYukawaColloidKokkos(LAMMPS *lmp) : PairYukawaColloid(lmp) +{ + respa_enable = 0; + + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | RADIUS_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +PairYukawaColloidKokkos::~PairYukawaColloidKokkos() +{ + if (copymode) return; + + if (allocated) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_cutsq,cutsq); + } +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template +void PairYukawaColloidKokkos::allocate() +{ + PairYukawaColloid::allocate(); + + int n = atom->ntypes; + memory->destroy(cutsq); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view(); + k_params = Kokkos::DualView( + "PairYukawaColloid::params",n+1,n+1); + + params = k_params.template view(); +} + +/* 
---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairYukawaColloidKokkos::init_style() +{ + PairYukawaColloid::init_style(); + + // error if rRESPA with inner levels + + if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) { + int respa = 0; + if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; + if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + if (respa) + error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle"); + } + + // adjust neighbor list request for KOKKOS + + neighflag = lmp->kokkos->neighflag; + auto request = neighbor->find_request(this); + request->set_kokkos_host(std::is_same::value && + !std::is_same::value); + request->set_kokkos_device(std::is_same::value); + if (neighflag == FULL) request->enable_full(); +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ +// Rewrite this. 
+template +double PairYukawaColloidKokkos::init_one(int i, int j) +{ + double cutone = PairYukawaColloid::init_one(i,j); + + k_params.h_view(i,j).a = a[i][j]; + k_params.h_view(i,j).offset = offset[i][j]; + k_params.h_view(i,j).cutsq = cutone*cutone; + k_params.h_view(j,i) = k_params.h_view(i,j); + + if (i(); + k_params.template modify(); + + return cutone; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairYukawaColloidKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + ev_init(eflag,vflag,0); + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view(); + } + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); + d_vatom = k_vatom.view(); + } + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync(); + k_params.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view(); + c_x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + type = atomKK->k_type.view(); + radius = atomKK->k_radius.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + + // loop over neighbors of my atoms + + EV_FLOAT ev = pair_compute,void >( + this,(NeighListKokkos*)list); + + if (eflag_global) eng_vdwl += ev.evdwl; + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_fdotr) 
pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } +} + + + +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairYukawaColloidKokkos:: +compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const { + (void) i; + (void) j; + const F_FLOAT radi = radius[i]; + const F_FLOAT radj = radius[j]; + const F_FLOAT rr = sqrt(rsq); + // Fetch the params either off the stack or from some mapped memory? + const F_FLOAT aa = STACKPARAMS ? m_params[itype][jtype].a + : params(itype,jtype).a; + + // U = a * exp(-kappa*(r-(radi+radj))) / kappa + // f = a * exp(-kappa*(r-(radi+radj))) + // f/r = a * exp(-kappa*(r-(radi+radj))) / r + const F_FLOAT rinv = 1.0 / rr; + const F_FLOAT rinv2 = rinv*rinv; + const F_FLOAT screening = exp(-kappa*(rr-(radi+radj))); + const F_FLOAT forceyukawa = aa * screening; + const F_FLOAT fpair = forceyukawa * rinv; + + return fpair; +} + +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairYukawaColloidKokkos:: +compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const { + (void) i; + (void) j; + const F_FLOAT radi = radius[i]; + const F_FLOAT radj = radius[j]; + const F_FLOAT rr = sqrt(rsq); + const F_FLOAT aa = STACKPARAMS ? m_params[itype][jtype].a + : params(itype,jtype).a; + const F_FLOAT offset = STACKPARAMS ? 
m_params[itype][jtype].offset + : params(itype,jtype).offset; + + // U = a * exp(-kappa*(r-(radi+radj))) / kappa - offset + const F_FLOAT rinv = 1.0 / rr; + const F_FLOAT screening = exp(-kappa*(rr-(radi+radj))); + + return aa / kappa * screening - offset; +} + + +namespace LAMMPS_NS { +template class PairYukawaColloidKokkos; +#ifdef LMP_KOKKOS_GPU +template class PairYukawaColloidKokkos; +#endif +} diff --git a/src/KOKKOS/pair_yukawa_colloid_kokkos.h b/src/KOKKOS/pair_yukawa_colloid_kokkos.h new file mode 100644 index 0000000000..060b621a8a --- /dev/null +++ b/src/KOKKOS/pair_yukawa_colloid_kokkos.h @@ -0,0 +1,121 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(yukawa/colloid/kk,PairYukawaColloidKokkos); +PairStyle(yukawa/colloid/kk/device,PairYukawaColloidKokkos); +PairStyle(yukawa/colloid/kk/host,PairYukawaColloidKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_PAIR_YUKAWA_COLLOID_KOKKOS_H +#define LMP_PAIR_YUKAWA_COLLOID_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_yukawa_colloid.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template +class PairYukawaColloidKokkos : public PairYukawaColloid { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {COUL_FLAG=0}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + + PairYukawaColloidKokkos(class LAMMPS *); + ~PairYukawaColloidKokkos() override; + + void compute(int, int) override; + void init_style() override; + double init_one(int,int) override; + + struct params_yukawa { + KOKKOS_INLINE_FUNCTION + params_yukawa() { cutsq=0, a = 0; offset = 0; } + KOKKOS_INLINE_FUNCTION + params_yukawa(int /*i*/) { cutsq=0, a = 0; offset = 0; } + F_FLOAT cutsq, a, offset; + }; + + + protected: + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_ecoul(const F_FLOAT& /*rsq*/, const int& /*i*/, const int& /*j*/, + const int& /*itype*/, const int& /*jtype*/) const { return 0; } + + + Kokkos::DualView k_params; + typename Kokkos::DualView::t_dev_const_um params; + params_yukawa m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename AT::t_x_array_randomread x; + typename AT::t_x_array c_x; + typename 
AT::t_f_array f; + typename AT::t_int_1d_randomread type; + typename AT::t_float_1d_randomread radius; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; + + int newton_pair; + double special_lj[4]; + + typename AT::tdual_ffloat_2d k_cutsq; + typename AT::t_ffloat_2d d_cutsq; + + + int neighflag; + int nlocal,nall,eflag,vflag; + + void allocate() override; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend EV_FLOAT pair_compute_neighlist( + PairYukawaColloidKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist( + PairYukawaColloidKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist( + PairYukawaColloidKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute( + PairYukawaColloidKokkos*,NeighListKokkos*); + friend void pair_virial_fdotr_compute(PairYukawaColloidKokkos*); + +}; + +} + +#endif +#endif + From bacfcd205093c626a30f43e157704118d001f945 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Fri, 7 Jul 2023 07:36:05 -0700 Subject: [PATCH 002/107] change triclinic logic to not depend on exact I,J atom coords --- src/npair_half_bin_newton_tri.cpp | 39 ++++++++++++++++++++++++++++--- src/npair_half_nsq_newton.cpp | 14 ++++++++++- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/src/npair_half_bin_newton_tri.cpp b/src/npair_half_bin_newton_tri.cpp index 88ef993a41..227a25c321 100644 --- a/src/npair_half_bin_newton_tri.cpp +++ b/src/npair_half_bin_newton_tri.cpp @@ -16,6 +16,7 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" +#include "force.h" #include "molecule.h" #include "domain.h" #include "my_page.h" @@ -36,10 +37,12 @@ NPairHalfBinNewtonTri::NPairHalfBinNewtonTri(LAMMPS *lmp) : NPair(lmp) {} void NPairHalfBinNewtonTri::build(NeighList 
*list) { int i,j,k,n,itype,jtype,ibin,which,imol,iatom,moltemplate; - tagint tagprev; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr; - + + double angstrom = force->angstrom; + double **x = atom->x; int *type = atom->type; int *mask = atom->mask; @@ -68,6 +71,7 @@ void NPairHalfBinNewtonTri::build(NeighList *list) n = 0; neighptr = ipage->vget(); + itag = tag[i]; itype = type[i]; xtmp = x[i][0]; ytmp = x[i][1]; @@ -87,6 +91,34 @@ void NPairHalfBinNewtonTri::build(NeighList *list) ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + + if (fabs(x[j][2]-ztmp) > angstrom) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > angstrom) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + + /* + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp) { + if (x[j][1] < ytmp) continue; + if (x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + */ + } + } + + /* if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { if (x[j][1] < ytmp) continue; @@ -95,7 +127,8 @@ void NPairHalfBinNewtonTri::build(NeighList *list) if (x[j][0] == xtmp && j <= i) continue; } } - + */ + jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; diff --git a/src/npair_half_nsq_newton.cpp b/src/npair_half_nsq_newton.cpp index e5f3138f0a..20ffbf6977 100644 --- a/src/npair_half_nsq_newton.cpp +++ b/src/npair_half_nsq_newton.cpp @@ -16,6 +16,7 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" +#include "force.h" #include "group.h" #include "molecule.h" #include "domain.h" @@ -41,6 +42,9 @@ void NPairHalfNsqNewton::build(NeighList *list) double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr; + double angstrom = force->angstrom; + int triclinic = 
domain->triclinic; + double **x = atom->x; int *type = atom->type; int *mask = atom->mask; @@ -96,7 +100,15 @@ void NPairHalfNsqNewton::build(NeighList *list) if ((itag+jtag) % 2 == 0) continue; } else if (itag < jtag) { if ((itag+jtag) % 2 == 1) continue; - } else { + } else if (triclinic) { + if (fabs(x[j][2]-ztmp) > angstrom) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > angstrom) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } else { if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { if (x[j][1] < ytmp) continue; From 129264aa148bb2b10ce0d6dbfda866443f58d749 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Fri, 7 Jul 2023 08:42:46 -0700 Subject: [PATCH 003/107] debugging --- src/npair_half_bin_newton_tri.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/npair_half_bin_newton_tri.cpp b/src/npair_half_bin_newton_tri.cpp index 227a25c321..00b5722673 100644 --- a/src/npair_half_bin_newton_tri.cpp +++ b/src/npair_half_bin_newton_tri.cpp @@ -92,6 +92,7 @@ void NPairHalfBinNewtonTri::build(NeighList *list) for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + /* if (j >= nlocal) { jtag = tag[j]; if (itag > jtag) { @@ -108,17 +109,15 @@ void NPairHalfBinNewtonTri::build(NeighList *list) if (x[j][0] < xtmp) continue; } - /* if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { if (x[j][1] < ytmp) continue; if (x[j][1] == ytmp && x[j][0] < xtmp) continue; } - */ } } - - /* + */ + if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { if (x[j][1] < ytmp) continue; @@ -127,7 +126,6 @@ void NPairHalfBinNewtonTri::build(NeighList *list) if (x[j][0] == xtmp && j <= i) continue; } } - */ jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; From 42f2a4b5b0e8021b01b1655b2c555cd392539c18 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Fri, 7 Jul 2023 08:58:14 -0700 Subject: [PATCH 004/107] exclude self interactions 
and double counting of own/own --- src/npair_half_bin_newton_tri.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/npair_half_bin_newton_tri.cpp b/src/npair_half_bin_newton_tri.cpp index 00b5722673..4cd12b2e7c 100644 --- a/src/npair_half_bin_newton_tri.cpp +++ b/src/npair_half_bin_newton_tri.cpp @@ -92,7 +92,7 @@ void NPairHalfBinNewtonTri::build(NeighList *list) for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { - /* + if (j <= i) continue; if (j >= nlocal) { jtag = tag[j]; if (itag > jtag) { @@ -109,15 +109,15 @@ void NPairHalfBinNewtonTri::build(NeighList *list) if (x[j][0] < xtmp) continue; } - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp && x[j][0] < xtmp) continue; - } + //if (x[j][2] < ztmp) continue; + //if (x[j][2] == ztmp) { + // if (x[j][1] < ytmp) continue; + // if (x[j][1] == ytmp && x[j][0] < xtmp) continue; + // } } } - */ - + + /* if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { if (x[j][1] < ytmp) continue; @@ -126,6 +126,7 @@ void NPairHalfBinNewtonTri::build(NeighList *list) if (x[j][0] == xtmp && j <= i) continue; } } + */ jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; @@ -136,6 +137,8 @@ void NPairHalfBinNewtonTri::build(NeighList *list) rsq = delx*delx + dely*dely + delz*delz; if (rsq <= cutneighsq[itype][jtype]) { + //printf("NEIGH i,j %d %d ijtag %d %d dist %g\n", + // i,j,tag[i],tag[j],sqrt(rsq)); if (molecular != Atom::ATOMIC) { if (!moltemplate) which = find_special(special[i],nspecial[i],tag[j]); From abadf9412afb695eabeb996d507e6533ec686d07 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Sat, 8 Jul 2023 10:26:34 -0700 Subject: [PATCH 005/107] check old results --- src/npair_half_bin_newton_tri.cpp | 7 ++++--- src/npair_half_nsq_newton.cpp | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/npair_half_bin_newton_tri.cpp 
b/src/npair_half_bin_newton_tri.cpp index 4cd12b2e7c..61d67fed2a 100644 --- a/src/npair_half_bin_newton_tri.cpp +++ b/src/npair_half_bin_newton_tri.cpp @@ -92,6 +92,7 @@ void NPairHalfBinNewtonTri::build(NeighList *list) for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + /* if (j <= i) continue; if (j >= nlocal) { jtag = tag[j]; @@ -116,8 +117,9 @@ void NPairHalfBinNewtonTri::build(NeighList *list) // } } } - - /* + */ + + if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { if (x[j][1] < ytmp) continue; @@ -126,7 +128,6 @@ void NPairHalfBinNewtonTri::build(NeighList *list) if (x[j][0] == xtmp && j <= i) continue; } } - */ jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; diff --git a/src/npair_half_nsq_newton.cpp b/src/npair_half_nsq_newton.cpp index 20ffbf6977..995c9cfbed 100644 --- a/src/npair_half_nsq_newton.cpp +++ b/src/npair_half_nsq_newton.cpp @@ -100,7 +100,8 @@ void NPairHalfNsqNewton::build(NeighList *list) if ((itag+jtag) % 2 == 0) continue; } else if (itag < jtag) { if ((itag+jtag) % 2 == 1) continue; - } else if (triclinic) { + /* + } else if (triclinic) { if (fabs(x[j][2]-ztmp) > angstrom) { if (x[j][2] < ztmp) continue; } else if (fabs(x[j][1]-ytmp) > angstrom) { @@ -108,6 +109,7 @@ void NPairHalfNsqNewton::build(NeighList *list) } else { if (x[j][0] < xtmp) continue; } + */ } else { if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { From ce1a084a0efba244310fd4d8448b726210030f8d Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Sat, 8 Jul 2023 18:31:55 -0700 Subject: [PATCH 006/107] expand stencil for triclinic neighbor build --- src/npair_half_bin_newton_tri.cpp | 15 ++++++--------- src/npair_half_nsq_newton.cpp | 10 ++++------ src/npair_halffull_newton.cpp | 15 +++++++++++++++ src/nstencil_half_bin_3d_tri.cpp | 3 ++- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/npair_half_bin_newton_tri.cpp b/src/npair_half_bin_newton_tri.cpp index 
61d67fed2a..17504ac3af 100644 --- a/src/npair_half_bin_newton_tri.cpp +++ b/src/npair_half_bin_newton_tri.cpp @@ -41,7 +41,7 @@ void NPairHalfBinNewtonTri::build(NeighList *list) double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr; - double angstrom = force->angstrom; + double delta = 0.01 * force->angstrom; double **x = atom->x; int *type = atom->type; @@ -92,7 +92,6 @@ void NPairHalfBinNewtonTri::build(NeighList *list) for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { - /* if (j <= i) continue; if (j >= nlocal) { jtag = tag[j]; @@ -101,15 +100,13 @@ void NPairHalfBinNewtonTri::build(NeighList *list) } else if (itag < jtag) { if ((itag+jtag) % 2 == 1) continue; } else { - - if (fabs(x[j][2]-ztmp) > angstrom) { + if (fabs(x[j][2]-ztmp) > delta) { if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > angstrom) { + } else if (fabs(x[j][1]-ytmp) > delta) { if (x[j][1] < ytmp) continue; } else { if (x[j][0] < xtmp) continue; } - //if (x[j][2] < ztmp) continue; //if (x[j][2] == ztmp) { // if (x[j][1] < ytmp) continue; @@ -117,9 +114,8 @@ void NPairHalfBinNewtonTri::build(NeighList *list) // } } } - */ - - + + /* if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { if (x[j][1] < ytmp) continue; @@ -128,6 +124,7 @@ void NPairHalfBinNewtonTri::build(NeighList *list) if (x[j][0] == xtmp && j <= i) continue; } } + */ jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; diff --git a/src/npair_half_nsq_newton.cpp b/src/npair_half_nsq_newton.cpp index 995c9cfbed..be06393f58 100644 --- a/src/npair_half_nsq_newton.cpp +++ b/src/npair_half_nsq_newton.cpp @@ -42,7 +42,7 @@ void NPairHalfNsqNewton::build(NeighList *list) double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr; - double angstrom = force->angstrom; + double delta = 0.01 * force->angstrom; int triclinic = domain->triclinic; double **x = atom->x; @@ -100,16 +100,14 @@ void NPairHalfNsqNewton::build(NeighList *list) if ((itag+jtag) % 2 
== 0) continue; } else if (itag < jtag) { if ((itag+jtag) % 2 == 1) continue; - /* - } else if (triclinic) { - if (fabs(x[j][2]-ztmp) > angstrom) { + } else if (triclinic) { + if (fabs(x[j][2]-ztmp) > delta) { if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > angstrom) { + } else if (fabs(x[j][1]-ytmp) > delta) { if (x[j][1] < ytmp) continue; } else { if (x[j][0] < xtmp) continue; } - */ } else { if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { diff --git a/src/npair_halffull_newton.cpp b/src/npair_halffull_newton.cpp index 407a71e614..d1e894943c 100644 --- a/src/npair_halffull_newton.cpp +++ b/src/npair_halffull_newton.cpp @@ -14,7 +14,9 @@ #include "npair_halffull_newton.h" #include "atom.h" +#include "domain.h" #include "error.h" +#include "force.h" #include "my_page.h" #include "neigh_list.h" @@ -37,6 +39,9 @@ void NPairHalffullNewton::build(NeighList *list) int *neighptr, *jlist; double xtmp, ytmp, ztmp; + double delta = 0.01 * force->angstrom; + int triclinic = domain->triclinic; + double **x = atom->x; int nlocal = atom->nlocal; @@ -72,8 +77,17 @@ void NPairHalffullNewton::build(NeighList *list) for (jj = 0; jj < jnum; jj++) { joriginal = jlist[jj]; j = joriginal & NEIGHMASK; + if (j < nlocal) { if (i > j) continue; + } else if (triclinic) { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } else { if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { @@ -81,6 +95,7 @@ void NPairHalffullNewton::build(NeighList *list) if (x[j][1] == ytmp && x[j][0] < xtmp) continue; } } + neighptr[n++] = joriginal; } diff --git a/src/nstencil_half_bin_3d_tri.cpp b/src/nstencil_half_bin_3d_tri.cpp index d066a24ee6..f94bfc5e63 100644 --- a/src/nstencil_half_bin_3d_tri.cpp +++ b/src/nstencil_half_bin_3d_tri.cpp @@ -29,7 +29,8 @@ void NStencilHalfBin3dTri::create() nstencil = 0; - for (k = 0; k <= sz; k++) + //for (k = 0; k 
<= sz; k++) + for (k = -sz; k <= sz; k++) for (j = -sy; j <= sy; j++) for (i = -sx; i <= sx; i++) if (bin_distance(i, j, k) < cutneighmaxsq) From e3349581c757ad49a187bc86dd429abb4420dd1f Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 9 Jul 2023 15:14:36 -0400 Subject: [PATCH 007/107] fix whitespace and remove debug code --- src/npair_half_bin_newton_tri.cpp | 56 +++++++++++-------------------- src/npair_half_nsq_newton.cpp | 24 ++++++------- src/npair_halffull_newton.cpp | 22 ++++++------ src/nstencil_half_bin_3d_tri.cpp | 1 - 4 files changed, 42 insertions(+), 61 deletions(-) diff --git a/src/npair_half_bin_newton_tri.cpp b/src/npair_half_bin_newton_tri.cpp index 17504ac3af..2917055214 100644 --- a/src/npair_half_bin_newton_tri.cpp +++ b/src/npair_half_bin_newton_tri.cpp @@ -40,9 +40,9 @@ void NPairHalfBinNewtonTri::build(NeighList *list) tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr; - - double delta = 0.01 * force->angstrom; - + + const double delta = 0.01 * force->angstrom; + double **x = atom->x; int *type = atom->type; int *mask = atom->mask; @@ -92,40 +92,24 @@ void NPairHalfBinNewtonTri::build(NeighList *list) for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { - if (j <= i) continue; - if (j >= nlocal) { - jtag = tag[j]; - if (itag > jtag) { - if ((itag+jtag) % 2 == 0) continue; - } else if (itag < jtag) { - if ((itag+jtag) % 2 == 1) continue; - } else { - if (fabs(x[j][2]-ztmp) > delta) { - if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > delta) { - if (x[j][1] < ytmp) continue; - } else { - if (x[j][0] < xtmp) continue; - } - //if (x[j][2] < ztmp) continue; - //if (x[j][2] == ztmp) { - // if (x[j][1] < ytmp) continue; - // if (x[j][1] == ytmp && x[j][0] < xtmp) continue; - // } - } - } - - /* - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == 
xtmp && j <= i) continue; + if (j <= i) continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } } - */ - + jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; @@ -135,8 +119,6 @@ void NPairHalfBinNewtonTri::build(NeighList *list) rsq = delx*delx + dely*dely + delz*delz; if (rsq <= cutneighsq[itype][jtype]) { - //printf("NEIGH i,j %d %d ijtag %d %d dist %g\n", - // i,j,tag[i],tag[j],sqrt(rsq)); if (molecular != Atom::ATOMIC) { if (!moltemplate) which = find_special(special[i],nspecial[i],tag[j]); diff --git a/src/npair_half_nsq_newton.cpp b/src/npair_half_nsq_newton.cpp index be06393f58..295b7de18c 100644 --- a/src/npair_half_nsq_newton.cpp +++ b/src/npair_half_nsq_newton.cpp @@ -42,9 +42,9 @@ void NPairHalfNsqNewton::build(NeighList *list) double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr; - double delta = 0.01 * force->angstrom; - int triclinic = domain->triclinic; - + const double delta = 0.01 * force->angstrom; + const int triclinic = domain->triclinic; + double **x = atom->x; int *type = atom->type; int *mask = atom->mask; @@ -100,15 +100,15 @@ void NPairHalfNsqNewton::build(NeighList *list) if ((itag+jtag) % 2 == 0) continue; } else if (itag < jtag) { if ((itag+jtag) % 2 == 1) continue; - } else if (triclinic) { - if (fabs(x[j][2]-ztmp) > delta) { - if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > delta) { - if (x[j][1] < ytmp) continue; - } else { - if (x[j][0] < xtmp) continue; - } - } else { + } else if (triclinic) { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) 
continue; + } + } else { if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { if (x[j][1] < ytmp) continue; diff --git a/src/npair_halffull_newton.cpp b/src/npair_halffull_newton.cpp index d1e894943c..0192ed5729 100644 --- a/src/npair_halffull_newton.cpp +++ b/src/npair_halffull_newton.cpp @@ -39,8 +39,8 @@ void NPairHalffullNewton::build(NeighList *list) int *neighptr, *jlist; double xtmp, ytmp, ztmp; - double delta = 0.01 * force->angstrom; - int triclinic = domain->triclinic; + const double delta = 0.01 * force->angstrom; + const int triclinic = domain->triclinic; double **x = atom->x; int nlocal = atom->nlocal; @@ -77,17 +77,17 @@ void NPairHalffullNewton::build(NeighList *list) for (jj = 0; jj < jnum; jj++) { joriginal = jlist[jj]; j = joriginal & NEIGHMASK; - + if (j < nlocal) { if (i > j) continue; } else if (triclinic) { - if (fabs(x[j][2]-ztmp) > delta) { - if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > delta) { - if (x[j][1] < ytmp) continue; - } else { - if (x[j][0] < xtmp) continue; - } + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } else { if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { @@ -95,7 +95,7 @@ void NPairHalffullNewton::build(NeighList *list) if (x[j][1] == ytmp && x[j][0] < xtmp) continue; } } - + neighptr[n++] = joriginal; } diff --git a/src/nstencil_half_bin_3d_tri.cpp b/src/nstencil_half_bin_3d_tri.cpp index f94bfc5e63..5887e389fb 100644 --- a/src/nstencil_half_bin_3d_tri.cpp +++ b/src/nstencil_half_bin_3d_tri.cpp @@ -29,7 +29,6 @@ void NStencilHalfBin3dTri::create() nstencil = 0; - //for (k = 0; k <= sz; k++) for (k = -sz; k <= sz; k++) for (j = -sy; j <= sy; j++) for (i = -sx; i <= sx; i++) From 9db5d4523282972f30dd6cb172fddcbb12921c1d Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Sun, 9 Jul 2023 15:29:18 -0400 Subject: [PATCH 008/107] port neighbor list build changes 
to corresponding OPENMP package files --- src/OPENMP/npair_half_bin_newton_tri_omp.cpp | 29 ++++++++++++++------ src/OPENMP/npair_half_nsq_newton_omp.cpp | 24 ++++++++++++---- src/OPENMP/npair_halffull_newton_omp.cpp | 13 +++++++++ src/npair_half_nsq_newton.cpp | 7 +++-- 4 files changed, 56 insertions(+), 17 deletions(-) diff --git a/src/OPENMP/npair_half_bin_newton_tri_omp.cpp b/src/OPENMP/npair_half_bin_newton_tri_omp.cpp index e754456ef1..3ad07acd56 100644 --- a/src/OPENMP/npair_half_bin_newton_tri_omp.cpp +++ b/src/OPENMP/npair_half_bin_newton_tri_omp.cpp @@ -18,6 +18,7 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" +#include "force.h" #include "molecule.h" #include "domain.h" #include "my_page.h" @@ -40,6 +41,7 @@ void NPairHalfBinNewtonTriOmp::build(NeighList *list) const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; const int molecular = atom->molecular; const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0; + const double delta = 0.01 * force->angstrom; NPAIR_OMP_INIT; #if defined(_OPENMP) @@ -48,7 +50,7 @@ void NPairHalfBinNewtonTriOmp::build(NeighList *list) NPAIR_OMP_SETUP(nlocal); int i,j,k,n,itype,jtype,ibin,which,imol,iatom; - tagint tagprev; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr; @@ -79,6 +81,7 @@ void NPairHalfBinNewtonTriOmp::build(NeighList *list) n = 0; neighptr = ipage.vget(); + itag = tag[i]; itype = type[i]; xtmp = x[i][0]; ytmp = x[i][1]; @@ -98,12 +101,22 @@ void NPairHalfBinNewtonTriOmp::build(NeighList *list) ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == xtmp && j <= i) continue; + + if (j <= i) continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + 
if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } } @@ -119,7 +132,7 @@ void NPairHalfBinNewtonTriOmp::build(NeighList *list) if (molecular != Atom::ATOMIC) { if (!moltemplate) which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >=0) + else if (imol >= 0) which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom], tag[j]-tagprev); diff --git a/src/OPENMP/npair_half_nsq_newton_omp.cpp b/src/OPENMP/npair_half_nsq_newton_omp.cpp index cb08cb7f7a..726814c6f0 100644 --- a/src/OPENMP/npair_half_nsq_newton_omp.cpp +++ b/src/OPENMP/npair_half_nsq_newton_omp.cpp @@ -15,14 +15,16 @@ #include "omp_compat.h" #include "npair_half_nsq_newton_omp.h" #include "npair_omp.h" -#include "neigh_list.h" + #include "atom.h" #include "atom_vec.h" +#include "domain.h" +#include "error.h" +#include "force.h" #include "group.h" #include "molecule.h" -#include "domain.h" #include "my_page.h" -#include "error.h" +#include "neigh_list.h" using namespace LAMMPS_NS; @@ -42,6 +44,8 @@ void NPairHalfNsqNewtonOmp::build(NeighList *list) const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0; const int molecular = atom->molecular; const int moltemplate = (molecular == Atom::TEMPLATE) ? 
1 : 0; + const double delta = 0.01 * force->angstrom; + const int triclinic = domain->triclinic; NPAIR_OMP_INIT; #if defined(_OPENMP) @@ -49,8 +53,8 @@ void NPairHalfNsqNewtonOmp::build(NeighList *list) #endif NPAIR_OMP_SETUP(nlocal); - int i,j,n,itype,jtype,itag,jtag,which,imol,iatom; - tagint tagprev; + int i,j,n,itype,jtype,which,imol,iatom; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr; @@ -106,6 +110,14 @@ void NPairHalfNsqNewtonOmp::build(NeighList *list) if ((itag+jtag) % 2 == 0) continue; } else if (itag < jtag) { if ((itag+jtag) % 2 == 1) continue; + } else if (triclinic) { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } else { if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { @@ -127,7 +139,7 @@ void NPairHalfNsqNewtonOmp::build(NeighList *list) if (molecular != Atom::ATOMIC) { if (!moltemplate) which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >=0) + else if (imol >= 0) which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom], tag[j]-tagprev); diff --git a/src/OPENMP/npair_halffull_newton_omp.cpp b/src/OPENMP/npair_halffull_newton_omp.cpp index abd5f7eacb..e833ab3095 100644 --- a/src/OPENMP/npair_halffull_newton_omp.cpp +++ b/src/OPENMP/npair_halffull_newton_omp.cpp @@ -15,7 +15,9 @@ #include "npair_halffull_newton_omp.h" #include "atom.h" +#include "domain.h" #include "error.h" +#include "force.h" #include "my_page.h" #include "neigh_list.h" #include "npair_omp.h" @@ -38,6 +40,8 @@ NPairHalffullNewtonOmp::NPairHalffullNewtonOmp(LAMMPS *lmp) : NPair(lmp) {} void NPairHalffullNewtonOmp::build(NeighList *list) { const int inum_full = list->listfull->inum; + const double delta = 0.01 * force->angstrom; + const int triclinic = domain->triclinic; NPAIR_OMP_INIT; #if defined(_OPENMP) @@ -83,8 +87,17 @@ void 
NPairHalffullNewtonOmp::build(NeighList *list) for (jj = 0; jj < jnum; jj++) { joriginal = jlist[jj]; j = joriginal & NEIGHMASK; + if (j < nlocal) { if (i > j) continue; + } else if (triclinic) { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } else { if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { diff --git a/src/npair_half_nsq_newton.cpp b/src/npair_half_nsq_newton.cpp index 295b7de18c..023ece69c9 100644 --- a/src/npair_half_nsq_newton.cpp +++ b/src/npair_half_nsq_newton.cpp @@ -13,15 +13,16 @@ ------------------------------------------------------------------------- */ #include "npair_half_nsq_newton.h" -#include "neigh_list.h" + #include "atom.h" #include "atom_vec.h" +#include "domain.h" +#include "error.h" #include "force.h" #include "group.h" #include "molecule.h" -#include "domain.h" #include "my_page.h" -#include "error.h" +#include "neigh_list.h" using namespace LAMMPS_NS; From 07f42930ff711b09bf46f9641ae964617edf3cad Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Mon, 10 Jul 2023 12:53:02 -0700 Subject: [PATCH 009/107] clean up code and comments --- src/compute_property_local.cpp | 3 ++- src/npair_half_bin_newton_tri.cpp | 29 ++++++----------------------- src/npair_half_nsq_newton.cpp | 5 +++++ src/npair_halffull_newton.cpp | 5 +++++ src/nstencil_half_bin_2d_tri.cpp | 12 ++++++++++-- src/nstencil_half_bin_3d_tri.cpp | 8 +++++++- 6 files changed, 35 insertions(+), 27 deletions(-) diff --git a/src/compute_property_local.cpp b/src/compute_property_local.cpp index d0523a1bec..92036c4bd2 100644 --- a/src/compute_property_local.cpp +++ b/src/compute_property_local.cpp @@ -405,7 +405,8 @@ int ComputePropertyLocal::count_pairs(int allflag, int forceflag) if (!(mask[j] & groupbit)) continue; // itag = jtag is possible for long cutoffs that include images of self - + // do not need triclinic logic here b/c 
neighbor list itself is correct + if (newton_pair == 0 && j >= nlocal) { jtag = tag[j]; if (itag > jtag) { diff --git a/src/npair_half_bin_newton_tri.cpp b/src/npair_half_bin_newton_tri.cpp index 17504ac3af..71a15df59e 100644 --- a/src/npair_half_bin_newton_tri.cpp +++ b/src/npair_half_bin_newton_tri.cpp @@ -83,11 +83,12 @@ void NPairHalfBinNewtonTri::build(NeighList *list) } // loop over all atoms in bins in stencil - // pairs for atoms j "below" i are excluded - // below = lower z or (equal z and lower y) or (equal zy and lower x) - // (equal zyx and j <= i) - // latter excludes self-self interaction but allows superposed atoms - + // for triclinic, bin stencil is full in all 3 dims + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J coord comparison + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { @@ -107,25 +108,9 @@ void NPairHalfBinNewtonTri::build(NeighList *list) } else { if (x[j][0] < xtmp) continue; } - //if (x[j][2] < ztmp) continue; - //if (x[j][2] == ztmp) { - // if (x[j][1] < ytmp) continue; - // if (x[j][1] == ytmp && x[j][0] < xtmp) continue; - // } } } - /* - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == xtmp && j <= i) continue; - } - } - */ - jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; @@ -135,8 +120,6 @@ void NPairHalfBinNewtonTri::build(NeighList *list) rsq = delx*delx + dely*dely + delz*delz; if (rsq <= cutneighsq[itype][jtype]) { - //printf("NEIGH i,j %d %d ijtag %d %d dist %g\n", - // i,j,tag[i],tag[j],sqrt(rsq)); if (molecular != Atom::ATOMIC) { if (!moltemplate) which = find_special(special[i],nspecial[i],tag[j]); diff --git a/src/npair_half_nsq_newton.cpp
b/src/npair_half_nsq_newton.cpp index be06393f58..0174a78900 100644 --- a/src/npair_half_nsq_newton.cpp +++ b/src/npair_half_nsq_newton.cpp @@ -89,7 +89,12 @@ void NPairHalfNsqNewton::build(NeighList *list) } // loop over remaining atoms, owned and ghost + // use itag/jtag comparison to eliminate half the interactions // itag = jtag is possible for long cutoffs that include images of self + // for triclinic, must use delta to eliminate half the I/J interactions + // cannot use direct I/J coord comparison as for orthog + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon for (j = i+1; j < nall; j++) { if (includegroup && !(mask[j] & bitmask)) continue; diff --git a/src/npair_halffull_newton.cpp b/src/npair_halffull_newton.cpp index d1e894943c..af15b27eac 100644 --- a/src/npair_halffull_newton.cpp +++ b/src/npair_halffull_newton.cpp @@ -70,6 +70,11 @@ void NPairHalffullNewton::build(NeighList *list) ztmp = x[i][2]; // loop over full neighbor list + // use i < j < nlocal to eliminate half the local/local interactions + // for triclinic, must use delta to eliminate half the local/ghost interactions + // cannot use direct I/J coord comparison as for orthog + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon jlist = firstneigh_full[i]; jnum = numneigh_full[i]; diff --git a/src/nstencil_half_bin_2d_tri.cpp b/src/nstencil_half_bin_2d_tri.cpp index 06831730fd..920918fe09 100644 --- a/src/nstencil_half_bin_2d_tri.cpp +++ b/src/nstencil_half_bin_2d_tri.cpp @@ -27,9 +27,17 @@ void NStencilHalfBin2dTri::create() { int i, j; + // for triclinic, need to use full stencil in all dims + // not a half stencil in y + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift both coords by epsilon + // thus for an I/J owned/ghost pair, the xy coords + // and bin assignments can be
different on I proc vs J proc + nstencil = 0; - for (j = 0; j <= sy; j++) + for (j = -sy; j <= sy; j++) for (i = -sx; i <= sx; i++) - if (bin_distance(i, j, 0) < cutneighmaxsq) stencil[nstencil++] = j * mbinx + i; + if (bin_distance(i, j, 0) < cutneighmaxsq) + stencil[nstencil++] = j * mbinx + i; } diff --git a/src/nstencil_half_bin_3d_tri.cpp b/src/nstencil_half_bin_3d_tri.cpp index f94bfc5e63..d146b92cd1 100644 --- a/src/nstencil_half_bin_3d_tri.cpp +++ b/src/nstencil_half_bin_3d_tri.cpp @@ -27,9 +27,15 @@ void NStencilHalfBin3dTri::create() { int i, j, k; + // for triclinic, need to use full stencil in all dims + // not a half stencil in z + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon + // thus for an I/J owned/ghost pair, the xyz coords + // and bin assignments can be different on I proc vs J proc + nstencil = 0; - //for (k = 0; k <= sz; k++) for (k = -sz; k <= sz; k++) for (j = -sy; j <= sy; j++) for (i = -sx; i <= sx; i++) From fbbf44fb8e2948fea648f048f4ab1cb088cd93bc Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Mon, 10 Jul 2023 18:25:29 -0700 Subject: [PATCH 010/107] same changes to other NPair and NStencil methods --- src/npair_half_bin_newton_tri.cpp | 4 +- src/npair_half_multi_newton_tri.cpp | 119 +++++++++++-------- src/npair_half_multi_old_newton_tri.cpp | 44 ++++--- src/npair_half_respa_bin_newton_tri.cpp | 45 ++++--- src/npair_half_respa_nsq_newton.cpp | 24 +++- src/npair_half_respa_nsq_newton.h | 2 +- src/npair_half_size_bin_newton_tri.cpp | 41 +++++-- src/npair_half_size_multi_newton_tri.cpp | 107 ++++++++++------- src/npair_half_size_multi_old_newton_tri.cpp | 45 ++++--- src/npair_half_size_nsq_newton.cpp | 22 +++- src/npair_halffull_newton_trim.cpp | 19 +++ src/nstencil_half_multi_2d_tri.cpp | 2 +- src/nstencil_half_multi_3d_tri.cpp | 2 +- src/nstencil_half_multi_old_2d_tri.cpp | 2 +- src/nstencil_half_multi_old_3d_tri.cpp | 2 +- 15 files changed, 313 
insertions(+), 167 deletions(-) diff --git a/src/npair_half_bin_newton_tri.cpp b/src/npair_half_bin_newton_tri.cpp index f6cbf1b1af..9c0688af68 100644 --- a/src/npair_half_bin_newton_tri.cpp +++ b/src/npair_half_bin_newton_tri.cpp @@ -16,11 +16,11 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" +#include "domain.h" +#include "error.h" #include "force.h" #include "molecule.h" -#include "domain.h" #include "my_page.h" -#include "error.h" using namespace LAMMPS_NS; diff --git a/src/npair_half_multi_newton_tri.cpp b/src/npair_half_multi_newton_tri.cpp index 9bebfe71e2..316acb5049 100644 --- a/src/npair_half_multi_newton_tri.cpp +++ b/src/npair_half_multi_newton_tri.cpp @@ -18,6 +18,7 @@ #include "atom_vec.h" #include "domain.h" #include "error.h" +#include "force.h" #include "molecule.h" #include "my_page.h" #include "neighbor.h" @@ -39,11 +40,13 @@ NPairHalfMultiNewtonTri::NPairHalfMultiNewtonTri(LAMMPS *lmp) : NPair(lmp) {} void NPairHalfMultiNewtonTri::build(NeighList *list) { int i,j,k,n,itype,jtype,icollection,jcollection,ibin,jbin,which,ns,imol,iatom,moltemplate; - tagint tagprev; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr,*s; int js; + const double delta = 0.01 * force->angstrom; + int *collection = neighbor->collection; double **x = atom->x; int *type = atom->type; @@ -72,6 +75,8 @@ void NPairHalfMultiNewtonTri::build(NeighList *list) for (i = 0; i < nlocal; i++) { n = 0; neighptr = ipage->vget(); + + itag = tag[i]; itype = type[i]; icollection = collection[i]; xtmp = x[i][0]; @@ -86,65 +91,79 @@ void NPairHalfMultiNewtonTri::build(NeighList *list) ibin = atom2bin[i]; // loop through stencils for all collections + for (jcollection = 0; jcollection < ncollections; jcollection++) { // if same collection use own bin + if (icollection == jcollection) jbin = ibin; - else jbin = coord2bin(x[i], jcollection); + else jbin = coord2bin(x[i], jcollection); // loop over all atoms in bins in stencil - // stencil is 
empty if i larger than j - // stencil is half if i same size as j - // stencil is full if i smaller than j - // if half: pairs for atoms j "below" i are excluded - // below = lower z or (equal z and lower y) or (equal zy and lower x) - // (equal zyx and j <= i) - // latter excludes self-self interaction but allows superposed atoms + // for triclinic: + // stencil is empty if i larger than j + // stencil is full if i smaller than j + // stencil is full if i same size as j + // for i same size as j: + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J exact coord comparison + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon + + s = stencil_multi[icollection][jcollection]; + ns = nstencil_multi[icollection][jcollection]; - s = stencil_multi[icollection][jcollection]; - ns = nstencil_multi[icollection][jcollection]; + for (k = 0; k < ns; k++) { + js = binhead_multi[jcollection][jbin + s[k]]; + for (j = js; j >= 0; j = bins[j]) { + + // if same size (same collection), exclude half of interactions + + if (cutcollectionsq[icollection][icollection] == + cutcollectionsq[jcollection][jcollection]) { + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } + } - for (k = 0; k < ns; k++) { - js = binhead_multi[jcollection][jbin + s[k]]; - for (j = js; j >= 0; j = bins[j]) { + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; - // if same size (same collection), use half stencil - if(cutcollectionsq[icollection][icollection] == cutcollectionsq[jcollection][jcollection]){ - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) {
- if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == xtmp && j <= i) continue; - } - } - } + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; - jtype = type[j]; - if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; - - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; - - if (rsq <= cutneighsq[itype][jtype]) { - if (molecular != Atom::ATOMIC) { - if (!moltemplate) - which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >= 0) - which = find_special(onemols[imol]->special[iatom], - onemols[imol]->nspecial[iatom], - tag[j]-tagprev); - else which = 0; - if (which == 0) neighptr[n++] = j; - else if (domain->minimum_image_check(delx,dely,delz)) - neighptr[n++] = j; - else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); - } else neighptr[n++] = j; - } - } - } + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular != Atom::ATOMIC) { + if (!moltemplate) + which = find_special(special[i],nspecial[i],tag[j]); + else if (imol >= 0) + which = find_special(onemols[imol]->special[iatom], + onemols[imol]->nspecial[iatom], + tag[j]-tagprev); + else which = 0; + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } + } } ilist[inum++] = i; diff --git a/src/npair_half_multi_old_newton_tri.cpp b/src/npair_half_multi_old_newton_tri.cpp index fbb9a8e504..9dcbcff9f4 100644 --- a/src/npair_half_multi_old_newton_tri.cpp +++ b/src/npair_half_multi_old_newton_tri.cpp @@ -18,6 +18,7 @@ #include "atom_vec.h" #include "domain.h" #include "error.h" +#include "force.h" #include "molecule.h" #include "my_page.h" #include "neigh_list.h" @@ -38,11 +39,13 @@ NPairHalfMultiOldNewtonTri::NPairHalfMultiOldNewtonTri(LAMMPS *lmp) : 
NPair(lmp) void NPairHalfMultiOldNewtonTri::build(NeighList *list) { int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom,moltemplate; - tagint tagprev; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr,*s; double *cutsq,*distsq; + const double delta = 0.01 * force->angstrom; + double **x = atom->x; int *type = atom->type; int *mask = atom->mask; @@ -71,6 +74,7 @@ void NPairHalfMultiOldNewtonTri::build(NeighList *list) n = 0; neighptr = ipage->vget(); + itag = tag[i]; itype = type[i]; xtmp = x[i][0]; ytmp = x[i][1]; @@ -81,13 +85,12 @@ void NPairHalfMultiOldNewtonTri::build(NeighList *list) tagprev = tag[i] - iatom - 1; } - // loop over all atoms in bins, including self, in stencil - // skip if i,j neighbor cutoff is less than bin distance - // bins below self are excluded from stencil - // pairs for atoms j "below" i are excluded - // below = lower z or (equal z and lower y) or (equal zy and lower x) - // (equal zyx and j <= i) - // latter excludes self-self interaction but allows superposed atoms + // loop over all atoms in bins in stencil + // for triclinic, bin stencil is full in all 3 dims + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J exact coord comparison + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon ibin = atom2bin[i]; s = stencil_multi_old[itype]; @@ -98,14 +101,23 @@ void NPairHalfMultiOldNewtonTri::build(NeighList *list) for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) { jtype = type[j]; if (cutsq[jtype] < distsq[k]) continue; - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == xtmp && j <= i) continue; - } - } + + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if
(fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; diff --git a/src/npair_half_respa_bin_newton_tri.cpp b/src/npair_half_respa_bin_newton_tri.cpp index b2749bd7a7..05b839869a 100644 --- a/src/npair_half_respa_bin_newton_tri.cpp +++ b/src/npair_half_respa_bin_newton_tri.cpp @@ -16,10 +16,11 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" -#include "molecule.h" #include "domain.h" -#include "my_page.h" #include "error.h" +#include "force.h" +#include "molecule.h" +#include "my_page.h" using namespace LAMMPS_NS; @@ -38,10 +39,12 @@ NPairHalfRespaBinNewtonTri::NPairHalfRespaBinNewtonTri(LAMMPS *lmp) : void NPairHalfRespaBinNewtonTri::build(NeighList *list) { int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom,moltemplate; - tagint tagprev; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr,*neighptr_inner,*neighptr_middle; + const double delta = 0.01 * force->angstrom; + double **x = atom->x; int *type = atom->type; int *mask = atom->mask; @@ -94,6 +97,7 @@ void NPairHalfRespaBinNewtonTri::build(NeighList *list) neighptr_middle = ipage_middle->vget(); } + itag = tag[i]; itype = type[i]; xtmp = x[i][0]; ytmp = x[i][1]; @@ -105,22 +109,33 @@ void NPairHalfRespaBinNewtonTri::build(NeighList *list) } // loop over all atoms in bins in stencil - // pairs for atoms j "below" i are excluded - // below = lower z or (equal z and lower y) or (equal zy and lower x) - // (equal zyx and j <= i) - // latter excludes self-self interaction but allows superposed atoms + // for triclinic, bin stencil is full in all 3 dims + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J exact coord comparison + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can
shift all 3 coords by epsilon ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == xtmp && j <= i) continue; - } - } + + if (j <= i) continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; diff --git a/src/npair_half_respa_nsq_newton.cpp b/src/npair_half_respa_nsq_newton.cpp index 77d6af141f..d0292eec92 100644 --- a/src/npair_half_respa_nsq_newton.cpp +++ b/src/npair_half_respa_nsq_newton.cpp @@ -16,9 +16,10 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" +#include "domain.h" +#include "force.h" #include "group.h" #include "molecule.h" -#include "domain.h" #include "my_page.h" #include "error.h" @@ -38,12 +39,15 @@ NPairHalfRespaNsqNewton::NPairHalfRespaNsqNewton(LAMMPS *lmp) : NPair(lmp) {} void NPairHalfRespaNsqNewton::build(NeighList *list) { - int i,j,n,itype,jtype,itag,jtag,n_inner,n_middle,bitmask; + int i,j,n,itype,jtype,n_inner,n_middle,bitmask; int imol,iatom,moltemplate; - tagint tagprev; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr,*neighptr_inner,*neighptr_middle; + const double delta = 0.01 * force->angstrom; + const int triclinic = domain->triclinic; + double **x = atom->x; int *type = atom->type; int *mask = atom->mask; @@ -112,6 +116,12 @@ void NPairHalfRespaNsqNewton::build(NeighList *list) } // loop over remaining atoms, owned and ghost + // use itag/jtag comparison to
eliminate half the interactions + // itag = jtag is possible for long cutoffs that include images of self + // for triclinic, must use delta to eliminate half the I/J interactions + // cannot use I/J exact coord comparison as for orthog + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon for (j = i+1; j < nall; j++) { if (includegroup && !(mask[j] & bitmask)) continue; @@ -122,6 +132,14 @@ void NPairHalfRespaNsqNewton::build(NeighList *list) if ((itag+jtag) % 2 == 0) continue; } else if (itag < jtag) { if ((itag+jtag) % 2 == 1) continue; + } else if (triclinic) { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } else { if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { diff --git a/src/npair_half_respa_nsq_newton.h b/src/npair_half_respa_nsq_newton.h index e5233f5e9d..4a5ae23aef 100644 --- a/src/npair_half_respa_nsq_newton.h +++ b/src/npair_half_respa_nsq_newton.h @@ -15,7 +15,7 @@ // clang-format off NPairStyle(half/respa/nsq/newton, NPairHalfRespaNsqNewton, - NP_HALF | NP_RESPA | NP_NSQ | NP_NEWTON | NP_ORTHO); + NP_HALF | NP_RESPA | NP_NSQ | NP_NEWTON | NP_ORTHO | NP_TRI); // clang-format on #else diff --git a/src/npair_half_size_bin_newton_tri.cpp b/src/npair_half_size_bin_newton_tri.cpp index 47bb9d01e1..e6a236eecb 100644 --- a/src/npair_half_size_bin_newton_tri.cpp +++ b/src/npair_half_size_bin_newton_tri.cpp @@ -18,6 +18,7 @@ #include "atom_vec.h" #include "domain.h" #include "error.h" +#include "force.h" #include "molecule.h" #include "my_page.h" #include "neigh_list.h" @@ -39,11 +40,13 @@ NPairHalfSizeBinNewtonTri::NPairHalfSizeBinNewtonTri(LAMMPS *lmp) : void NPairHalfSizeBinNewtonTri::build(NeighList *list) { int i,j,jh,k,n,ibin,which,imol,iatom,moltemplate; - tagint tagprev; + tagint itag,jtag,tagprev; double
xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutsq; int *neighptr; + const double delta = 0.01 * force->angstrom; + double **x = atom->x; double *radius = atom->radius; int *type = atom->type; @@ -76,6 +79,7 @@ void NPairHalfSizeBinNewtonTri::build(NeighList *list) n = 0; neighptr = ipage->vget(); + itag = tag[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; @@ -87,22 +91,33 @@ void NPairHalfSizeBinNewtonTri::build(NeighList *list) } // loop over all atoms in bins in stencil - // pairs for atoms j "below" i are excluded - // below = lower z or (equal z and lower y) or (equal zy and lower x) - // (equal zyx and j <= i) - // latter excludes self-self interaction but allows superposed atoms + // for triclinic, bin stencil is full in all 3 dims + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J exact coord comparison + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == xtmp && j <= i) continue; - } - } + + if (j <= i) continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue; diff --git a/src/npair_half_size_multi_newton_tri.cpp
b/src/npair_half_size_multi_newton_tri.cpp @@ -18,6 +18,7 @@ #include "atom_vec.h" #include "domain.h" #include "error.h" +#include "force.h" #include "molecule.h" #include "my_page.h" #include "neighbor.h" @@ -41,11 +42,13 @@ void NPairHalfSizeMultiNewtonTri::build(NeighList *list) { int i,j,jh,k,n,itype,jtype,icollection,jcollection,ibin,jbin,ns,js; int which,imol,iatom,moltemplate; - tagint tagprev; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutdistsq; int *neighptr,*s; + const double delta = 0.01 * force->angstrom; + int *collection = neighbor->collection; double **x = atom->x; double *radius = atom->radius; @@ -78,6 +81,8 @@ void NPairHalfSizeMultiNewtonTri::build(NeighList *list) for (i = 0; i < nlocal; i++) { n = 0; neighptr = ipage->vget(); + + itag = tag[i]; itype = type[i]; icollection = collection[i]; xtmp = x[i][0]; @@ -93,11 +98,13 @@ void NPairHalfSizeMultiNewtonTri::build(NeighList *list) ibin = atom2bin[i]; // loop through stencils for all collections + for (jcollection = 0; jcollection < ncollections; jcollection++) { // if same collection use own bin + if (icollection == jcollection) jbin = ibin; - else jbin = coord2bin(x[i], jcollection); + else jbin = coord2bin(x[i], jcollection); // loop over all atoms in bins in stencil // stencil is empty if i larger than j @@ -108,56 +115,66 @@ void NPairHalfSizeMultiNewtonTri::build(NeighList *list) // (equal zyx and j <= i) // latter excludes self-self interaction but allows superposed atoms - s = stencil_multi[icollection][jcollection]; - ns = nstencil_multi[icollection][jcollection]; + s = stencil_multi[icollection][jcollection]; + ns = nstencil_multi[icollection][jcollection]; - for (k = 0; k < ns; k++) { - js = binhead_multi[jcollection][jbin + s[k]]; - for (j = js; j >= 0; j = bins[j]) { + for (k = 0; k < ns; k++) { + js = binhead_multi[jcollection][jbin + s[k]]; + for (j = js; j >= 0; j = bins[j]) { - // if same size (same collection), use half stencil - 
if (cutcollectionsq[icollection][icollection] == cutcollectionsq[jcollection][jcollection]){ - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == xtmp && j <= i) continue; - } - } + // if same size (same collection), exclude half of interactions + + if (cutcollectionsq[icollection][icollection] == + cutcollectionsq[jcollection][jcollection]) { + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } } jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; - radsum = radi + radius[j]; - cutdistsq = (radsum+skin) * (radsum+skin); - - if (rsq <= cutdistsq) { - jh = j; - if (history && rsq < radsum*radsum) - jh = jh ^ mask_history; - - if (molecular != Atom::ATOMIC) { - if (!moltemplate) - which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >= 0) - which = find_special(onemols[imol]->special[iatom], - onemols[imol]->nspecial[iatom], - tag[j]-tagprev); - else which = 0; - if (which == 0) neighptr[n++] = jh; - else if (domain->minimum_image_check(delx,dely,delz)) - neighptr[n++] = jh; - else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS); - } else neighptr[n++] = jh; - } - } - } + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radsum = radi + radius[j]; + cutdistsq = (radsum+skin) * (radsum+skin); + + if (rsq <= cutdistsq) { + jh = j; + if (history && rsq < radsum*radsum) + jh = jh ^ mask_history; + + if (molecular 
!= Atom::ATOMIC) { + if (!moltemplate) + which = find_special(special[i],nspecial[i],tag[j]); + else if (imol >= 0) + which = find_special(onemols[imol]->special[iatom], + onemols[imol]->nspecial[iatom], + tag[j]-tagprev); + else which = 0; + if (which == 0) neighptr[n++] = jh; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = jh; + else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS); + } else neighptr[n++] = jh; + } + } + } } ilist[inum++] = i; diff --git a/src/npair_half_size_multi_old_newton_tri.cpp b/src/npair_half_size_multi_old_newton_tri.cpp index ea3f271956..974500d6b8 100644 --- a/src/npair_half_size_multi_old_newton_tri.cpp +++ b/src/npair_half_size_multi_old_newton_tri.cpp @@ -18,6 +18,7 @@ #include "atom_vec.h" #include "domain.h" #include "error.h" +#include "force.h" #include "molecule.h" #include "my_page.h" #include "neigh_list.h" @@ -38,12 +39,14 @@ NPairHalfSizeMultiOldNewtonTri::NPairHalfSizeMultiOldNewtonTri(LAMMPS *lmp) : NP void NPairHalfSizeMultiOldNewtonTri::build(NeighList *list) { int i,j,jh,k,n,itype,jtype,ibin,ns,which,imol,iatom,moltemplate; - tagint tagprev; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutdistsq; int *neighptr,*s; double *cutsq,*distsq; + const double delta = 0.01 * force->angstrom; + double **x = atom->x; double *radius = atom->radius; int *type = atom->type; @@ -76,6 +79,7 @@ void NPairHalfSizeMultiOldNewtonTri::build(NeighList *list) n = 0; neighptr = ipage->vget(); + itag = tag[i]; itype = type[i]; xtmp = x[i][0]; ytmp = x[i][1]; @@ -87,13 +91,12 @@ void NPairHalfSizeMultiOldNewtonTri::build(NeighList *list) tagprev = tag[i] - iatom - 1; } - // loop over all atoms in bins, including self, in stencil - // skip if i,j neighbor cutoff is less than bin distance - // bins below self are excluded from stencil - // pairs for atoms j "below" i are excluded - // below = lower z or (equal z and lower y) or (equal zy and lower x) - // (equal zyx and j 
<= i) - // latter excludes self-self interaction but allows superposed atoms + // loop over all atoms in bins in stencil + // for triclinic, bin stencil is full in all 3 dims + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J exact coord comparision + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon ibin = atom2bin[i]; s = stencil_multi_old[itype]; @@ -104,14 +107,24 @@ void NPairHalfSizeMultiOldNewtonTri::build(NeighList *list) for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) { jtype = type[j]; if (cutsq[jtype] < distsq[k]) continue; - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == xtmp && j <= i) continue; - } - } + + if (j <= i) continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; diff --git a/src/npair_half_size_nsq_newton.cpp b/src/npair_half_size_nsq_newton.cpp index 8b596e6968..abd2a4faff 100644 --- a/src/npair_half_size_nsq_newton.cpp +++ b/src/npair_half_size_nsq_newton.cpp @@ -18,6 +18,7 @@ #include "atom_vec.h" #include "domain.h" #include "error.h" +#include "force.h" #include "molecule.h" #include "group.h" #include "my_page.h" @@ -39,12 +40,15 @@ NPairHalfSizeNsqNewton::NPairHalfSizeNsqNewton(LAMMPS *lmp) : NPair(lmp) {} void NPairHalfSizeNsqNewton::build(NeighList *list) { - int i,j,jh,n,itag,jtag,bitmask,which,imol,iatom,moltemplate; - tagint tagprev; + int i,j,jh,n,bitmask,which,imol,iatom,moltemplate; + tagint itag,jtag,tagprev; double 
xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutsq; int *neighptr; + const double delta = 0.01 * force->angstrom; + const int triclinic = domain->triclinic; + double **x = atom->x; double *radius = atom->radius; tagint *tag = atom->tag; @@ -93,6 +97,12 @@ void NPairHalfSizeNsqNewton::build(NeighList *list) } // loop over remaining atoms, owned and ghost + // use itag/jtap comparision to eliminate half the interactions + // itag = jtag is possible for long cutoffs that include images of self + // for triclinic, must use delta to eliminate half the I/J interactions + // cannot use I/J exact coord comparision as for orthog + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon for (j = i+1; j < nall; j++) { if (includegroup && !(mask[j] & bitmask)) continue; @@ -103,6 +113,14 @@ void NPairHalfSizeNsqNewton::build(NeighList *list) if ((itag+jtag) % 2 == 0) continue; } else if (itag < jtag) { if ((itag+jtag) % 2 == 1) continue; + } else if (triclinic) { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } else { if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { diff --git a/src/npair_halffull_newton_trim.cpp b/src/npair_halffull_newton_trim.cpp index b7bb72c990..7d420f88af 100644 --- a/src/npair_halffull_newton_trim.cpp +++ b/src/npair_halffull_newton_trim.cpp @@ -14,7 +14,9 @@ #include "npair_halffull_newton_trim.h" #include "atom.h" +#include "domain.h" #include "error.h" +#include "force.h" #include "my_page.h" #include "neigh_list.h" @@ -38,6 +40,9 @@ void NPairHalffullNewtonTrim::build(NeighList *list) double xtmp, ytmp, ztmp; double delx, dely, delz, rsq; + const double delta = 0.01 * force->angstrom; + const int triclinic = domain->triclinic; + double **x = atom->x; int nlocal = atom->nlocal; @@ -68,6 +73,11 @@ void 
NPairHalffullNewtonTrim::build(NeighList *list) ztmp = x[i][2]; // loop over full neighbor list + // use i < j < nlocal to eliminate half the local/local interactions + // for triclinic, must use delta to eliminate half the local/ghost interactions + // cannot use I/J exact coord comparision as for orthog + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon jlist = firstneigh_full[i]; jnum = numneigh_full[i]; @@ -75,8 +85,17 @@ void NPairHalffullNewtonTrim::build(NeighList *list) for (jj = 0; jj < jnum; jj++) { joriginal = jlist[jj]; j = joriginal & NEIGHMASK; + if (j < nlocal) { if (i > j) continue; + } else if (triclinic) { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } else { if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { diff --git a/src/nstencil_half_multi_2d_tri.cpp b/src/nstencil_half_multi_2d_tri.cpp index bf39c04099..85bbe94c86 100644 --- a/src/nstencil_half_multi_2d_tri.cpp +++ b/src/nstencil_half_multi_2d_tri.cpp @@ -80,7 +80,7 @@ void NStencilHalfMulti2dTri::create() cutsq = cutcollectionsq[icollection][jcollection]; if (flag_half_multi[icollection][jcollection]) { - for (j = 0; j <= sy; j++) + for (j = -sy; j <= sy; j++) for (i = -sx; i <= sx; i++) if (bin_distance_multi(i, j, 0, bin_collection) < cutsq) stencil_multi[icollection][jcollection][ns++] = j * mbinx + i; diff --git a/src/nstencil_half_multi_3d_tri.cpp b/src/nstencil_half_multi_3d_tri.cpp index f2d4d051ad..9761e15854 100644 --- a/src/nstencil_half_multi_3d_tri.cpp +++ b/src/nstencil_half_multi_3d_tri.cpp @@ -81,7 +81,7 @@ void NStencilHalfMulti3dTri::create() cutsq = cutcollectionsq[icollection][jcollection]; if (flag_half_multi[icollection][jcollection]) { - for (k = 0; k <= sz; k++) + for (k = -sz; k <= sz; k++) for (j = -sy; j <= sy; j++) for (i = -sx; i <= sx; i++) 
if (bin_distance_multi(i, j, k, bin_collection) < cutsq) diff --git a/src/nstencil_half_multi_old_2d_tri.cpp b/src/nstencil_half_multi_old_2d_tri.cpp index 1438aef843..0aeb65bebd 100644 --- a/src/nstencil_half_multi_old_2d_tri.cpp +++ b/src/nstencil_half_multi_old_2d_tri.cpp @@ -37,7 +37,7 @@ void NStencilHalfMultiOld2dTri::create() s = stencil_multi_old[itype]; distsq = distsq_multi_old[itype]; n = 0; - for (j = 0; j <= sy; j++) + for (j = -sy; j <= sy; j++) for (i = -sx; i <= sx; i++) { rsq = bin_distance(i, j, 0); if (rsq < typesq) { diff --git a/src/nstencil_half_multi_old_3d_tri.cpp b/src/nstencil_half_multi_old_3d_tri.cpp index 836eee6039..3717b7836b 100644 --- a/src/nstencil_half_multi_old_3d_tri.cpp +++ b/src/nstencil_half_multi_old_3d_tri.cpp @@ -37,7 +37,7 @@ void NStencilHalfMultiOld3dTri::create() s = stencil_multi_old[itype]; distsq = distsq_multi_old[itype]; n = 0; - for (k = 0; k <= sz; k++) + for (k = -sz; k <= sz; k++) for (j = -sy; j <= sy; j++) for (i = -sx; i <= sx; i++) { rsq = bin_distance(i, j, k); From 6bd965f0df72b23b1a4819f9382eefc642619e72 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 10 Jul 2023 22:35:36 -0400 Subject: [PATCH 011/107] fix whitespace (again) --- src/compute_property_local.cpp | 2 +- src/npair_half_bin_newton_tri.cpp | 36 +++---- src/npair_half_multi_newton_tri.cpp | 98 +++++++++--------- src/npair_half_multi_old_newton_tri.cpp | 32 +++--- src/npair_half_respa_bin_newton_tri.cpp | 36 +++---- src/npair_half_size_bin_newton_tri.cpp | 34 +++---- src/npair_half_size_multi_newton_tri.cpp | 100 +++++++++---------- src/npair_half_size_multi_old_newton_tri.cpp | 34 +++---- src/npair_halffull_newton_trim.cpp | 2 +- src/nstencil_half_bin_3d_tri.cpp | 2 +- 10 files changed, 188 insertions(+), 188 deletions(-) diff --git a/src/compute_property_local.cpp b/src/compute_property_local.cpp index 92036c4bd2..87517a3e05 100644 --- a/src/compute_property_local.cpp +++ b/src/compute_property_local.cpp @@ -406,7 +406,7 @@ int 
ComputePropertyLocal::count_pairs(int allflag, int forceflag) // itag = jtag is possible for long cutoffs that include images of self // do not need triclinic logic here b/c neighbor list itself is correct - + if (newton_pair == 0 && j >= nlocal) { jtag = tag[j]; if (itag > jtag) { diff --git a/src/npair_half_bin_newton_tri.cpp b/src/npair_half_bin_newton_tri.cpp index 9c0688af68..453d10096e 100644 --- a/src/npair_half_bin_newton_tri.cpp +++ b/src/npair_half_bin_newton_tri.cpp @@ -88,28 +88,28 @@ void NPairHalfBinNewtonTri::build(NeighList *list) // cannot use I/J exact coord comparision // b/c transforming orthog -> lambda -> orthog for ghost atoms // with an added PBC offset can shift all 3 coords by epsilon - + ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { - if (j <= i) continue; - if (j >= nlocal) { - jtag = tag[j]; - if (itag > jtag) { - if ((itag+jtag) % 2 == 0) continue; - } else if (itag < jtag) { - if ((itag+jtag) % 2 == 1) continue; - } else { - if (fabs(x[j][2]-ztmp) > delta) { - if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > delta) { - if (x[j][1] < ytmp) continue; - } else { - if (x[j][0] < xtmp) continue; - } - } - } + if (j <= i) continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; diff --git a/src/npair_half_multi_newton_tri.cpp b/src/npair_half_multi_newton_tri.cpp index 316acb5049..1d75d6a3ef 100644 --- a/src/npair_half_multi_newton_tri.cpp +++ b/src/npair_half_multi_newton_tri.cpp @@ -91,11 +91,11 @@ void NPairHalfMultiNewtonTri::build(NeighList *list) ibin = 
atom2bin[i]; // loop through stencils for all collections - + for (jcollection = 0; jcollection < ncollections; jcollection++) { // if same collection use own bin - + if (icollection == jcollection) jbin = ibin; else jbin = coord2bin(x[i], jcollection); @@ -109,60 +109,60 @@ void NPairHalfMultiNewtonTri::build(NeighList *list) // cannot use I/J exact coord comparision // b/c transforming orthog -> lambda -> orthog for ghost atoms // with an added PBC offset can shift all 3 coords by epsilon - + s = stencil_multi[icollection][jcollection]; ns = nstencil_multi[icollection][jcollection]; for (k = 0; k < ns; k++) { - js = binhead_multi[jcollection][jbin + s[k]]; - for (j = js; j >= 0; j = bins[j]) { - - // if same size (same collection), exclude half of interactions - - if (cutcollectionsq[icollection][icollection] == - cutcollectionsq[jcollection][jcollection]) { - if (j >= nlocal) { - jtag = tag[j]; - if (itag > jtag) { - if ((itag+jtag) % 2 == 0) continue; - } else if (itag < jtag) { - if ((itag+jtag) % 2 == 1) continue; - } else { - if (fabs(x[j][2]-ztmp) > delta) { - if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > delta) { - if (x[j][1] < ytmp) continue; - } else { - if (x[j][0] < xtmp) continue; - } - } - } - } + js = binhead_multi[jcollection][jbin + s[k]]; + for (j = js; j >= 0; j = bins[j]) { - jtype = type[j]; - if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + // if same size (same collection), exclude half of interactions - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; + if (cutcollectionsq[icollection][icollection] == + cutcollectionsq[jcollection][jcollection]) { + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if 
(x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } + } - if (rsq <= cutneighsq[itype][jtype]) { - if (molecular != Atom::ATOMIC) { - if (!moltemplate) - which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >= 0) - which = find_special(onemols[imol]->special[iatom], - onemols[imol]->nspecial[iatom], - tag[j]-tagprev); - else which = 0; - if (which == 0) neighptr[n++] = j; - else if (domain->minimum_image_check(delx,dely,delz)) - neighptr[n++] = j; - else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); - } else neighptr[n++] = j; - } - } + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular != Atom::ATOMIC) { + if (!moltemplate) + which = find_special(special[i],nspecial[i],tag[j]); + else if (imol >= 0) + which = find_special(onemols[imol]->special[iatom], + onemols[imol]->nspecial[iatom], + tag[j]-tagprev); + else which = 0; + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; + } + } } } diff --git a/src/npair_half_multi_old_newton_tri.cpp b/src/npair_half_multi_old_newton_tri.cpp index 9dcbcff9f4..72d46d042f 100644 --- a/src/npair_half_multi_old_newton_tri.cpp +++ b/src/npair_half_multi_old_newton_tri.cpp @@ -102,22 +102,22 @@ void NPairHalfMultiOldNewtonTri::build(NeighList *list) jtype = type[j]; if (cutsq[jtype] < distsq[k]) continue; - if (j >= nlocal) { - jtag = tag[j]; - if (itag > jtag) { - if ((itag+jtag) % 2 == 0) continue; - } else if (itag < jtag) { - if ((itag+jtag) % 2 == 1) continue; - } else { - if (fabs(x[j][2]-ztmp) > delta) { - if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > delta) { - if (x[j][1] < ytmp) continue; - } 
else { - if (x[j][0] < xtmp) continue; - } - } - } + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; diff --git a/src/npair_half_respa_bin_newton_tri.cpp b/src/npair_half_respa_bin_newton_tri.cpp index 05b839869a..eac67b8bd5 100644 --- a/src/npair_half_respa_bin_newton_tri.cpp +++ b/src/npair_half_respa_bin_newton_tri.cpp @@ -118,24 +118,24 @@ void NPairHalfRespaBinNewtonTri::build(NeighList *list) ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { - - if (j <= i) continue; - if (j >= nlocal) { - jtag = tag[j]; - if (itag > jtag) { - if ((itag+jtag) % 2 == 0) continue; - } else if (itag < jtag) { - if ((itag+jtag) % 2 == 1) continue; - } else { - if (fabs(x[j][2]-ztmp) > delta) { - if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > delta) { - if (x[j][1] < ytmp) continue; - } else { - if (x[j][0] < xtmp) continue; - } - } - } + + if (j <= i) continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; diff --git a/src/npair_half_size_bin_newton_tri.cpp b/src/npair_half_size_bin_newton_tri.cpp index e6a236eecb..0d1a0a7329 100644 --- a/src/npair_half_size_bin_newton_tri.cpp +++ 
b/src/npair_half_size_bin_newton_tri.cpp @@ -101,23 +101,23 @@ void NPairHalfSizeBinNewtonTri::build(NeighList *list) for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { - if (j <= i) continue; - if (j >= nlocal) { - jtag = tag[j]; - if (itag > jtag) { - if ((itag+jtag) % 2 == 0) continue; - } else if (itag < jtag) { - if ((itag+jtag) % 2 == 1) continue; - } else { - if (fabs(x[j][2]-ztmp) > delta) { - if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > delta) { - if (x[j][1] < ytmp) continue; - } else { - if (x[j][0] < xtmp) continue; - } - } - } + if (j <= i) continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } if (exclude && exclusion(i,j,type[i],type[j],mask,molecule)) continue; diff --git a/src/npair_half_size_multi_newton_tri.cpp b/src/npair_half_size_multi_newton_tri.cpp index a363ae6e1e..f597789dee 100644 --- a/src/npair_half_size_multi_newton_tri.cpp +++ b/src/npair_half_size_multi_newton_tri.cpp @@ -98,11 +98,11 @@ void NPairHalfSizeMultiNewtonTri::build(NeighList *list) ibin = atom2bin[i]; // loop through stencils for all collections - + for (jcollection = 0; jcollection < ncollections; jcollection++) { // if same collection use own bin - + if (icollection == jcollection) jbin = ibin; else jbin = coord2bin(x[i], jcollection); @@ -119,61 +119,61 @@ void NPairHalfSizeMultiNewtonTri::build(NeighList *list) ns = nstencil_multi[icollection][jcollection]; for (k = 0; k < ns; k++) { - js = binhead_multi[jcollection][jbin + s[k]]; - for (j = js; j >= 0; j = bins[j]) { + js = binhead_multi[jcollection][jbin + s[k]]; + for (j = js; j >= 0; j = bins[j]) { + + // if same size (same 
collection), exclude half of interactions - // if same size (same collection), exclude half of interactions - if (cutcollectionsq[icollection][icollection] == - cutcollectionsq[jcollection][jcollection]) { - if (j >= nlocal) { - jtag = tag[j]; - if (itag > jtag) { - if ((itag+jtag) % 2 == 0) continue; - } else if (itag < jtag) { - if ((itag+jtag) % 2 == 1) continue; - } else { - if (fabs(x[j][2]-ztmp) > delta) { - if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > delta) { - if (x[j][1] < ytmp) continue; - } else { - if (x[j][0] < xtmp) continue; - } - } - } + cutcollectionsq[jcollection][jcollection]) { + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } } jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; - radsum = radi + radius[j]; - cutdistsq = (radsum+skin) * (radsum+skin); - - if (rsq <= cutdistsq) { - jh = j; - if (history && rsq < radsum*radsum) - jh = jh ^ mask_history; - - if (molecular != Atom::ATOMIC) { - if (!moltemplate) - which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >= 0) - which = find_special(onemols[imol]->special[iatom], - onemols[imol]->nspecial[iatom], - tag[j]-tagprev); - else which = 0; - if (which == 0) neighptr[n++] = jh; - else if (domain->minimum_image_check(delx,dely,delz)) - neighptr[n++] = jh; - else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS); - } else neighptr[n++] = jh; - } - } + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + radsum = radi + radius[j]; + 
cutdistsq = (radsum+skin) * (radsum+skin); + + if (rsq <= cutdistsq) { + jh = j; + if (history && rsq < radsum*radsum) + jh = jh ^ mask_history; + + if (molecular != Atom::ATOMIC) { + if (!moltemplate) + which = find_special(special[i],nspecial[i],tag[j]); + else if (imol >= 0) + which = find_special(onemols[imol]->special[iatom], + onemols[imol]->nspecial[iatom], + tag[j]-tagprev); + else which = 0; + if (which == 0) neighptr[n++] = jh; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = jh; + else if (which > 0) neighptr[n++] = jh ^ (which << SBBITS); + } else neighptr[n++] = jh; + } + } } } diff --git a/src/npair_half_size_multi_old_newton_tri.cpp b/src/npair_half_size_multi_old_newton_tri.cpp index 974500d6b8..848a19aa39 100644 --- a/src/npair_half_size_multi_old_newton_tri.cpp +++ b/src/npair_half_size_multi_old_newton_tri.cpp @@ -108,23 +108,23 @@ void NPairHalfSizeMultiOldNewtonTri::build(NeighList *list) jtype = type[j]; if (cutsq[jtype] < distsq[k]) continue; - if (j <= i) continue; - if (j >= nlocal) { - jtag = tag[j]; - if (itag > jtag) { - if ((itag+jtag) % 2 == 0) continue; - } else if (itag < jtag) { - if ((itag+jtag) % 2 == 1) continue; - } else { - if (fabs(x[j][2]-ztmp) > delta) { - if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > delta) { - if (x[j][1] < ytmp) continue; - } else { - if (x[j][0] < xtmp) continue; - } - } - } + if (j <= i) continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; diff --git a/src/npair_halffull_newton_trim.cpp b/src/npair_halffull_newton_trim.cpp index 7d420f88af..e758c04284 100644 --- 
a/src/npair_halffull_newton_trim.cpp +++ b/src/npair_halffull_newton_trim.cpp @@ -85,7 +85,7 @@ void NPairHalffullNewtonTrim::build(NeighList *list) for (jj = 0; jj < jnum; jj++) { joriginal = jlist[jj]; j = joriginal & NEIGHMASK; - + if (j < nlocal) { if (i > j) continue; } else if (triclinic) { diff --git a/src/nstencil_half_bin_3d_tri.cpp b/src/nstencil_half_bin_3d_tri.cpp index d146b92cd1..72bef7fb76 100644 --- a/src/nstencil_half_bin_3d_tri.cpp +++ b/src/nstencil_half_bin_3d_tri.cpp @@ -33,7 +33,7 @@ void NStencilHalfBin3dTri::create() // with an added PBC offset can shift all 3 coords by epsilon // thus for an I/J owned/ghost pair, the xyz coords // and bin assignments can be different on I proc vs J proc - + nstencil = 0; for (k = -sz; k <= sz; k++) From 2eeea4332076c4a11193bd6c4a71d29ab649778c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 11 Jul 2023 01:09:44 -0400 Subject: [PATCH 012/107] port neighbor list changes to OPENMP package --- src/OPENMP/npair_half_bin_newton_tri_omp.cpp | 20 +-- .../npair_half_multi_newton_tri_omp.cpp | 118 ++++++++++-------- .../npair_half_multi_old_newton_tri_omp.cpp | 50 +++++--- src/OPENMP/npair_half_nsq_newton_omp.cpp | 7 +- .../npair_half_respa_bin_newton_tri_omp.cpp | 47 ++++--- .../npair_half_respa_nsq_newton_omp.cpp | 35 ++++-- .../npair_half_size_bin_newton_tri_omp.cpp | 40 +++--- .../npair_half_size_multi_newton_tri_omp.cpp | 43 ++++--- ...air_half_size_multi_old_newton_tri_omp.cpp | 42 ++++--- src/OPENMP/npair_half_size_nsq_newton_omp.cpp | 27 +++- src/npair_half_bin_newton_tri.cpp | 3 +- src/npair_half_multi_newton_tri.cpp | 4 +- src/npair_half_respa_bin_newton_tri.cpp | 3 +- src/npair_half_respa_nsq_newton.cpp | 7 +- src/npair_half_size_nsq_newton.cpp | 2 +- 15 files changed, 278 insertions(+), 170 deletions(-) diff --git a/src/OPENMP/npair_half_bin_newton_tri_omp.cpp b/src/OPENMP/npair_half_bin_newton_tri_omp.cpp index 3ad07acd56..47524474ed 100644 --- a/src/OPENMP/npair_half_bin_newton_tri_omp.cpp +++ 
b/src/OPENMP/npair_half_bin_newton_tri_omp.cpp @@ -12,17 +12,18 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -#include "omp_compat.h" #include "npair_half_bin_newton_tri_omp.h" #include "npair_omp.h" -#include "neigh_list.h" +#include "omp_compat.h" + #include "atom.h" #include "atom_vec.h" +#include "domain.h" +#include "error.h" #include "force.h" #include "molecule.h" -#include "domain.h" #include "my_page.h" -#include "error.h" +#include "neigh_list.h" using namespace LAMMPS_NS; @@ -54,8 +55,6 @@ void NPairHalfBinNewtonTriOmp::build(NeighList *list) double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr; - // loop over each atom, storing neighbors - double **x = atom->x; int *type = atom->type; int *mask = atom->mask; @@ -93,10 +92,11 @@ void NPairHalfBinNewtonTriOmp::build(NeighList *list) } // loop over all atoms in bins in stencil - // pairs for atoms j "below" i are excluded - // below = lower z or (equal z and lower y) or (equal zy and lower x) - // (equal zyx and j <= i) - // latter excludes self-self interaction but allows superposed atoms + // for triclinic, bin stencil is full in all 3 dims + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J exact coord comparision + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { diff --git a/src/OPENMP/npair_half_multi_newton_tri_omp.cpp b/src/OPENMP/npair_half_multi_newton_tri_omp.cpp index a152d011a7..b18cba0261 100644 --- a/src/OPENMP/npair_half_multi_newton_tri_omp.cpp +++ b/src/OPENMP/npair_half_multi_newton_tri_omp.cpp @@ -12,17 +12,19 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ -#include "omp_compat.h" #include "npair_half_multi_newton_tri_omp.h" -#include "npair_omp.h" -#include "neighbor.h" -#include "neigh_list.h" + #include "atom.h" #include "atom_vec.h" -#include "molecule.h" #include "domain.h" -#include "my_page.h" #include "error.h" +#include "force.h" +#include "molecule.h" +#include "my_page.h" +#include "neigh_list.h" +#include "neighbor.h" +#include "npair_omp.h" +#include "omp_compat.h" using namespace LAMMPS_NS; @@ -43,6 +45,7 @@ void NPairHalfMultiNewtonTriOmp::build(NeighList *list) const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; const int molecular = atom->molecular; const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0; + const double delta = 0.01 * force->angstrom; NPAIR_OMP_INIT; #if defined(_OPENMP) @@ -51,13 +54,11 @@ void NPairHalfMultiNewtonTriOmp::build(NeighList *list) NPAIR_OMP_SETUP(nlocal); int i,j,k,n,itype,jtype,ibin,jbin,icollection,jcollection,which,ns,imol,iatom; - tagint tagprev; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr,*s; int js; - // loop over each atom, storing neighbors - int *collection = neighbor->collection; double **x = atom->x; int *type = atom->type; @@ -84,6 +85,7 @@ void NPairHalfMultiNewtonTriOmp::build(NeighList *list) n = 0; neighptr = ipage.vget(); + itag = tag[i]; itype = type[i]; icollection = collection[i]; xtmp = x[i][0]; @@ -98,65 +100,79 @@ void NPairHalfMultiNewtonTriOmp::build(NeighList *list) ibin = atom2bin[i]; // loop through stencils for all collections + for (jcollection = 0; jcollection < ncollections; jcollection++) { // if same collection use own bin + if (icollection == jcollection) jbin = ibin; - else jbin = coord2bin(x[i], jcollection); + else jbin = coord2bin(x[i], jcollection); // loop over all atoms in bins in stencil - // stencil is empty if i larger than j - // stencil is half if i same size as j - // stencil is full if i 
smaller than j - // if half: pairs for atoms j "below" i are excluded - // below = lower z or (equal z and lower y) or (equal zy and lower x) - // (equal zyx and j <= i) - // latter excludes self-self interaction but allows superposed atoms + // for triclinic: + // stencil is empty if i larger than j + // stencil is full if i smaller than j + // stencil is full if i same size as j + // for i smaller than j: + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J exact coord comparision + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon - s = stencil_multi[icollection][jcollection]; - ns = nstencil_multi[icollection][jcollection]; + s = stencil_multi[icollection][jcollection]; + ns = nstencil_multi[icollection][jcollection]; - for (k = 0; k < ns; k++) { - js = binhead_multi[jcollection][jbin + s[k]]; - for (j = js; j >= 0; j = bins[j]) { + for (k = 0; k < ns; k++) { + js = binhead_multi[jcollection][jbin + s[k]]; + for (j = js; j >= 0; j = bins[j]) { - // if same size (same collection), use half stencil - if (cutcollectionsq[icollection][icollection] == cutcollectionsq[jcollection][jcollection]){ - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == xtmp && j <= i) continue; + // if same size (same collection), exclude half of interactions + + if (cutcollectionsq[icollection][icollection] == + cutcollectionsq[jcollection][jcollection]) { + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } } } jtype = type[j]; - if (exclude && 
exclusion(i,j,itype,jtype,mask,molecule)) continue; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; - if (rsq <= cutneighsq[itype][jtype]) { - if (molecular != Atom::ATOMIC) { - if (!moltemplate) - which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >= 0) - which = find_special(onemols[imol]->special[iatom], - onemols[imol]->nspecial[iatom], - tag[j]-tagprev); - else which = 0; - if (which == 0) neighptr[n++] = j; - else if (domain->minimum_image_check(delx,dely,delz)) - neighptr[n++] = j; - else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); - } else neighptr[n++] = j; - } - } + if (rsq <= cutneighsq[itype][jtype]) { + if (molecular != Atom::ATOMIC) { + if (!moltemplate) + which = find_special(special[i],nspecial[i],tag[j]); + else if (imol >= 0) + which = find_special(onemols[imol]->special[iatom], + onemols[imol]->nspecial[iatom], + tag[j]-tagprev); + else which = 0; + if (which == 0) neighptr[n++] = j; + else if (domain->minimum_image_check(delx,dely,delz)) + neighptr[n++] = j; + else if (which > 0) neighptr[n++] = j ^ (which << SBBITS); + } else neighptr[n++] = j; } + } + } } ilist[i] = i; diff --git a/src/OPENMP/npair_half_multi_old_newton_tri_omp.cpp b/src/OPENMP/npair_half_multi_old_newton_tri_omp.cpp index e4895ff1a9..38f645abad 100644 --- a/src/OPENMP/npair_half_multi_old_newton_tri_omp.cpp +++ b/src/OPENMP/npair_half_multi_old_newton_tri_omp.cpp @@ -15,13 +15,15 @@ #include "omp_compat.h" #include "npair_half_multi_old_newton_tri_omp.h" #include "npair_omp.h" -#include "neigh_list.h" + #include "atom.h" #include "atom_vec.h" -#include "molecule.h" #include "domain.h" -#include "my_page.h" #include "error.h" +#include "force.h" +#include "molecule.h" +#include "my_page.h" 
+#include "neigh_list.h" using namespace LAMMPS_NS; @@ -42,6 +44,7 @@ void NPairHalfMultiOldNewtonTriOmp::build(NeighList *list) const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; const int molecular = atom->molecular; const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0; + const double delta = 0.01 * force->angstrom; NPAIR_OMP_INIT; #if defined(_OPENMP) @@ -50,13 +53,11 @@ void NPairHalfMultiOldNewtonTriOmp::build(NeighList *list) NPAIR_OMP_SETUP(nlocal); int i,j,k,n,itype,jtype,ibin,which,ns,imol,iatom; - tagint tagprev; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr,*s; double *cutsq,*distsq; - // loop over each atom, storing neighbors - double **x = atom->x; int *type = atom->type; int *mask = atom->mask; @@ -82,6 +83,7 @@ void NPairHalfMultiOldNewtonTriOmp::build(NeighList *list) n = 0; neighptr = ipage.vget(); + itag = tag[i]; itype = type[i]; xtmp = x[i][0]; ytmp = x[i][1]; @@ -92,13 +94,12 @@ void NPairHalfMultiOldNewtonTriOmp::build(NeighList *list) tagprev = tag[i] - iatom - 1; } - // loop over all atoms in bins, including self, in stencil - // skip if i,j neighbor cutoff is less than bin distance - // bins below self are excluded from stencil - // pairs for atoms j "below" i are excluded - // below = lower z or (equal z and lower y) or (equal zy and lower x) - // (equal zyx and j <= i) - // latter excludes self-self interaction but allows superposed atoms + // loop over all atoms in bins in stencil + // for triclinic, bin stencil is full in all 3 dims + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J exact coord comparision + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon ibin = atom2bin[i]; s = stencil_multi_old[itype]; @@ -109,12 +110,21 @@ void NPairHalfMultiOldNewtonTriOmp::build(NeighList *list) for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) { jtype = type[j]; if (cutsq[jtype] 
< distsq[k]) continue; - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == xtmp && j <= i) continue; + + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } } @@ -129,7 +139,7 @@ void NPairHalfMultiOldNewtonTriOmp::build(NeighList *list) if (molecular != Atom::ATOMIC) { if (!moltemplate) which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >=0) + else if (imol >= 0) which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom], tag[j]-tagprev); diff --git a/src/OPENMP/npair_half_nsq_newton_omp.cpp b/src/OPENMP/npair_half_nsq_newton_omp.cpp index 726814c6f0..42cf63278a 100644 --- a/src/OPENMP/npair_half_nsq_newton_omp.cpp +++ b/src/OPENMP/npair_half_nsq_newton_omp.cpp @@ -58,8 +58,6 @@ void NPairHalfNsqNewtonOmp::build(NeighList *list) double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr; - // loop over each atom, storing neighbors - double **x = atom->x; int *type = atom->type; int *mask = atom->mask; @@ -99,7 +97,12 @@ void NPairHalfNsqNewtonOmp::build(NeighList *list) } // loop over remaining atoms, owned and ghost + // use itag/jtap comparision to eliminate half the interactions // itag = jtag is possible for long cutoffs that include images of self + // for triclinic, must use delta to eliminate half the I/J interactions + // cannot use I/J exact coord comparision as for orthog + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon for (j = i+1; j < nall; j++) { if (includegroup && !(mask[j] & bitmask)) continue; diff --git 
a/src/OPENMP/npair_half_respa_bin_newton_tri_omp.cpp b/src/OPENMP/npair_half_respa_bin_newton_tri_omp.cpp index c998f71290..78b3abdd66 100644 --- a/src/OPENMP/npair_half_respa_bin_newton_tri_omp.cpp +++ b/src/OPENMP/npair_half_respa_bin_newton_tri_omp.cpp @@ -15,13 +15,15 @@ #include "omp_compat.h" #include "npair_half_respa_bin_newton_tri_omp.h" #include "npair_omp.h" -#include "neigh_list.h" + #include "atom.h" #include "atom_vec.h" -#include "molecule.h" #include "domain.h" -#include "my_page.h" #include "error.h" +#include "force.h" +#include "molecule.h" +#include "my_page.h" +#include "neigh_list.h" using namespace LAMMPS_NS; @@ -42,6 +44,7 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list) const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; const int molecular = atom->molecular; const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0; + const double delta = 0.01 * force->angstrom; NPAIR_OMP_INIT; @@ -53,12 +56,10 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list) NPAIR_OMP_SETUP(nlocal); int i,j,k,n,itype,jtype,ibin,n_inner,n_middle,imol,iatom; - tagint tagprev; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr,*neighptr_inner,*neighptr_middle; - // loop over each atom, storing neighbors - double **x = atom->x; int *type = atom->type; int *mask = atom->mask; @@ -111,6 +112,7 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list) neighptr_middle = ipage_middle->vget(); } + itag = tag[i]; itype = type[i]; xtmp = x[i][0]; ytmp = x[i][1]; @@ -122,20 +124,31 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list) } // loop over all atoms in bins in stencil - // pairs for atoms j "below" i are excluded - // below = lower z or (equal z and lower y) or (equal zy and lower x) - // (equal zyx and j <= i) - // latter excludes self-self interaction but allows superposed atoms + // for triclinic, bin stencil is full in all 3 dims + // must use itag/jtag to eliminate half the I/J 
interactions + // cannot use I/J exact coord comparision + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == xtmp && j <= i) continue; + + if (j <= i) continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } } @@ -151,7 +164,7 @@ void NPairHalfRespaBinNewtonTriOmp::build(NeighList *list) if (molecular != Atom::ATOMIC) { if (!moltemplate) which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >=0) + else if (imol >= 0) which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom], tag[j]-tagprev); diff --git a/src/OPENMP/npair_half_respa_nsq_newton_omp.cpp b/src/OPENMP/npair_half_respa_nsq_newton_omp.cpp index 6604861f74..a9745edc64 100644 --- a/src/OPENMP/npair_half_respa_nsq_newton_omp.cpp +++ b/src/OPENMP/npair_half_respa_nsq_newton_omp.cpp @@ -15,21 +15,22 @@ #include "omp_compat.h" #include "npair_half_respa_nsq_newton_omp.h" #include "npair_omp.h" -#include "neigh_list.h" + #include "atom.h" #include "atom_vec.h" +#include "domain.h" +#include "error.h" +#include "force.h" #include "group.h" #include "molecule.h" -#include "domain.h" #include "my_page.h" -#include "error.h" +#include "neigh_list.h" using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairHalfRespaNsqNewtonOmp::NPairHalfRespaNsqNewtonOmp(LAMMPS *lmp) 
: - NPair(lmp) {} +NPairHalfRespaNsqNewtonOmp::NPairHalfRespaNsqNewtonOmp(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- multiple respa lists @@ -45,6 +46,8 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list) const int bitmask = (includegroup) ? group->bitmask[includegroup] : 0; const int molecular = atom->molecular; const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0; + const double delta = 0.01 * force->angstrom; + const int triclinic = domain->triclinic; NPAIR_OMP_INIT; @@ -55,13 +58,11 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list) #endif NPAIR_OMP_SETUP(nlocal); - int i,j,n,itype,jtype,itag,jtag,n_inner,n_middle,imol,iatom; - tagint tagprev; + int i,j,n,itype,jtype,n_inner,n_middle,imol,iatom; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr,*neighptr_inner,*neighptr_middle; - // loop over each atom, storing neighbors - double **x = atom->x; int *type = atom->type; int *mask = atom->mask; @@ -128,6 +129,12 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list) } // loop over remaining atoms, owned and ghost + // use itag/jtap comparision to eliminate half the interactions + // itag = jtag is possible for long cutoffs that include images of self + // for triclinic, must use delta to eliminate half the I/J interactions + // cannot use I/J exact coord comparision as for orthog + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon for (j = i+1; j < nall; j++) { if (includegroup && !(mask[j] & bitmask)) continue; @@ -138,6 +145,14 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list) if ((itag+jtag) % 2 == 0) continue; } else if (itag < jtag) { if ((itag+jtag) % 2 == 1) continue; + } else if (triclinic) { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if 
(x[j][0] < xtmp) continue; + } } else { if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { @@ -159,7 +174,7 @@ void NPairHalfRespaNsqNewtonOmp::build(NeighList *list) if (molecular != Atom::ATOMIC) { if (!moltemplate) which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >=0) + else if (imol >= 0) which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom], tag[j]-tagprev); diff --git a/src/OPENMP/npair_half_size_bin_newton_tri_omp.cpp b/src/OPENMP/npair_half_size_bin_newton_tri_omp.cpp index c320296442..7fcf07e9c8 100644 --- a/src/OPENMP/npair_half_size_bin_newton_tri_omp.cpp +++ b/src/OPENMP/npair_half_size_bin_newton_tri_omp.cpp @@ -18,6 +18,7 @@ #include "atom_vec.h" #include "domain.h" #include "error.h" +#include "force.h" #include "molecule.h" #include "my_page.h" #include "neigh_list.h" @@ -46,6 +47,7 @@ void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list) const int moltemplate = (molecular == Atom::TEMPLATE) ? 1 : 0; const int history = list->history; const int mask_history = 1 << HISTBITS; + const double delta = 0.01 * force->angstrom; NPAIR_OMP_INIT; #if defined(_OPENMP) @@ -54,13 +56,11 @@ void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list) NPAIR_OMP_SETUP(nlocal); int i,j,jh,k,n,ibin,which,imol,iatom; - tagint tagprev; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutsq; int *neighptr; - // loop over each atom, storing neighbors - double **x = atom->x; double *radius = atom->radius; int *type = atom->type; @@ -87,6 +87,7 @@ void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list) n = 0; neighptr = ipage.vget(); + itag = tag[i]; xtmp = x[i][0]; ytmp = x[i][1]; ztmp = x[i][2]; @@ -98,20 +99,31 @@ void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list) } // loop over all atoms in bins in stencil - // pairs for atoms j "below" i are excluded - // below = lower z or (equal z and lower y) or (equal zy and lower x) - // (equal zyx and j <= i) - // latter excludes 
self-self interaction but allows superposed atoms + // for triclinic, bin stencil is full in all 3 dims + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J exact coord comparision + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon ibin = atom2bin[i]; for (k = 0; k < nstencil; k++) { for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == xtmp && j <= i) continue; + + if (j <= i) continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } } @@ -132,7 +144,7 @@ void NPairHalfSizeBinNewtonTriOmp::build(NeighList *list) if (molecular != Atom::ATOMIC) { if (!moltemplate) which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >=0) + else if (imol >= 0) which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom], tag[j]-tagprev); diff --git a/src/OPENMP/npair_half_size_multi_newton_tri_omp.cpp b/src/OPENMP/npair_half_size_multi_newton_tri_omp.cpp index 9a0ead482b..916b7bfbc3 100644 --- a/src/OPENMP/npair_half_size_multi_newton_tri_omp.cpp +++ b/src/OPENMP/npair_half_size_multi_newton_tri_omp.cpp @@ -18,6 +18,7 @@ #include "atom_vec.h" #include "domain.h" #include "error.h" +#include "force.h" #include "molecule.h" #include "my_page.h" #include "neighbor.h" @@ -48,6 +49,7 @@ void NPairHalfSizeMultiNewtonTriOmp::build(NeighList *list) const int moltemplate = (molecular == Atom::TEMPLATE) ? 
1 : 0; const int history = list->history; const int mask_history = 1 << HISTBITS; + const double delta = 0.01 * force->angstrom; NPAIR_OMP_INIT; #if defined(_OPENMP) @@ -55,15 +57,12 @@ void NPairHalfSizeMultiNewtonTriOmp::build(NeighList *list) #endif NPAIR_OMP_SETUP(nlocal); - int i,j,jh,k,n,itype,jtype,icollection,jcollection,ibin,jbin,ns; + int i,j,jh,k,n,itype,jtype,icollection,jcollection,ibin,jbin,ns,js; int which,imol,iatom; - tagint tagprev; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutdistsq; int *neighptr,*s; - int js; - - // loop over each atom, storing neighbors int *collection = neighbor->collection; double **x = atom->x; @@ -92,6 +91,7 @@ void NPairHalfSizeMultiNewtonTriOmp::build(NeighList *list) n = 0; neighptr = ipage.vget(); + itag = tag[i]; itype = type[i]; icollection = collection[i]; xtmp = x[i][0]; @@ -107,12 +107,13 @@ void NPairHalfSizeMultiNewtonTriOmp::build(NeighList *list) ibin = atom2bin[i]; // loop through stencils for all collections + for (jcollection = 0; jcollection < ncollections; jcollection++) { // if same collection use own bin - if(icollection == jcollection) jbin = ibin; - else jbin = coord2bin(x[i], jcollection); + if (icollection == jcollection) jbin = ibin; + else jbin = coord2bin(x[i], jcollection); // loop over all atoms in bins in stencil // stencil is empty if i larger than j @@ -130,14 +131,24 @@ void NPairHalfSizeMultiNewtonTriOmp::build(NeighList *list) js = binhead_multi[jcollection][jbin + s[k]]; for (j = js; j >= 0; j = bins[j]) { - // if same size (same collection), use half stencil - if(cutcollectionsq[icollection][icollection] == cutcollectionsq[jcollection][jcollection]){ - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == xtmp && j <= i) continue; + // if same size (same collection), exclude half of interactions + + if 
(cutcollectionsq[icollection][icollection] == + cutcollectionsq[jcollection][jcollection]) { + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } } } @@ -160,7 +171,7 @@ void NPairHalfSizeMultiNewtonTriOmp::build(NeighList *list) if (molecular != Atom::ATOMIC) { if (!moltemplate) which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >=0) + else if (imol >= 0) which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom], tag[j]-tagprev); diff --git a/src/OPENMP/npair_half_size_multi_old_newton_tri_omp.cpp b/src/OPENMP/npair_half_size_multi_old_newton_tri_omp.cpp index c74b191f66..7faa210107 100644 --- a/src/OPENMP/npair_half_size_multi_old_newton_tri_omp.cpp +++ b/src/OPENMP/npair_half_size_multi_old_newton_tri_omp.cpp @@ -18,6 +18,7 @@ #include "atom_vec.h" #include "domain.h" #include "error.h" +#include "force.h" #include "molecule.h" #include "my_page.h" #include "neigh_list.h" @@ -32,7 +33,6 @@ NPairHalfSizeMultiOldNewtonTriOmp::NPairHalfSizeMultiOldNewtonTriOmp(LAMMPS *lmp NPair(lmp) {} /* ---------------------------------------------------------------------- - size particles binned neighbor list construction with Newton's 3rd law for triclinic each owned atom i checks its own bin and other bins in triclinic stencil multi-type stencil is itype dependent and is distance checked @@ -46,6 +46,7 @@ void NPairHalfSizeMultiOldNewtonTriOmp::build(NeighList *list) const int moltemplate = (molecular == Atom::TEMPLATE) ? 
1 : 0; const int history = list->history; const int mask_history = 1 << HISTBITS; + const double delta = 0.01 * force->angstrom; NPAIR_OMP_INIT; #if defined(_OPENMP) @@ -54,7 +55,7 @@ void NPairHalfSizeMultiOldNewtonTriOmp::build(NeighList *list) NPAIR_OMP_SETUP(nlocal); int i,j,jh,k,n,itype,jtype,ibin,ns,which,imol,iatom; - tagint tagprev; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutdistsq; int *neighptr,*s; @@ -97,13 +98,12 @@ void NPairHalfSizeMultiOldNewtonTriOmp::build(NeighList *list) tagprev = tag[i] - iatom - 1; } - // loop over all atoms in bins, including self, in stencil - // skip if i,j neighbor cutoff is less than bin distance - // bins below self are excluded from stencil - // pairs for atoms j "below" i are excluded - // below = lower z or (equal z and lower y) or (equal zy and lower x) - // (equal zyx and j <= i) - // latter excludes self-self interaction but allows superposed atoms + // loop over all atoms in bins in stencil + // for triclinic, bin stencil is full in all 3 dims + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J exact coord comparision + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon ibin = atom2bin[i]; s = stencil_multi_old[itype]; @@ -114,12 +114,22 @@ void NPairHalfSizeMultiOldNewtonTriOmp::build(NeighList *list) for (j = binhead[ibin+s[k]]; j >= 0; j = bins[j]) { jtype = type[j]; if (cutsq[jtype] < distsq[k]) continue; - if (x[j][2] < ztmp) continue; - if (x[j][2] == ztmp) { - if (x[j][1] < ytmp) continue; - if (x[j][1] == ytmp) { - if (x[j][0] < xtmp) continue; - if (x[j][0] == xtmp && j <= i) continue; + + if (j <= i) continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } 
else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } } @@ -140,7 +150,7 @@ void NPairHalfSizeMultiOldNewtonTriOmp::build(NeighList *list) if (molecular != Atom::ATOMIC) { if (!moltemplate) which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >=0) + else if (imol >= 0) which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom], tag[j]-tagprev); diff --git a/src/OPENMP/npair_half_size_nsq_newton_omp.cpp b/src/OPENMP/npair_half_size_nsq_newton_omp.cpp index 35dc42ec5b..0628478c0b 100644 --- a/src/OPENMP/npair_half_size_nsq_newton_omp.cpp +++ b/src/OPENMP/npair_half_size_nsq_newton_omp.cpp @@ -18,6 +18,7 @@ #include "atom_vec.h" #include "domain.h" #include "error.h" +#include "force.h" #include "molecule.h" #include "group.h" #include "my_page.h" @@ -30,13 +31,11 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairHalfSizeNsqNewtonOmp::NPairHalfSizeNsqNewtonOmp(LAMMPS *lmp) : - NPair(lmp) {} +NPairHalfSizeNsqNewtonOmp::NPairHalfSizeNsqNewtonOmp(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- size particles N^2 / 2 search for neighbor pairs with full Newton's 3rd law - shear history must be accounted for when a neighbor pair is added pair added to list if atoms i and j are both owned and i < j if j is ghost only me or other proc adds pair decision based on itag,jtag tests @@ -50,6 +49,8 @@ void NPairHalfSizeNsqNewtonOmp::build(NeighList *list) const int moltemplate = (molecular == Atom::TEMPLATE) ? 
1 : 0; const int history = list->history; const int mask_history = 1 << HISTBITS; + const double delta = 0.01 * force->angstrom; + const int triclinic = domain->triclinic; NPAIR_OMP_INIT; @@ -58,8 +59,8 @@ void NPairHalfSizeNsqNewtonOmp::build(NeighList *list) #endif NPAIR_OMP_SETUP(nlocal); - int i,j,jh,n,itag,jtag,which,imol,iatom; - tagint tagprev; + int i,j,jh,n,which,imol,iatom; + tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; double radi,radsum,cutsq; int *neighptr; @@ -104,6 +105,12 @@ void NPairHalfSizeNsqNewtonOmp::build(NeighList *list) } // loop over remaining atoms, owned and ghost + // use itag/jtap comparision to eliminate half the interactions + // itag = jtag is possible for long cutoffs that include images of self + // for triclinic, must use delta to eliminate half the I/J interactions + // cannot use I/J exact coord comparision as for orthog + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon for (j = i+1; j < nall; j++) { if (includegroup && !(mask[j] & bitmask)) continue; @@ -114,6 +121,14 @@ void NPairHalfSizeNsqNewtonOmp::build(NeighList *list) if ((itag+jtag) % 2 == 0) continue; } else if (itag < jtag) { if ((itag+jtag) % 2 == 1) continue; + } else if (triclinic) { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } } else { if (x[j][2] < ztmp) continue; if (x[j][2] == ztmp) { @@ -140,7 +155,7 @@ void NPairHalfSizeNsqNewtonOmp::build(NeighList *list) if (molecular != Atom::ATOMIC) { if (!moltemplate) which = find_special(special[i],nspecial[i],tag[j]); - else if (imol >=0) + else if (imol >= 0) which = find_special(onemols[imol]->special[iatom], onemols[imol]->nspecial[iatom], tag[j]-tagprev); diff --git a/src/npair_half_bin_newton_tri.cpp b/src/npair_half_bin_newton_tri.cpp index 453d10096e..d261363b0e 100644 
--- a/src/npair_half_bin_newton_tri.cpp +++ b/src/npair_half_bin_newton_tri.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ #include "npair_half_bin_newton_tri.h" -#include "neigh_list.h" + #include "atom.h" #include "atom_vec.h" #include "domain.h" @@ -21,6 +21,7 @@ #include "force.h" #include "molecule.h" #include "my_page.h" +#include "neigh_list.h" using namespace LAMMPS_NS; diff --git a/src/npair_half_multi_newton_tri.cpp b/src/npair_half_multi_newton_tri.cpp index 1d75d6a3ef..2b753af499 100644 --- a/src/npair_half_multi_newton_tri.cpp +++ b/src/npair_half_multi_newton_tri.cpp @@ -21,8 +21,8 @@ #include "force.h" #include "molecule.h" #include "my_page.h" -#include "neighbor.h" #include "neigh_list.h" +#include "neighbor.h" using namespace LAMMPS_NS; @@ -39,7 +39,7 @@ NPairHalfMultiNewtonTri::NPairHalfMultiNewtonTri(LAMMPS *lmp) : NPair(lmp) {} void NPairHalfMultiNewtonTri::build(NeighList *list) { - int i,j,k,n,itype,jtype,icollection,jcollection,ibin,jbin,which,ns,imol,iatom,moltemplate; + int i,j,k,n,itype,jtype,ibin,jbin,icollection,jcollection,which,ns,imol,iatom,moltemplate; tagint itag,jtag,tagprev; double xtmp,ytmp,ztmp,delx,dely,delz,rsq; int *neighptr,*s; diff --git a/src/npair_half_respa_bin_newton_tri.cpp b/src/npair_half_respa_bin_newton_tri.cpp index eac67b8bd5..4cd4ead0fa 100644 --- a/src/npair_half_respa_bin_newton_tri.cpp +++ b/src/npair_half_respa_bin_newton_tri.cpp @@ -13,7 +13,7 @@ ------------------------------------------------------------------------- */ #include "npair_half_respa_bin_newton_tri.h" -#include "neigh_list.h" + #include "atom.h" #include "atom_vec.h" #include "domain.h" @@ -21,6 +21,7 @@ #include "force.h" #include "molecule.h" #include "my_page.h" +#include "neigh_list.h" using namespace LAMMPS_NS; diff --git a/src/npair_half_respa_nsq_newton.cpp b/src/npair_half_respa_nsq_newton.cpp index d0292eec92..ae56d62fb5 100644 --- a/src/npair_half_respa_nsq_newton.cpp +++ 
b/src/npair_half_respa_nsq_newton.cpp @@ -13,15 +13,16 @@ ------------------------------------------------------------------------- */ #include "npair_half_respa_nsq_newton.h" -#include "neigh_list.h" + #include "atom.h" #include "atom_vec.h" #include "domain.h" +#include "error.h" #include "force.h" #include "group.h" #include "molecule.h" #include "my_page.h" -#include "error.h" +#include "neigh_list.h" using namespace LAMMPS_NS; @@ -132,7 +133,7 @@ void NPairHalfRespaNsqNewton::build(NeighList *list) if ((itag+jtag) % 2 == 0) continue; } else if (itag < jtag) { if ((itag+jtag) % 2 == 1) continue; - } else if (triclinic) { + } else if (triclinic) { if (fabs(x[j][2]-ztmp) > delta) { if (x[j][2] < ztmp) continue; } else if (fabs(x[j][1]-ytmp) > delta) { diff --git a/src/npair_half_size_nsq_newton.cpp b/src/npair_half_size_nsq_newton.cpp index abd2a4faff..ce0c7f9562 100644 --- a/src/npair_half_size_nsq_newton.cpp +++ b/src/npair_half_size_nsq_newton.cpp @@ -113,7 +113,7 @@ void NPairHalfSizeNsqNewton::build(NeighList *list) if ((itag+jtag) % 2 == 0) continue; } else if (itag < jtag) { if ((itag+jtag) % 2 == 1) continue; - } else if (triclinic) { + } else if (triclinic) { if (fabs(x[j][2]-ztmp) > delta) { if (x[j][2] < ztmp) continue; } else if (fabs(x[j][1]-ytmp) > delta) { From bb6e4d844088cea152ebd0cf5007a6ba5c2cbfe2 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Tue, 11 Jul 2023 14:30:14 -0700 Subject: [PATCH 013/107] add forgotten line to multi and mutli/old --- src/npair_half_multi_newton_tri.cpp | 1 + src/npair_half_multi_old_newton_tri.cpp | 1 + src/npair_half_size_multi_newton_tri.cpp | 1 + 3 files changed, 3 insertions(+) diff --git a/src/npair_half_multi_newton_tri.cpp b/src/npair_half_multi_newton_tri.cpp index 316acb5049..2595c3ce21 100644 --- a/src/npair_half_multi_newton_tri.cpp +++ b/src/npair_half_multi_newton_tri.cpp @@ -121,6 +121,7 @@ void NPairHalfMultiNewtonTri::build(NeighList *list) if (cutcollectionsq[icollection][icollection] == 
cutcollectionsq[jcollection][jcollection]) { + if (j <= i) continue; if (j >= nlocal) { jtag = tag[j]; if (itag > jtag) { diff --git a/src/npair_half_multi_old_newton_tri.cpp b/src/npair_half_multi_old_newton_tri.cpp index 9dcbcff9f4..8700db3b32 100644 --- a/src/npair_half_multi_old_newton_tri.cpp +++ b/src/npair_half_multi_old_newton_tri.cpp @@ -102,6 +102,7 @@ void NPairHalfMultiOldNewtonTri::build(NeighList *list) jtype = type[j]; if (cutsq[jtype] < distsq[k]) continue; + if (j <= i) continue; if (j >= nlocal) { jtag = tag[j]; if (itag > jtag) { diff --git a/src/npair_half_size_multi_newton_tri.cpp b/src/npair_half_size_multi_newton_tri.cpp index a363ae6e1e..4ff50870b6 100644 --- a/src/npair_half_size_multi_newton_tri.cpp +++ b/src/npair_half_size_multi_newton_tri.cpp @@ -126,6 +126,7 @@ void NPairHalfSizeMultiNewtonTri::build(NeighList *list) if (cutcollectionsq[icollection][icollection] == cutcollectionsq[jcollection][jcollection]) { + if (j <= i) continue; if (j >= nlocal) { jtag = tag[j]; if (itag > jtag) { From 2a7ac115d8563dab636f10ba516c461ba23f0694 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 11 Jul 2023 18:25:29 -0400 Subject: [PATCH 014/107] fix whitespace (one more time) --- src/npair_half_multi_newton_tri.cpp | 50 ++++++++++++------------ src/npair_half_multi_old_newton_tri.cpp | 34 ++++++++-------- src/npair_half_size_multi_newton_tri.cpp | 36 ++++++++--------- 3 files changed, 60 insertions(+), 60 deletions(-) diff --git a/src/npair_half_multi_newton_tri.cpp b/src/npair_half_multi_newton_tri.cpp index 1c95e73151..24300f6929 100644 --- a/src/npair_half_multi_newton_tri.cpp +++ b/src/npair_half_multi_newton_tri.cpp @@ -114,31 +114,31 @@ void NPairHalfMultiNewtonTri::build(NeighList *list) ns = nstencil_multi[icollection][jcollection]; for (k = 0; k < ns; k++) { - js = binhead_multi[jcollection][jbin + s[k]]; - for (j = js; j >= 0; j = bins[j]) { - - // if same size (same collection), exclude half of interactions - - if 
(cutcollectionsq[icollection][icollection] == - cutcollectionsq[jcollection][jcollection]) { - if (j <= i) continue; - if (j >= nlocal) { - jtag = tag[j]; - if (itag > jtag) { - if ((itag+jtag) % 2 == 0) continue; - } else if (itag < jtag) { - if ((itag+jtag) % 2 == 1) continue; - } else { - if (fabs(x[j][2]-ztmp) > delta) { - if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > delta) { - if (x[j][1] < ytmp) continue; - } else { - if (x[j][0] < xtmp) continue; - } - } - } - } + js = binhead_multi[jcollection][jbin + s[k]]; + for (j = js; j >= 0; j = bins[j]) { + + // if same size (same collection), exclude half of interactions + + if (cutcollectionsq[icollection][icollection] == + cutcollectionsq[jcollection][jcollection]) { + if (j <= i) continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } + } jtype = type[j]; if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; diff --git a/src/npair_half_multi_old_newton_tri.cpp b/src/npair_half_multi_old_newton_tri.cpp index 8700db3b32..ce3149ebf5 100644 --- a/src/npair_half_multi_old_newton_tri.cpp +++ b/src/npair_half_multi_old_newton_tri.cpp @@ -102,23 +102,23 @@ void NPairHalfMultiOldNewtonTri::build(NeighList *list) jtype = type[j]; if (cutsq[jtype] < distsq[k]) continue; - if (j <= i) continue; - if (j >= nlocal) { - jtag = tag[j]; - if (itag > jtag) { - if ((itag+jtag) % 2 == 0) continue; - } else if (itag < jtag) { - if ((itag+jtag) % 2 == 1) continue; - } else { - if (fabs(x[j][2]-ztmp) > delta) { - if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > delta) { - if (x[j][1] < ytmp) continue; - } else { - if (x[j][0] < xtmp) continue; - } - } - } + if (j <= i) 
continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; diff --git a/src/npair_half_size_multi_newton_tri.cpp b/src/npair_half_size_multi_newton_tri.cpp index c521034406..aa0d8e3f42 100644 --- a/src/npair_half_size_multi_newton_tri.cpp +++ b/src/npair_half_size_multi_newton_tri.cpp @@ -125,24 +125,24 @@ void NPairHalfSizeMultiNewtonTri::build(NeighList *list) // if same size (same collection), exclude half of interactions if (cutcollectionsq[icollection][icollection] == - cutcollectionsq[jcollection][jcollection]) { - if (j <= i) continue; - if (j >= nlocal) { - jtag = tag[j]; - if (itag > jtag) { - if ((itag+jtag) % 2 == 0) continue; - } else if (itag < jtag) { - if ((itag+jtag) % 2 == 1) continue; - } else { - if (fabs(x[j][2]-ztmp) > delta) { - if (x[j][2] < ztmp) continue; - } else if (fabs(x[j][1]-ytmp) > delta) { - if (x[j][1] < ytmp) continue; - } else { - if (x[j][0] < xtmp) continue; - } - } - } + cutcollectionsq[jcollection][jcollection]) { + if (j <= i) continue; + if (j >= nlocal) { + jtag = tag[j]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x[j][2]-ztmp) > delta) { + if (x[j][2] < ztmp) continue; + } else if (fabs(x[j][1]-ytmp) > delta) { + if (x[j][1] < ytmp) continue; + } else { + if (x[j][0] < xtmp) continue; + } + } + } } jtype = type[j]; From 89fb236144902530091f279d012b90f907b93b50 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 11 Jul 2023 18:35:16 -0400 Subject: [PATCH 015/107] port bugfix for colloid test failure --- 
src/OPENMP/npair_half_multi_newton_tri_omp.cpp | 1 + src/OPENMP/npair_half_size_multi_newton_tri_omp.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/OPENMP/npair_half_multi_newton_tri_omp.cpp b/src/OPENMP/npair_half_multi_newton_tri_omp.cpp index b18cba0261..e26bea990f 100644 --- a/src/OPENMP/npair_half_multi_newton_tri_omp.cpp +++ b/src/OPENMP/npair_half_multi_newton_tri_omp.cpp @@ -130,6 +130,7 @@ void NPairHalfMultiNewtonTriOmp::build(NeighList *list) if (cutcollectionsq[icollection][icollection] == cutcollectionsq[jcollection][jcollection]) { + if (j <= i) continue; if (j >= nlocal) { jtag = tag[j]; if (itag > jtag) { diff --git a/src/OPENMP/npair_half_size_multi_newton_tri_omp.cpp b/src/OPENMP/npair_half_size_multi_newton_tri_omp.cpp index 916b7bfbc3..4765c918b7 100644 --- a/src/OPENMP/npair_half_size_multi_newton_tri_omp.cpp +++ b/src/OPENMP/npair_half_size_multi_newton_tri_omp.cpp @@ -135,6 +135,7 @@ void NPairHalfSizeMultiNewtonTriOmp::build(NeighList *list) if (cutcollectionsq[icollection][icollection] == cutcollectionsq[jcollection][jcollection]) { + if (j <= i) continue; if (j >= nlocal) { jtag = tag[j]; if (itag > jtag) { From 3fc809a1b9782430a4e41b8ca1d8a3c224762ef6 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Tue, 11 Jul 2023 18:45:45 -0700 Subject: [PATCH 016/107] add check for atom IDs when triclinic --- src/neighbor.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/neighbor.cpp b/src/neighbor.cpp index df1547e5eb..c6eea7e2f1 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -313,7 +313,10 @@ void Neighbor::init() triclinic = domain->triclinic; newton_pair = force->newton_pair; - // error check + // error checks + + if (triclinic && atom->tag_enable == 0) + error->all(FLERR, "Cannot build triclinic neighbor lists unless atoms have IDs"); if (delay > 0 && (delay % every) != 0) error->all(FLERR,"Neighbor delay must be 0 or multiple of every setting"); From a91b3dab963d0044885298955cc2edf8e6556ead 
Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Tue, 11 Jul 2023 18:50:23 -0700 Subject: [PATCH 017/107] doc atom ID requirement for triclinic --- doc/src/Howto_triclinic.rst | 3 ++- doc/src/atom_modify.rst | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/src/Howto_triclinic.rst b/doc/src/Howto_triclinic.rst index 0efadbcc8c..2983d013c6 100644 --- a/doc/src/Howto_triclinic.rst +++ b/doc/src/Howto_triclinic.rst @@ -12,7 +12,8 @@ is created, e.g. by the :doc:`create_box ` or :doc:`read_data ` or :doc:`read_restart ` commands. Additionally, LAMMPS defines box size parameters lx,ly,lz where lx = xhi-xlo, and similarly in the y and z dimensions. The 6 -parameters, as well as lx,ly,lz, can be output via the :doc:`thermo_style custom ` command. +parameters, as well as lx,ly,lz, can be output via the +:doc:`thermo_style custom ` command. LAMMPS also allows simulations to be performed in triclinic (non-orthogonal) simulation boxes shaped as a parallelepiped with diff --git a/doc/src/atom_modify.rst b/doc/src/atom_modify.rst index 1e5a3d49ff..21590e6680 100644 --- a/doc/src/atom_modify.rst +++ b/doc/src/atom_modify.rst @@ -65,6 +65,11 @@ switch. This is described on the :doc:`Build_settings ` doc page. If atom IDs are not used, they must be specified as 0 for all atoms, e.g. in a data or restart file. +.. note:: + + If a :doc:`triclinic simulation box ` is used, + atom IDs are required, due to how neighbor lists are built. + The *map* keyword determines how atoms with specific IDs are found when required. 
An example are the bond (angle, etc) methods which need to find the local index of an atom with a specific global ID From f62a4c537258e6eab4c20df0237b607378963d37 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 17 Jul 2023 16:49:26 -0500 Subject: [PATCH 018/107] Working on fix efield/kk --- src/KOKKOS/fix_efield_kokkos.cpp | 87 ++++++++++++++++++++++++++++++++ src/KOKKOS/fix_efield_kokkos.h | 52 +++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 src/KOKKOS/fix_efield_kokkos.cpp create mode 100644 src/KOKKOS/fix_efield_kokkos.h diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp new file mode 100644 index 0000000000..697116402d --- /dev/null +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -0,0 +1,87 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "fix_efield_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "atom_vec.h" +#include "input.h" +#include "modify.h" +#include "update.h" +#include "variable.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +FixEfieldKokkos::FixEfieldKokkos(LAMMPS *lmp, int narg, char **arg) : + FixEfield(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *)atom; + execution_space = ExecutionSpaceFromDevice::space; + + datamask_read = X_MASK | Q_MASK | F_MASK | RMASS_MASK | MASK_MASK | TYPE_MASK; + datamask_modify = F_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixEfieldKokkos::post_force(int /*vflag*/) +{ + // update efield due to variables + + update_efield_variables(); + + atomKK->sync(execution_space,datamask_read); + atomKK->modified(execution_space,datamask_modify); + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + q = atomKK->k_q.view(); + type = atomKK->k_type.view(); + mask = atomKK->k_mask.view(); + int nlocal = atomKK->nlocal; + if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; + + copymode = 1; + + eflag = 0; + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), *this); + + copymode = 0; +} + +template +KOKKOS_INLINE_FUNCTION +void FixEfieldKokkos::operator()(const int i) const +{ + if (mask[i] & groupbit) { + double qi = q[i]; + f(i,0) += qi*ex; + f(i,1) += qi*ey; + f(i,2) += qi*ez; + } +} + +namespace LAMMPS_NS { +template class FixEfieldKokkos; +#ifdef LMP_KOKKOS_GPU +template class FixEfieldKokkos; +#endif +} diff --git a/src/KOKKOS/fix_efield_kokkos.h b/src/KOKKOS/fix_efield_kokkos.h new file mode 100644 index 0000000000..e5171ce6f5 --- /dev/null +++ b/src/KOKKOS/fix_efield_kokkos.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - 
Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(efield/kk,FixFixEfieldKokkos); +FixStyle(efield/kk/device,FixFixEfieldKokkos); +FixStyle(efield/kk/host,FixFixEfieldKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_FIX_EFIELD_KOKKOS_H +#define LMP_FIX_EFIELD_KOKKOS_H + +#include "fix_efield.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class FixEfieldKokkos : public FixEfield { + public: + FixEfieldKokkos(class LAMMPS *, int, char **); + + void post_force(int) override; + + KOKKOS_INLINE_FUNCTION + void operator()(const int, double &) const; + + private: + typename ArrayTypes::t_x_array x; + typename ArrayTypes::t_f_array f; + typename ArrayTypes::t_int_1d type; + typename ArrayTypes::t_int_1d mask; + typename ArrayTypes::t_float_1d_randomread q; +}; + +} // namespace LAMMPS_NS + +#endif // LMP_FIX_EFIELD_KOKKOS_H +#endif // FIX_CLASS From 6a991ff0a094a089a50b5f3723a7b1e8c1bbf990 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 1 Aug 2023 10:53:20 -0500 Subject: [PATCH 019/107] Updated fix efield/kk, needs work on unwrap --- src/KOKKOS/fix_efield_kokkos.cpp | 173 ++++++++++++++++++++++++++----- src/KOKKOS/fix_efield_kokkos.h | 67 +++++++++--- src/fix_efield.cpp | 2 + 3 files changed, 197 insertions(+), 45 deletions(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index 697116402d..fc84967e02 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ 
b/src/KOKKOS/fix_efield_kokkos.cpp @@ -15,14 +15,23 @@ #include "fix_efield_kokkos.h" #include "atom_kokkos.h" -#include "atom_masks.h" -#include "atom_vec.h" -#include "input.h" -#include "modify.h" #include "update.h" +#include "modify.h" +#include "domain.h" +#include "region.h" +#include "input.h" #include "variable.h" +#include "memory_kokkos.h" +#include "error.h" +#include "atom_masks.h" +#include "kokkos_base.h" + +#include using namespace LAMMPS_NS; +using namespace FixConst; + +enum{NONE,CONSTANT,EQUAL,ATOM}; /* ---------------------------------------------------------------------- */ @@ -31,11 +40,36 @@ FixEfieldKokkos::FixEfieldKokkos(LAMMPS *lmp, int narg, char **arg) FixEfield(lmp, narg, arg) { kokkosable = 1; - atomKK = (AtomKokkos *)atom; + atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; - datamask_read = X_MASK | Q_MASK | F_MASK | RMASS_MASK | MASK_MASK | TYPE_MASK; - datamask_modify = F_MASK; + memory->destroy(efield); + memoryKK->create_kokkos(k_efield,efield,maxatom,4,"efield:efield"); + d_efield = k_efield.view(); +} + +/* ---------------------------------------------------------------------- */ + +template +FixEfieldKokkos::~FixEfieldKokkos() +{ + if (copymode) return; + + memoryKK->destroy_kokkos(k_efield,efield); + efield = nullptr; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixEfieldKokkos::init() +{ + FixEfield::init(); + + if (utils::strmatch(update->integrate_style,"^respa")) + error->all(FLERR,"Cannot (yet) use respa with Kokkos"); } /* ---------------------------------------------------------------------- */ @@ -43,39 +77,121 @@ FixEfieldKokkos::FixEfieldKokkos(LAMMPS *lmp, int narg, char **arg) template void FixEfieldKokkos::post_force(int /*vflag*/) { - // update efield due to variables + atomKK->sync(execution_space, F_MASK | Q_MASK | MASK_MASK); - update_efield_variables(); - - 
atomKK->sync(execution_space,datamask_read); - atomKK->modified(execution_space,datamask_modify); - - x = atomKK->k_x.view(); f = atomKK->k_f.view(); q = atomKK->k_q.view(); - type = atomKK->k_type.view(); mask = atomKK->k_mask.view(); - int nlocal = atomKK->nlocal; - if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; - copymode = 1; + int nlocal = atom->nlocal; - eflag = 0; + // update region if necessary - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), *this); - - copymode = 0; + if (region) { + if (!utils::strmatch(region->style, "^block")) + error->all(FLERR,"Cannot (yet) use {}-style region with fix efield/kk",region->style); + region->prematch(); + DAT::tdual_int_1d k_match = DAT::tdual_int_1d("efield:k_match",nlocal); + KokkosBase* regionKKBase = dynamic_cast(region); + regionKKBase->match_all_kokkos(groupbit,k_match); + k_match.template sync(); + d_match = k_match.template view(); + } + + // reallocate sforce array if necessary + + if (varflag == ATOM && atom->nmax > maxatom) { + maxatom = atom->nmax; + memoryKK->destroy_kokkos(k_efield,efield); + memoryKK->create_kokkos(k_efield,efield,maxatom,4,"efield:efield"); + d_efield = k_efield.view(); + } + + fsum[0] = fsum[1] = fsum[2] = fsum[3] = 0.0; + double_4 fsum_kk; + force_flag = 0; + + if (varflag == CONSTANT) { + copymode = 1; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); + copymode = 0; + + // variable force, wrap with clear/add + + } else { + + atomKK->sync(Host,ALL_MASK); // this can be removed when variable class is ported to Kokkos + + modify->clearstep_compute(); + + if (xstyle == EQUAL) ex = input->variable->compute_equal(xvar); + else if (xstyle == ATOM) + input->variable->compute_atom(xvar,igroup,&efield[0][0],4,0); + if (ystyle == EQUAL) ey = input->variable->compute_equal(yvar); + else if (ystyle == ATOM) + input->variable->compute_atom(yvar,igroup,&efield[0][1],4,0); + if (zstyle == EQUAL) ez = input->variable->compute_equal(zvar); + else if (zstyle == 
ATOM) + input->variable->compute_atom(zvar,igroup,&efield[0][2],4,0); + + modify->addstep_compute(update->ntimestep + 1); + + if (varflag == ATOM) { // this can be removed when variable class is ported to Kokkos + k_efield.modify(); + k_efield.sync(); + } + + copymode = 1; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); + copymode = 0; + } + + atomKK->modified(execution_space, F_MASK); + + fsum[0] = fsum_kk.d0; + fsum[1] = fsum_kk.d1; + fsum[2] = fsum_kk.d2; + fsum[3] = fsum_kk.d3; } template KOKKOS_INLINE_FUNCTION -void FixEfieldKokkos::operator()(const int i) const -{ +void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, double_4& fsum_kk) const { if (mask[i] & groupbit) { - double qi = q[i]; - f(i,0) += qi*ex; - f(i,1) += qi*ey; - f(i,2) += qi*ez; + if (region && !d_match[i]) return; + const F_FLOAT qtmp = q[i]; + const F_FLOAT fx = qtmp * ex; + const F_FLOAT fy = qtmp * ey; + const F_FLOAT fz = qtmp * ez; + f(i,0) += fx; + f(i,1) += fy; + f(i,2) += fz; + //fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; + fsum_kk.d1 += fx; + fsum_kk.d2 += fy; + fsum_kk.d3 += fz; + } +} + +template +KOKKOS_INLINE_FUNCTION +void FixEfieldKokkos::operator()(TagFixEfieldNonConstant, const int &i, double_4& fsum_kk) const { + if (mask[i] & groupbit) { + if (region && !d_match[i]) return; + const F_FLOAT qtmp = q[i]; + const F_FLOAT fx = qtmp * ex; + const F_FLOAT fy = qtmp * ey; + const F_FLOAT fz = qtmp * ez; + if (xstyle == ATOM) f(i,0) += d_efield(i,0); + else if (xstyle) f(i,0) += fx; + if (ystyle == ATOM) f(i,1) = d_efield(i,1); + else if (ystyle) f(i,1) += fy; + if (zstyle == ATOM) f(i,2) = d_efield(i,2); + else if (zstyle) f(i,2) += fz; + //fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; + fsum_kk.d1 += fx; + fsum_kk.d2 += fy; + fsum_kk.d3 += fz; } } @@ -85,3 +201,4 @@ template class FixEfieldKokkos; template class FixEfieldKokkos; #endif } + diff --git a/src/KOKKOS/fix_efield_kokkos.h 
b/src/KOKKOS/fix_efield_kokkos.h index e5171ce6f5..8d8d2ee97a 100644 --- a/src/KOKKOS/fix_efield_kokkos.h +++ b/src/KOKKOS/fix_efield_kokkos.h @@ -13,9 +13,9 @@ #ifdef FIX_CLASS // clang-format off -FixStyle(efield/kk,FixFixEfieldKokkos); -FixStyle(efield/kk/device,FixFixEfieldKokkos); -FixStyle(efield/kk/host,FixFixEfieldKokkos); +FixStyle(efield/kk,FixEfieldKokkos); +FixStyle(efield/kk/device,FixEfieldKokkos); +FixStyle(efield/kk/host,FixEfieldKokkos); // clang-format on #else @@ -28,25 +28,58 @@ FixStyle(efield/kk/host,FixFixEfieldKokkos); namespace LAMMPS_NS { +struct e_double_4 { + double d0, d1, d2, d3; + KOKKOS_INLINE_FUNCTION + e_double_4() { + d0 = d1 = d2 = d3 = 0.0; + } + KOKKOS_INLINE_FUNCTION + e_double_4& operator+=(const e_double_4 &rhs) { + d0 += rhs.d0; + d1 += rhs.d1; + d2 += rhs.d2; + d3 += rhs.d3; + return *this; + } +}; +typedef e_double_4 double_4; + +struct TagFixEfieldConstant{}; + +struct TagFixEfieldNonConstant{}; + template class FixEfieldKokkos : public FixEfield { - public: - FixEfieldKokkos(class LAMMPS *, int, char **); + public: + typedef DeviceType device_type; + typedef double_4 value_type; + typedef ArrayTypes AT; - void post_force(int) override; + FixEfieldKokkos(class LAMMPS *, int, char **); + ~FixEfieldKokkos() override; + void init() override; + void post_force(int) override; - KOKKOS_INLINE_FUNCTION - void operator()(const int, double &) const; + KOKKOS_INLINE_FUNCTION + void operator()(TagFixEfieldConstant, const int&, double_4&) const; - private: - typename ArrayTypes::t_x_array x; - typename ArrayTypes::t_f_array f; - typename ArrayTypes::t_int_1d type; - typename ArrayTypes::t_int_1d mask; - typename ArrayTypes::t_float_1d_randomread q; + KOKKOS_INLINE_FUNCTION + void operator()(TagFixEfieldNonConstant, const int&, double_4&) const; + + private: + DAT::tdual_ffloat_2d k_efield; + typename AT::t_ffloat_2d_randomread d_efield; + typename AT::t_int_1d d_match; + + typename AT::t_x_array_randomread x; + typename 
AT::t_float_1d_randomread q; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread mask; }; -} // namespace LAMMPS_NS +} + +#endif +#endif -#endif // LMP_FIX_EFIELD_KOKKOS_H -#endif // FIX_CLASS diff --git a/src/fix_efield.cpp b/src/fix_efield.cpp index d01a498d39..23277f8af3 100644 --- a/src/fix_efield.cpp +++ b/src/fix_efield.cpp @@ -129,6 +129,8 @@ FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) : FixEfield::~FixEfield() { + if (copymode) return; + delete[] xstr; delete[] ystr; delete[] zstr; From 34c398dd372a5eae6ecb53a4fa10a2bf2e0cdad0 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Wed, 2 Aug 2023 06:59:24 -0500 Subject: [PATCH 020/107] Tried two ways of doing parallel reduce for fsum --- src/KOKKOS/fix_efield_kokkos.cpp | 75 +++++++++++++++++++++++++++++--- src/KOKKOS/fix_efield_kokkos.h | 1 + 2 files changed, 69 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index fc84967e02..d4ef6dc3f2 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -17,7 +17,7 @@ #include "atom_kokkos.h" #include "update.h" #include "modify.h" -#include "domain.h" +#include "domain_kokkos.h" #include "region.h" #include "input.h" #include "variable.h" @@ -77,10 +77,11 @@ void FixEfieldKokkos::init() template void FixEfieldKokkos::post_force(int /*vflag*/) { - atomKK->sync(execution_space, F_MASK | Q_MASK | MASK_MASK); + atomKK->sync(execution_space, F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK); f = atomKK->k_f.view(); q = atomKK->k_q.view(); + image = atomKK->k_image.view(); mask = atomKK->k_mask.view(); int nlocal = atom->nlocal; @@ -113,7 +114,50 @@ void FixEfieldKokkos::post_force(int /*vflag*/) if (varflag == CONSTANT) { copymode = 1; - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); + //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); + + { + // local variables for lambda capture + auto prd = Few(domain->prd); + auto h = 
Few(domain->h); + auto triclinic = domain->triclinic; + auto l_ex = ex; + auto l_ey = ey; + auto l_ez = ez; + + auto l_x = x; + auto l_q = q; + auto l_f = f; + auto l_mask = mask; + auto l_image = image; + auto l_groupbit = groupbit; + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), + LAMMPS_LAMBDA(int i, double_4& fsum_kk) { + if (l_mask[i] & l_groupbit) { + + Few x_i; + x_i[0] = l_x(i,0); + x_i[1] = l_x(i,1); + x_i[2] = l_x(i,2); + auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i)); + auto qtmp = l_q(i); + auto fx = qtmp * l_ex; + auto fy = qtmp * l_ey; + auto fz = qtmp * l_ez; + l_f(i,0) += fx; + l_f(i,1) += fy; + l_f(i,2) += fz; + + fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; + fsum_kk.d1 += fx; + fsum_kk.d2 += fy; + fsum_kk.d3 += fz; + } + }, fsum_kk); + + } + copymode = 0; // variable force, wrap with clear/add @@ -159,6 +203,14 @@ KOKKOS_INLINE_FUNCTION void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, double_4& fsum_kk) const { if (mask[i] & groupbit) { if (region && !d_match[i]) return; + auto prd = Few(domain->prd); + auto h = Few(domain->h); + auto triclinic = domain->triclinic; + Few x_i; + x_i[0] = x(i,0); + x_i[1] = x(i,1); + x_i[2] = x(i,2); + auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i)); const F_FLOAT qtmp = q[i]; const F_FLOAT fx = qtmp * ex; const F_FLOAT fy = qtmp * ey; @@ -166,7 +218,8 @@ void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, f(i,0) += fx; f(i,1) += fy; f(i,2) += fz; - //fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; + + fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; fsum_kk.d1 += fx; fsum_kk.d2 += fy; fsum_kk.d3 += fz; @@ -176,19 +229,27 @@ void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, template KOKKOS_INLINE_FUNCTION void FixEfieldKokkos::operator()(TagFixEfieldNonConstant, const int &i, double_4& fsum_kk) const { + auto prd = Few(domain->prd); + auto h = Few(domain->h); + auto 
triclinic = domain->triclinic; if (mask[i] & groupbit) { if (region && !d_match[i]) return; + Few x_i; + x_i[0] = x(i,0); + x_i[1] = x(i,1); + x_i[2] = x(i,2); + auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i)); const F_FLOAT qtmp = q[i]; const F_FLOAT fx = qtmp * ex; const F_FLOAT fy = qtmp * ey; const F_FLOAT fz = qtmp * ez; if (xstyle == ATOM) f(i,0) += d_efield(i,0); else if (xstyle) f(i,0) += fx; - if (ystyle == ATOM) f(i,1) = d_efield(i,1); + if (ystyle == ATOM) f(i,1) += d_efield(i,1); else if (ystyle) f(i,1) += fy; - if (zstyle == ATOM) f(i,2) = d_efield(i,2); + if (zstyle == ATOM) f(i,2) += d_efield(i,2); else if (zstyle) f(i,2) += fz; - //fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; + fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; fsum_kk.d1 += fx; fsum_kk.d2 += fy; fsum_kk.d3 += fz; diff --git a/src/KOKKOS/fix_efield_kokkos.h b/src/KOKKOS/fix_efield_kokkos.h index 8d8d2ee97a..d159473d1d 100644 --- a/src/KOKKOS/fix_efield_kokkos.h +++ b/src/KOKKOS/fix_efield_kokkos.h @@ -75,6 +75,7 @@ class FixEfieldKokkos : public FixEfield { typename AT::t_x_array_randomread x; typename AT::t_float_1d_randomread q; typename AT::t_f_array f; + typename AT::t_imageint_1d_randomread image; typename AT::t_int_1d_randomread mask; }; From bc6fcdc61a550b645b25f12138a3bb4022153c9a Mon Sep 17 00:00:00 2001 From: "W. Michael Brown" Date: Fri, 4 Aug 2023 08:49:27 -0700 Subject: [PATCH 021/107] Applying triclinic neighbor fixes to intel package. 
--- src/INTEL/fix_intel.cpp | 2 + src/INTEL/npair_halffull_newton_intel.cpp | 66 ++++++++++---- .../npair_halffull_newton_trim_intel.cpp | 87 ++++++++++++++----- src/INTEL/npair_intel.cpp | 40 +++++++-- 4 files changed, 144 insertions(+), 51 deletions(-) diff --git a/src/INTEL/fix_intel.cpp b/src/INTEL/fix_intel.cpp index 4c46608677..8396904ffd 100644 --- a/src/INTEL/fix_intel.cpp +++ b/src/INTEL/fix_intel.cpp @@ -20,6 +20,7 @@ #include "fix_intel.h" #include "comm.h" +#include "domain.h" #include "error.h" #include "force.h" #include "neighbor.h" @@ -470,6 +471,7 @@ void FixIntel::pair_init_check(const bool cdmessage) int need_tag = 0; if (atom->molecular != Atom::ATOMIC || three_body_neighbor()) need_tag = 1; + if (domain->triclinic && force->newton_pair) need_tag = 1; // Clear buffers used for pair style char kmode[80]; diff --git a/src/INTEL/npair_halffull_newton_intel.cpp b/src/INTEL/npair_halffull_newton_intel.cpp index cd05d5f97a..adcf2527ab 100644 --- a/src/INTEL/npair_halffull_newton_intel.cpp +++ b/src/INTEL/npair_halffull_newton_intel.cpp @@ -20,7 +20,9 @@ #include "atom.h" #include "comm.h" +#include "domain.h" #include "error.h" +#include "force.h" #include "modify.h" #include "my_page.h" #include "neigh_list.h" @@ -56,6 +58,9 @@ void NPairHalffullNewtonIntel::build_t(NeighList *list, const int * _noalias const numneigh_full = list->listfull->numneigh; const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT + const double delta = 0.01 * force->angstrom; + const int triclinic = domain->triclinic; + #if defined(_OPENMP) #pragma omp parallel #endif @@ -82,25 +87,50 @@ void NPairHalffullNewtonIntel::build_t(NeighList *list, const int * _noalias const jlist = firstneigh_full[i]; const int jnum = numneigh_full[i]; - #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned - #pragma ivdep - #endif - for (int jj = 0; jj < jnum; jj++) { - const int joriginal = jlist[jj]; - const int j = joriginal & NEIGHMASK; - int 
addme = 1; - if (j < nlocal) { - if (i > j) addme = 0; - } else { - if (x[j].z < ztmp) addme = 0; - if (x[j].z == ztmp) { - if (x[j].y < ytmp) addme = 0; - if (x[j].y == ytmp && x[j].x < xtmp) addme = 0; + if (!triclinic) { + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma ivdep + #endif + for (int jj = 0; jj < jnum; jj++) { + const int joriginal = jlist[jj]; + const int j = joriginal & NEIGHMASK; + int addme = 1; + if (j < nlocal) { + if (i > j) addme = 0; + } else { + if (x[j].z < ztmp) addme = 0; + if (x[j].z == ztmp) { + if (x[j].y < ytmp) addme = 0; + if (x[j].y == ytmp && x[j].x < xtmp) addme = 0; + } } + if (addme) + neighptr[n++] = joriginal; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma ivdep + #endif + for (int jj = 0; jj < jnum; jj++) { + const int joriginal = jlist[jj]; + const int j = joriginal & NEIGHMASK; + int addme = 1; + if (j < nlocal) { + if (i > j) addme = 0; + } else { + if (fabs(x[j].z-ztmp) > delta) { + if (x[j].z < ztmp) addme = 0; + } else if (fabs(x[j].y-ytmp) > delta) { + if (x[j].y < ytmp) addme = 0; + } else { + if (x[j].x < xtmp) addme = 0; + } + } + if (addme) + neighptr[n++] = joriginal; } - if (addme) - neighptr[n++] = joriginal; } ilist[ii] = i; @@ -203,7 +233,7 @@ void NPairHalffullNewtonIntel::build_t3(NeighList *list, int *numhalf) void NPairHalffullNewtonIntel::build(NeighList *list) { - if (_fix->three_body_neighbor() == 0) { + if (_fix->three_body_neighbor() == 0 || domain->triclinic) { if (_fix->precision() == FixIntel::PREC_MODE_MIXED) build_t(list, _fix->get_mixed_buffers()); else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) diff --git a/src/INTEL/npair_halffull_newton_trim_intel.cpp b/src/INTEL/npair_halffull_newton_trim_intel.cpp index e38375f750..34b9b20e9c 100644 --- a/src/INTEL/npair_halffull_newton_trim_intel.cpp +++ b/src/INTEL/npair_halffull_newton_trim_intel.cpp @@ -20,7 +20,9 @@ #include "atom.h" #include "comm.h" +#include "domain.h" #include 
"error.h" +#include "force.h" #include "modify.h" #include "my_page.h" #include "neigh_list.h" @@ -57,6 +59,8 @@ void NPairHalffullNewtonTrimIntel::build_t(NeighList *list, const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT const flt_t cutsq_custom = cutoff_custom * cutoff_custom; + const double delta = 0.01 * force->angstrom; + const int triclinic = domain->triclinic; #if defined(_OPENMP) #pragma omp parallel @@ -84,35 +88,70 @@ void NPairHalffullNewtonTrimIntel::build_t(NeighList *list, const int * _noalias const jlist = firstneigh_full[i]; const int jnum = numneigh_full[i]; - #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned - #pragma ivdep - #endif - for (int jj = 0; jj < jnum; jj++) { - const int joriginal = jlist[jj]; - const int j = joriginal & NEIGHMASK; - int addme = 1; - if (j < nlocal) { - if (i > j) addme = 0; - } else { - if (x[j].z < ztmp) addme = 0; - if (x[j].z == ztmp) { - if (x[j].y < ytmp) addme = 0; - if (x[j].y == ytmp && x[j].x < xtmp) addme = 0; + if (!triclinic) { + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma ivdep + #endif + for (int jj = 0; jj < jnum; jj++) { + const int joriginal = jlist[jj]; + const int j = joriginal & NEIGHMASK; + int addme = 1; + if (j < nlocal) { + if (i > j) addme = 0; + } else { + if (x[j].z < ztmp) addme = 0; + if (x[j].z == ztmp) { + if (x[j].y < ytmp) addme = 0; + if (x[j].y == ytmp && x[j].x < xtmp) addme = 0; + } } + + // trim to shorter cutoff + + const flt_t delx = xtmp - x[j].x; + const flt_t dely = ytmp - x[j].y; + const flt_t delz = ztmp - x[j].z; + const flt_t rsq = delx * delx + dely * dely + delz * delz; + + if (rsq > cutsq_custom) addme = 0; + + if (addme) + neighptr[n++] = joriginal; } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma ivdep + #endif + for (int jj = 0; jj < jnum; jj++) { + const int joriginal = jlist[jj]; + const int j = joriginal & NEIGHMASK; + int addme = 1; + if (j < nlocal) 
{ + if (i > j) addme = 0; + } else { + if (fabs(x[j].z-ztmp) > delta) { + if (x[j].z < ztmp) addme = 0; + } else if (fabs(x[j].y-ytmp) > delta) { + if (x[j].y < ytmp) addme = 0; + } else { + if (x[j].x < xtmp) addme = 0; + } + } - // trim to shorter cutoff + // trim to shorter cutoff - const flt_t delx = xtmp - x[j].x; - const flt_t dely = ytmp - x[j].y; - const flt_t delz = ztmp - x[j].z; - const flt_t rsq = delx * delx + dely * dely + delz * delz; + const flt_t delx = xtmp - x[j].x; + const flt_t dely = ytmp - x[j].y; + const flt_t delz = ztmp - x[j].z; + const flt_t rsq = delx * delx + dely * dely + delz * delz; - if (rsq > cutsq_custom) addme = 0; + if (rsq > cutsq_custom) addme = 0; - if (addme) - neighptr[n++] = joriginal; + if (addme) + neighptr[n++] = joriginal; + } } ilist[ii] = i; @@ -235,7 +274,7 @@ void NPairHalffullNewtonTrimIntel::build_t3(NeighList *list, int *numhalf, void NPairHalffullNewtonTrimIntel::build(NeighList *list) { - if (_fix->three_body_neighbor() == 0) { + if (_fix->three_body_neighbor() == 0 || domain->triclinic) { if (_fix->precision() == FixIntel::PREC_MODE_MIXED) build_t(list, _fix->get_mixed_buffers()); else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) diff --git a/src/INTEL/npair_intel.cpp b/src/INTEL/npair_intel.cpp index 600109d7ae..dcfb66e05f 100644 --- a/src/INTEL/npair_intel.cpp +++ b/src/INTEL/npair_intel.cpp @@ -204,6 +204,8 @@ void NPairIntel::bin_newton(const int offload, NeighList *list, } const int special_bound = sb; + const double delta = 0.01 * force->angstrom; + #ifdef _LMP_INTEL_OFFLOAD const int * _noalias const binhead = this->binhead; const int * _noalias const bins = this->bins; @@ -229,7 +231,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list, in(ncache_stride,maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \ in(offload_end,separate_buffers,astart,aend,nlocal,molecular) \ in(ntypes,xperiodic,yperiodic,zperiodic,xprd_half,yprd_half,zprd_half) \ - in(pack_width,special_bound) \ + 
in(pack_width,special_bound,delta) \ out(overflow:length(5) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ signal(tag) @@ -331,7 +333,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list, const flt_t ztmp = x[i].z; const int itype = x[i].w; tagint itag; - if (THREE) itag = tag[i]; + if (THREE || (TRI && !FULL)) itag = tag[i]; const int ioffset = ntypes * itype; const int ibin = atombin[i]; @@ -365,7 +367,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list, ty[u] = x[j].y; tz[u] = x[j].z; tjtype[u] = x[j].w; - if (THREE) ttag[u] = tag[j]; + if (THREE || (TRI && !FULL)) ttag[u] = tag[j]; } if (FULL == 0 && TRI != 1) { @@ -486,12 +488,32 @@ void NPairIntel::bin_newton(const int offload, NeighList *list, // Triclinic if (TRI) { - if (tz[u] < ztmp) addme = 0; - if (tz[u] == ztmp) { - if (ty[u] < ytmp) addme = 0; - if (ty[u] == ytmp) { - if (tx[u] < xtmp) addme = 0; - if (tx[u] == xtmp && j <= i) addme = 0; + if (FULL) { + if (tz[u] < ztmp) addme = 0; + if (tz[u] == ztmp) { + if (ty[u] < ytmp) addme = 0; + if (ty[u] == ytmp) { + if (tx[u] < xtmp) addme = 0; + if (tx[u] == xtmp && j <= i) addme = 0; + } + } + } else { + if (j <= i) addme = 0; + if (j >= nlocal) { + const tagint jtag = ttag[u]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) addme = 0; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) addme = 0; + } else { + if (fabs(tz[u]-ztmp) > delta) { + if (tz[u] < ztmp) addme = 0; + } else if (fabs(ty[u]-ytmp) > delta) { + if (ty[u] < ytmp) addme = 0; + } else { + if (tx[u] < xtmp) addme = 0; + } + } } } } From dbab5b69312a2d789f37973ce21873cc2e2757e4 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 4 Aug 2023 22:24:48 -0400 Subject: [PATCH 022/107] possible workaround for unit test failure taken from: https://github.com/open-mpi/ompi/issues/9656 --- unittest/formats/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unittest/formats/CMakeLists.txt 
b/unittest/formats/CMakeLists.txt index 93ea2f3b32..58c797b6e6 100644 --- a/unittest/formats/CMakeLists.txt +++ b/unittest/formats/CMakeLists.txt @@ -41,6 +41,8 @@ set_tests_properties(TextFileReader PROPERTIES ENVIRONMENT "LAMMPS_POTENTIALS=${ add_executable(test_file_operations test_file_operations.cpp) target_link_libraries(test_file_operations PRIVATE lammps GTest::GMock) add_test(NAME FileOperations COMMAND test_file_operations) +# try to mitigate possible OpenMPI bug +set_tests_properties(TextFileReader PROPERTIES ENVIRONMENT "OMPI_MCA_sharedfp=\"^sm\"") add_executable(test_dump_atom test_dump_atom.cpp) target_link_libraries(test_dump_atom PRIVATE lammps GTest::GMock) From dc8f17e8e6d75e795e93fd2b2d85e93324f6ec12 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Sun, 6 Aug 2023 00:04:46 -0500 Subject: [PATCH 023/107] Fixed bugs with missing x array, removed the () operator overloads because they require access to domain within the kernels --- src/KOKKOS/fix_efield_kokkos.cpp | 63 +++++++++++++++++++++++++++----- src/KOKKOS/fix_efield_kokkos.h | 4 +- 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index d4ef6dc3f2..ecf4418cf6 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -77,8 +77,9 @@ void FixEfieldKokkos::init() template void FixEfieldKokkos::post_force(int /*vflag*/) { - atomKK->sync(execution_space, F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK); + atomKK->sync(execution_space, X_MASK | F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK); + x = atomKK->k_f.view(); f = atomKK->k_f.view(); q = atomKK->k_q.view(); image = atomKK->k_image.view(); @@ -132,10 +133,8 @@ void FixEfieldKokkos::post_force(int /*vflag*/) auto l_image = image; auto l_groupbit = groupbit; - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), - LAMMPS_LAMBDA(int i, double_4& fsum_kk) { + Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) { if 
(l_mask[i] & l_groupbit) { - Few x_i; x_i[0] = l_x(i,0); x_i[1] = l_x(i,1); @@ -148,14 +147,12 @@ void FixEfieldKokkos::post_force(int /*vflag*/) l_f(i,0) += fx; l_f(i,1) += fy; l_f(i,2) += fz; - fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; fsum_kk.d1 += fx; fsum_kk.d2 += fy; fsum_kk.d3 += fz; } - }, fsum_kk); - + },fsum_kk); } copymode = 0; @@ -186,7 +183,53 @@ void FixEfieldKokkos::post_force(int /*vflag*/) } copymode = 1; - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); + //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); + + { + // local variables for lambda capture + auto prd = Few(domain->prd); + auto h = Few(domain->h); + auto triclinic = domain->triclinic; + auto l_ex = ex; + auto l_ey = ey; + auto l_ez = ez; + auto l_d_efield = d_efield; + + auto l_x = x; + auto l_q = q; + auto l_f = f; + auto l_mask = mask; + auto l_image = image; + auto l_groupbit = groupbit; + auto l_xstyle = xstyle; + auto l_ystyle = ystyle; + auto l_zstyle = zstyle; + + Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) { + if (l_mask[i] & l_groupbit) { + Few x_i; + x_i[0] = l_x(i,0); + x_i[1] = l_x(i,1); + x_i[2] = l_x(i,2); + auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i)); + auto qtmp = l_q(i); + auto fx = qtmp * l_ex; + auto fy = qtmp * l_ey; + auto fz = qtmp * l_ez; + if (l_xstyle == ATOM) l_f(i,0) += l_d_efield(i,0); + else if (l_xstyle) l_f(i,0) += fx; + if (l_ystyle == ATOM) l_f(i,1) += l_d_efield(i,1); + else if (l_ystyle) l_f(i,1) += fy; + if (l_zstyle == ATOM) l_f(i,2) += l_d_efield(i,2); + else if (l_zstyle) l_f(i,2) += fz; + fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; + fsum_kk.d1 += fx; + fsum_kk.d2 += fy; + fsum_kk.d3 += fz; + } + },fsum_kk); + } + copymode = 0; } @@ -197,7 +240,7 @@ void FixEfieldKokkos::post_force(int /*vflag*/) fsum[2] = fsum_kk.d2; fsum[3] = fsum_kk.d3; } - +/* template KOKKOS_INLINE_FUNCTION void 
FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, double_4& fsum_kk) const { @@ -255,7 +298,7 @@ void FixEfieldKokkos::operator()(TagFixEfieldNonConstant, const int fsum_kk.d3 += fz; } } - +*/ namespace LAMMPS_NS { template class FixEfieldKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/fix_efield_kokkos.h b/src/KOKKOS/fix_efield_kokkos.h index d159473d1d..2739d03ffc 100644 --- a/src/KOKKOS/fix_efield_kokkos.h +++ b/src/KOKKOS/fix_efield_kokkos.h @@ -60,13 +60,13 @@ class FixEfieldKokkos : public FixEfield { ~FixEfieldKokkos() override; void init() override; void post_force(int) override; - +/* KOKKOS_INLINE_FUNCTION void operator()(TagFixEfieldConstant, const int&, double_4&) const; KOKKOS_INLINE_FUNCTION void operator()(TagFixEfieldNonConstant, const int&, double_4&) const; - +*/ private: DAT::tdual_ffloat_2d k_efield; typename AT::t_ffloat_2d_randomread d_efield; From 4a8275446439aaaa0e859fbbe7fc53de9ac6c4b4 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 7 Aug 2023 00:30:32 -0500 Subject: [PATCH 024/107] Fixed an obvious bug with x --- src/KOKKOS/fix_efield_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index ecf4418cf6..1f29d1f809 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -79,7 +79,7 @@ void FixEfieldKokkos::post_force(int /*vflag*/) { atomKK->sync(execution_space, X_MASK | F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK); - x = atomKK->k_f.view(); + x = atomKK->k_x.view(); f = atomKK->k_f.view(); q = atomKK->k_q.view(); image = atomKK->k_image.view(); From 4ca32f0ceccedc2bee1c657038a72e0802c58605 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 7 Aug 2023 15:24:16 -0500 Subject: [PATCH 025/107] Added comments to the use of operators overloaded when accessing unwrap on the GPU (serial works fine) --- src/KOKKOS/fix_efield_kokkos.cpp | 14 +++++++++----- src/KOKKOS/fix_efield_kokkos.h | 4 ++-- 2 
files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index 1f29d1f809..bbf106f515 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -115,6 +115,8 @@ void FixEfieldKokkos::post_force(int /*vflag*/) if (varflag == CONSTANT) { copymode = 1; + + // It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below) //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); { @@ -183,8 +185,8 @@ void FixEfieldKokkos::post_force(int /*vflag*/) } copymode = 1; + // It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below) //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); - { // local variables for lambda capture auto prd = Few(domain->prd); @@ -240,12 +242,13 @@ void FixEfieldKokkos::post_force(int /*vflag*/) fsum[2] = fsum_kk.d2; fsum[3] = fsum_kk.d3; } -/* + template KOKKOS_INLINE_FUNCTION void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, double_4& fsum_kk) const { if (mask[i] & groupbit) { if (region && !d_match[i]) return; + auto prd = Few(domain->prd); auto h = Few(domain->h); auto triclinic = domain->triclinic; @@ -254,14 +257,14 @@ void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, x_i[1] = x(i,1); x_i[2] = x(i,2); auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i)); - const F_FLOAT qtmp = q[i]; + const F_FLOAT qtmp = q(i); const F_FLOAT fx = qtmp * ex; const F_FLOAT fy = qtmp * ey; const F_FLOAT fz = qtmp * ez; f(i,0) += fx; f(i,1) += fy; f(i,2) += fz; - + // TODO: access to unwrap below crashes fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; fsum_kk.d1 += fx; fsum_kk.d2 += fy; @@ -292,13 +295,14 @@ void FixEfieldKokkos::operator()(TagFixEfieldNonConstant, const int else if (ystyle) f(i,1) += fy; if (zstyle == ATOM) f(i,2) += d_efield(i,2); else if (zstyle) f(i,2) += 
fz; + // TODO: access to unwrap below crashes fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; fsum_kk.d1 += fx; fsum_kk.d2 += fy; fsum_kk.d3 += fz; } } -*/ + namespace LAMMPS_NS { template class FixEfieldKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/fix_efield_kokkos.h b/src/KOKKOS/fix_efield_kokkos.h index 2739d03ffc..d159473d1d 100644 --- a/src/KOKKOS/fix_efield_kokkos.h +++ b/src/KOKKOS/fix_efield_kokkos.h @@ -60,13 +60,13 @@ class FixEfieldKokkos : public FixEfield { ~FixEfieldKokkos() override; void init() override; void post_force(int) override; -/* + KOKKOS_INLINE_FUNCTION void operator()(TagFixEfieldConstant, const int&, double_4&) const; KOKKOS_INLINE_FUNCTION void operator()(TagFixEfieldNonConstant, const int&, double_4&) const; -*/ + private: DAT::tdual_ffloat_2d k_efield; typename AT::t_ffloat_2d_randomread d_efield; From 6ff85cab7f3bc8cec12470fadd675945561a0c39 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Thu, 10 Aug 2023 00:34:52 -0500 Subject: [PATCH 026/107] Adding fix spring/self/kk, needed to add maxatom to fix spring/self, may need resize xoriginal as well --- src/fix_spring_self.cpp | 1 + src/fix_spring_self.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fix_spring_self.cpp b/src/fix_spring_self.cpp index 550b3afc4d..31f54caef2 100644 --- a/src/fix_spring_self.cpp +++ b/src/fix_spring_self.cpp @@ -73,6 +73,7 @@ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) : // register with Atom class xoriginal = nullptr; + maxatom = atom->nmax; FixSpringSelf::grow_arrays(atom->nmax); atom->add_callback(Atom::GROW); atom->add_callback(Atom::RESTART); diff --git a/src/fix_spring_self.h b/src/fix_spring_self.h index 59dba78e43..24a03aa92e 100644 --- a/src/fix_spring_self.h +++ b/src/fix_spring_self.h @@ -47,11 +47,12 @@ class FixSpringSelf : public Fix { int size_restart(int) override; int maxsize_restart() override; - private: + protected: double k, espring; double **xoriginal; // 
original coords of atoms int xflag, yflag, zflag; int ilevel_respa; + int maxatom; }; } // namespace LAMMPS_NS From 4bb3ecd09c1a88768e556be76f8707a685c33680 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Thu, 10 Aug 2023 00:36:33 -0500 Subject: [PATCH 027/107] Added the fix spring/self/kk source files --- src/KOKKOS/fix_spring_self_kokkos.cpp | 152 ++++++++++++++++++++++++++ src/KOKKOS/fix_spring_self_kokkos.h | 57 ++++++++++ 2 files changed, 209 insertions(+) create mode 100644 src/KOKKOS/fix_spring_self_kokkos.cpp create mode 100644 src/KOKKOS/fix_spring_self_kokkos.h diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp new file mode 100644 index 0000000000..fe6d3a3d50 --- /dev/null +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -0,0 +1,152 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "fix_spring_self_kokkos.h" + +#include "atom_kokkos.h" +#include "update.h" +#include "modify.h" +#include "domain_kokkos.h" +#include "region.h" +#include "input.h" +#include "variable.h" +#include "memory_kokkos.h" +#include "error.h" +#include "atom_masks.h" +#include "kokkos_base.h" + +#include + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +template +FixSpringSelfKokkos::FixSpringSelfKokkos(LAMMPS *lmp, int narg, char **arg) : + FixSpringSelf(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; + + maxatom = atom->nmax; + memory->destroy(xoriginal); + memoryKK->create_kokkos(k_xoriginal,xoriginal,maxatom,3,"spring/self:xoriginal"); + d_xoriginal = k_xoriginal.view(); +} + +/* ---------------------------------------------------------------------- */ + +template +FixSpringSelfKokkos::~FixSpringSelfKokkos() +{ + if (copymode) return; + + memoryKK->destroy_kokkos(k_xoriginal,xoriginal); + xoriginal = nullptr; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixSpringSelfKokkos::init() +{ + FixSpringSelf::init(); + + if (utils::strmatch(update->integrate_style,"^respa")) + error->all(FLERR,"Cannot (yet) use respa with Kokkos"); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixSpringSelfKokkos::post_force(int /*vflag*/) +{ + atomKK->sync(execution_space, X_MASK | F_MASK | MASK_MASK); + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + image = atomKK->k_image.view(); + mask = atomKK->k_mask.view(); + + int nlocal = atom->nlocal; + + // reallocate xoriginal array if necessary + + if (atom->nmax > maxatom) { + maxatom = atom->nmax; + 
memoryKK->destroy_kokkos(k_xoriginal,xoriginal); + memoryKK->create_kokkos(k_xoriginal,xoriginal,maxatom,3,"fix_spring/self:xoriginal"); + d_xoriginal = k_xoriginal.view(); + } + + double espring_kk; + + + copymode = 1; + //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this, espring_kk); + { + // local variables for lambda capture + auto prd = Few(domain->prd); + auto h = Few(domain->h); + auto triclinic = domain->triclinic; + auto l_xflag = xflag; + auto l_yflag = yflag; + auto l_zflag = zflag; + auto l_k = k; + auto l_x = x; + auto l_xoriginal = d_xoriginal; + auto l_f = f; + auto l_mask = mask; + auto l_image = image; + auto l_groupbit = groupbit; + + Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double& espring_kk) { + if (l_mask[i] & l_groupbit) { + Few x_i; + x_i[0] = l_x(i,0); + x_i[1] = l_x(i,1); + x_i[2] = l_x(i,2); + auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i)); + auto dx = unwrap[0] - l_xoriginal(i, 0); + auto dy = unwrap[1] - l_xoriginal(i, 1); + auto dz = unwrap[2] - l_xoriginal(i, 2); + if (!l_xflag) dx = 0.0; + if (!l_yflag) dy = 0.0; + if (!l_zflag) dz = 0.0; + l_f(i,0) -= l_k*dx; + l_f(i,1) -= l_k*dy; + l_f(i,2) -= l_k*dz; + espring_kk += l_k * (dx*dx + dy*dy + dz*dz); + } + },espring_kk); + } + + copymode = 0; + + atomKK->modified(execution_space, F_MASK); + + espring = 0.5*espring_kk; +} + +namespace LAMMPS_NS { +template class FixSpringSelfKokkos; +#ifdef LMP_KOKKOS_GPU +template class FixSpringSelfKokkos; +#endif +} + diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h new file mode 100644 index 0000000000..58dcbc525e --- /dev/null +++ b/src/KOKKOS/fix_spring_self_kokkos.h @@ -0,0 +1,57 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia 
Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(spring/self/kk,FixSpringSelfKokkos); +FixStyle(spring/self/kk/device,FixSpringSelfKokkos); +FixStyle(spring/self/kk/host,FixSpringSelfKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_FIX_SPRING_SELF_KOKKOS_H +#define LMP_FIX_SPRING_SELF_KOKKOS_H + +#include "fix_spring_self.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class FixSpringSelfKokkos : public FixSpringSelf { + public: + typedef DeviceType device_type; + typedef double value_type; + typedef ArrayTypes AT; + + FixSpringSelfKokkos(class LAMMPS *, int, char **); + ~FixSpringSelfKokkos() override; + void init() override; + void post_force(int) override; + + private: + DAT::tdual_ffloat_2d k_xoriginal; + typename AT::t_ffloat_2d_randomread d_xoriginal; + + typename AT::t_x_array_randomread x; + typename AT::t_f_array f; + typename AT::t_imageint_1d_randomread image; + typename AT::t_int_1d_randomread mask; +}; + +} + +#endif +#endif + From a24eccf95d0abd036823ff5bbe45839ca9d8a907 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Fri, 11 Aug 2023 09:43:28 -0500 Subject: [PATCH 028/107] Removed maxatom from fix spring/self, need to work on exchange for xoriginal on spring/self/kk --- src/KOKKOS/fix_spring_self_kokkos.cpp | 12 +----------- src/fix_spring_self.cpp | 3 ++- src/fix_spring_self.h | 1 - 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index fe6d3a3d50..5031c0641b 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -43,9 
+43,8 @@ FixSpringSelfKokkos::FixSpringSelfKokkos(LAMMPS *lmp, int narg, char datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; - maxatom = atom->nmax; memory->destroy(xoriginal); - memoryKK->create_kokkos(k_xoriginal,xoriginal,maxatom,3,"spring/self:xoriginal"); + memoryKK->create_kokkos(k_xoriginal,xoriginal,atom->nmax,3,"spring/self:xoriginal"); d_xoriginal = k_xoriginal.view(); } @@ -85,15 +84,6 @@ void FixSpringSelfKokkos::post_force(int /*vflag*/) int nlocal = atom->nlocal; - // reallocate xoriginal array if necessary - - if (atom->nmax > maxatom) { - maxatom = atom->nmax; - memoryKK->destroy_kokkos(k_xoriginal,xoriginal); - memoryKK->create_kokkos(k_xoriginal,xoriginal,maxatom,3,"fix_spring/self:xoriginal"); - d_xoriginal = k_xoriginal.view(); - } - double espring_kk; diff --git a/src/fix_spring_self.cpp b/src/fix_spring_self.cpp index 31f54caef2..df00a2ba8c 100644 --- a/src/fix_spring_self.cpp +++ b/src/fix_spring_self.cpp @@ -73,7 +73,6 @@ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) : // register with Atom class xoriginal = nullptr; - maxatom = atom->nmax; FixSpringSelf::grow_arrays(atom->nmax); atom->add_callback(Atom::GROW); atom->add_callback(Atom::RESTART); @@ -97,6 +96,8 @@ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) : FixSpringSelf::~FixSpringSelf() { + if (copymode) return; + // unregister callbacks to this fix from Atom class atom->delete_callback(id,Atom::GROW); diff --git a/src/fix_spring_self.h b/src/fix_spring_self.h index 24a03aa92e..f13f2be918 100644 --- a/src/fix_spring_self.h +++ b/src/fix_spring_self.h @@ -52,7 +52,6 @@ class FixSpringSelf : public Fix { double **xoriginal; // original coords of atoms int xflag, yflag, zflag; int ilevel_respa; - int maxatom; }; } // namespace LAMMPS_NS From ca9924035bbb90c5d6c9cf41db40a6a10f293ec5 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Sat, 12 Aug 2023 01:19:43 -0500 Subject: [PATCH 029/107] Working on pack/unpack exchange for xoriginal --- 
src/KOKKOS/fix_spring_self_kokkos.cpp | 143 ++++++++++++++++++++++++++ src/KOKKOS/fix_spring_self_kokkos.h | 53 +++++++++- 2 files changed, 193 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index 5031c0641b..4e89ede2ba 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -133,6 +133,149 @@ void FixSpringSelfKokkos::post_force(int /*vflag*/) espring = 0.5*espring_kk; } +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixSpringSelfKokkos::pack_exchange_item(const int &mysend, int &offset, const bool &final) const +{ + const int i = d_exchange_sendlist(mysend); + + d_buf[mysend] = nsend + offset; + int m = nsend + offset; + d_buf[m++] = d_xoriginal(i,0); + d_buf[m++] = d_xoriginal(i,1); + d_buf[m++] = d_xoriginal(i,2); + if (mysend == nsend-1) d_count() = m; + offset = m - nsend; + + const int j = d_copylist(mysend); + if (j > -1) { + d_xoriginal(i,0) = d_xoriginal(j,0); + d_xoriginal(i,1) = d_xoriginal(j,1); + d_xoriginal(i,2) = d_xoriginal(j,2); + } +} + +/* ---------------------------------------------------------------------- */ + +template +int FixSpringSelfKokkos::pack_exchange_kokkos( + const int &nsend, DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist, + ExecutionSpace space) +{ + + k_buf.sync(); + k_copylist.sync(); + k_exchange_sendlist.sync(); + + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + d_copylist = k_copylist.view(); + d_exchange_sendlist = k_exchange_sendlist.view(); + this->nsend = nsend; + + + k_xoriginal.template sync(); + + Kokkos::deep_copy(d_count,0); + + copymode = 1; + + FixSpringSelfKokkosPackExchangeFunctor pack_exchange_functor(this); + Kokkos::parallel_scan(nsend,pack_exchange_functor); + + copymode = 0; + + k_buf.modify(); + + if 
(space == Host) k_buf.sync(); + else k_buf.sync(); + + k_xoriginal.template modify(); + + Kokkos::deep_copy(h_count,d_count); + + return h_count(); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixSpringSelfKokkos::operator()(TagFixSpringSelfUnpackExchange, const int &i) const +{ + int index = d_indices(i); + + if (index > -1) { + int m = d_buf[i]; + + d_xoriginal(index,0) = static_cast (d_buf[m++]); + d_xoriginal(index,1) = static_cast (d_buf[m++]); + d_xoriginal(index,2) = static_cast (d_buf[m++]); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void FixSpringSelfKokkos::unpack_exchange_kokkos( + DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + ExecutionSpace /*space*/) +{ + k_buf.sync(); + k_indices.sync(); + + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + d_indices = k_indices.view(); + + k_xoriginal.template sync(); + + copymode = 1; + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nrecv),*this); + + copymode = 0; + + k_xoriginal.template modify(); +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based arrays for exchange with another proc +------------------------------------------------------------------------- */ + +template +int FixSpringSelfKokkos::pack_exchange(int i, double *buf) +{ + k_xoriginal.sync_host(); + + int m = FixSpringSelf::pack_exchange(i,buf); + + k_xoriginal.modify_host(); + + return m; +} + +/* ---------------------------------------------------------------------- + unpack values in local atom-based arrays from exchange with another proc +------------------------------------------------------------------------- */ + +template +int FixSpringSelfKokkos::unpack_exchange(int nlocal, double *buf) +{ + k_xoriginal.sync_host(); + + int m = 
FixSpringSelf::unpack_exchange(nlocal,buf); + + k_xoriginal.modify_host(); + + return m; +} + namespace LAMMPS_NS { template class FixSpringSelfKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h index 58dcbc525e..b69d4edb4a 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.h +++ b/src/KOKKOS/fix_spring_self_kokkos.h @@ -25,11 +25,14 @@ FixStyle(spring/self/kk/host,FixSpringSelfKokkos); #include "fix_spring_self.h" #include "kokkos_type.h" +#include "kokkos_base.h" namespace LAMMPS_NS { +struct TagFixSpringSelfUnpackExchange{}; + template -class FixSpringSelfKokkos : public FixSpringSelf { +class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase { public: typedef DeviceType device_type; typedef double value_type; @@ -40,14 +43,58 @@ class FixSpringSelfKokkos : public FixSpringSelf { void init() override; void post_force(int) override; - private: + KOKKOS_INLINE_FUNCTION + void pack_exchange_item(const int&, int &, const bool &) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixSpringSelfUnpackExchange, const int&) const; + + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space) override; + + void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d &indices,int nrecv, + ExecutionSpace space) override; + + + int pack_exchange(int, double *) override; + int unpack_exchange(int, double *) override; + + protected: DAT::tdual_ffloat_2d k_xoriginal; - typename AT::t_ffloat_2d_randomread d_xoriginal; + typename AT::t_ffloat_2d d_xoriginal; typename AT::t_x_array_randomread x; typename AT::t_f_array f; typename AT::t_imageint_1d_randomread image; typename AT::t_int_1d_randomread mask; + + int nsend; + + typename AT::t_int_2d d_sendlist; + typename AT::t_xfloat_1d_um d_buf; + + typename AT::t_int_1d d_exchange_sendlist; + typename AT::t_int_1d d_copylist; + typename 
AT::t_int_1d d_indices; + + typename AT::t_int_scalar d_count; + HAT::t_int_scalar h_count; + +}; + +template +struct FixSpringSelfKokkosPackExchangeFunctor { + typedef DeviceType device_type; + typedef int value_type; + FixSpringSelfKokkos c; + FixSpringSelfKokkosPackExchangeFunctor(FixSpringSelfKokkos* c_ptr):c(*c_ptr) {}; + KOKKOS_INLINE_FUNCTION + void operator()(const int &i, int &offset, const bool &final) const { + c.pack_exchange_item(i, offset, final); + } }; } From ea965d3b2681d6d922edb27e7a0b98859d6c6fb3 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Sun, 13 Aug 2023 23:40:53 -0500 Subject: [PATCH 030/107] Working on exchange comm on device, and grow arrays for xoriginal --- src/KOKKOS/fix_spring_self_kokkos.cpp | 21 +++++++++++++++++++-- src/KOKKOS/fix_spring_self_kokkos.h | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index 4e89ede2ba..8a576e2dea 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -38,14 +38,20 @@ FixSpringSelfKokkos::FixSpringSelfKokkos(LAMMPS *lmp, int narg, char FixSpringSelf(lmp, narg, arg) { kokkosable = 1; + exchange_comm_device = 1; + maxexchange = 6; atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; memory->destroy(xoriginal); - memoryKK->create_kokkos(k_xoriginal,xoriginal,atom->nmax,3,"spring/self:xoriginal"); - d_xoriginal = k_xoriginal.view(); + + int nmax = atom->nmax; + grow_arrays(nmax); + + d_count = typename AT::t_int_scalar("fix_shake:count"); + h_count = Kokkos::create_mirror_view(d_count); } /* ---------------------------------------------------------------------- */ @@ -133,6 +139,17 @@ void FixSpringSelfKokkos::post_force(int /*vflag*/) espring = 0.5*espring_kk; } +/* ---------------------------------------------------------------------- + allocate local atom-based arrays 
+------------------------------------------------------------------------- */ + +template +void FixSpringSelfKokkos::grow_arrays(int nmax) +{ + memoryKK->grow_kokkos(k_xoriginal,xoriginal,nmax,3,"spring/self:xoriginal"); + d_xoriginal = k_xoriginal.view(); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h index b69d4edb4a..30b9eaf40a 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.h +++ b/src/KOKKOS/fix_spring_self_kokkos.h @@ -41,6 +41,7 @@ class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase { FixSpringSelfKokkos(class LAMMPS *, int, char **); ~FixSpringSelfKokkos() override; void init() override; + void grow_arrays(int) override; void post_force(int) override; KOKKOS_INLINE_FUNCTION From 3eb8fd219afbb97303efd4fcc5dcd85958dd7954 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Wed, 16 Aug 2023 16:19:11 -0600 Subject: [PATCH 031/107] update of a Howto_output doc page --- doc/src/Howto_output.rst | 140 ++++++++++++++++++++++++++------------- 1 file changed, 93 insertions(+), 47 deletions(-) diff --git a/doc/src/Howto_output.rst b/doc/src/Howto_output.rst index 851b7703fd..6fcd36ab56 100644 --- a/doc/src/Howto_output.rst +++ b/doc/src/Howto_output.rst @@ -1,7 +1,7 @@ Output from LAMMPS (thermo, dumps, computes, fixes, variables) ============================================================== -There are four basic kinds of LAMMPS output: +There are four basic forms of LAMMPS output: * :doc:`Thermodynamic output `, which is a list of quantities printed every few timesteps to the screen and logfile. @@ -20,18 +20,17 @@ output files, depending on what :doc:`dump ` and :doc:`fix ` commands you specify. 
As discussed below, LAMMPS gives you a variety of ways to determine -what quantities are computed and printed when the thermodynamics, +what quantities are calculated and printed when the thermodynamics, dump, or fix commands listed above perform output. Throughout this discussion, note that users can also :doc:`add their own computes and -fixes to LAMMPS ` which can then generate values that can then -be output with these commands. +fixes to LAMMPS ` which can generate values that can then be +output with these commands. The following subsections discuss different LAMMPS commands related to output and the kind of data they operate on and produce: * :ref:`Global/per-atom/local/per-grid data ` * :ref:`Scalar/vector/array data ` -* :ref:`Per-grid data ` * :ref:`Disambiguation ` * :ref:`Thermodynamic output ` * :ref:`Dump file output ` @@ -48,34 +47,65 @@ to output and the kind of data they operate on and produce: Global/per-atom/local/per-grid data ----------------------------------- -Various output-related commands work with four different styles of +Various output-related commands work with four different "styles" of data: global, per-atom, local, and per-grid. A global datum is one or more system-wide values, e.g. the temperature of the system. A per-atom datum is one or more values per atom, e.g. the kinetic energy of each atom. Local datums are calculated by each processor based on -the atoms it owns, but there may be zero or more per atom, e.g. a list +the atoms it owns, and there may be zero or more per atom, e.g. a list of bond distances. A per-grid datum is one or more values per grid cell, for a grid which -overlays the simulation domain. The grid cells and the data they -store are distributed across processors; each processor owns the grid -cells whose center point falls within its subdomain. +overlays the simulation domain. 
Similar to atoms and per-atom data, +the grid cells and the data they store are distributed across +processors; each processor owns the grid cells whose center points +fall within its subdomain. .. _scalar: Scalar/vector/array data ------------------------ -Global, per-atom, and local datums can come in three kinds: a single -scalar value, a vector of values, or a 2d array of values. The doc -page for a "compute" or "fix" or "variable" that generates data will -specify both the style and kind of data it produces, e.g. a per-atom -vector. +Global, per-atom, local, and per-grid datums can come in three +"kinds": a single scalar value, a vector of values, or a 2d array of +values. More specifically these are the valid kinds for each style: -When a quantity is accessed, as in many of the output commands -discussed below, it can be referenced via the following bracket -notation, where ID in this case is the ID of a compute. The leading -"c\_" would be replaced by "f\_" for a fix, or "v\_" for a variable: +* global scalar +* global vector +* global array +* per-atom vector +* per-atom array +* local vector +* local array +* per-grid vector +* per-grid array + +A per-atom vector means a single value per atom; the "vector" is the +length of the number of atoms. A per-atom array means multiple values +per atom. Similarly a local vector or array means one or multiple +values per entity (e.g. per bond in the system). And a per-grid +vector or array means one or multiple values per grid cell. + +The doc page for a compute or fix or variable that generates data will +specify both the styles and kinds of data it produces, e.g. a per-atom +vector. Note that a compute or fix may generate multiple styles and +kinds of output. However, for per-atom data only a vector or array is +output, never both. Likewise for per-local and per-grid data. An +example of a fix which generates multiple styles and kinds of data is +the :doc:`fix mdi/qm ` command. 
It outputs a global +scalar, global vector, and per-atom array for the quantum mechanical +energy and virial of the system and forces on each atom. + +By contrast, different variable styles generate only a single kind of +data: a global scalar for an equal-style variable, global vector for a +vector-style variable, and a per-atom vector for an atom-style +variable. + +When data is accessed by another command, as in many of the output +commands discussed below, it can be referenced via the following +bracket notation, where ID in this case is the ID of a compute. The +leading "c\_" would be replaced by "f\_" for a fix, or "v\_" for a +variable (and ID would be the name of the variable): +-------------+--------------------------------------------+ | c_ID | entire scalar, vector, or array | @@ -85,40 +115,56 @@ notation, where ID in this case is the ID of a compute. The leading | c_ID[I][J] | one element of array | +-------------+--------------------------------------------+ -In other words, using one bracket reduces the dimension of the data -once (vector -> scalar, array -> vector). Using two brackets reduces -the dimension twice (array -> scalar). Thus a command that uses -scalar values as input can typically also process elements of a vector -or array. +Note that using one bracket reduces the dimension of the data once +(vector -> scalar, array -> vector). Using two brackets reduces the +dimension twice (array -> scalar). Thus a command that uses scalar +values as input can also conceptually operate on an element of a +vector or array. -.. _grid: - -Per-grid data ------------------------- - -Per-grid data can come in two kinds: a vector of values (one per grid -cekk), or a 2d array of values (multiple values per grid ckk). The -doc page for a "compute" or "fix" that generates data will specify -names for both the grid(s) and datum(s) it produces, e.g. per-grid -vectors or arrays, which can be referenced by other commands. 
See the -:doc:`Howto grid ` doc page for more details. +Per-grid vectors or arrays are accessed similarly, except that the ID +for the compute or fix includes a grid name and a data name. This is +because a fix or compute can create multiple grids (of different +sizes) and multiple sets of data (for each grid). The fix or compute +defines names for each grid and for each data set, so that all of them +can be accessed by other commands. See the :doc:`Howto grid +` doc page for more details. .. _disambiguation: Disambiguation -------------- -Some computes and fixes produce data in multiple styles, e.g. a global -scalar and a per-atom vector. Usually the context in which the input -script references the data determines which style is meant. Example: -if a compute provides both a global scalar and a per-atom vector, the -former will be accessed by using ``c_ID`` in an equal-style variable, -while the latter will be accessed by using ``c_ID`` in an atom-style -variable. Note that atom-style variable formulas can also access -global scalars, but in this case it is not possible to do this -directly because of the ambiguity. Instead, an equal-style variable -can be defined which accesses the global scalar, and that variable can -be used in the atom-style variable formula in place of ``c_ID``. +When a compute or fix produces data in multiple styles, e.g. global +and per-atom, a reference to the data can sometimes be ambiguous. +Usually the context in which the input script references the data +determines which style is meant. + +For example, if a compute outputs a global vector and a per-atom +array, an element of the global vector will be accessed by using +``c_ID[I]`` in :doc:`thermodynamic output `, while a +column of the per-atom array will be accessed by using ``c_ID[I]`` in +a :doc:`dump custom ` command. 
+ +However, if an :doc:`atom-style variable ` references +``c_ID[I]``, then it could be intended to refer to a single element of +the global vector or a column of the per-atom array. The doc page for +any command that has a potential ambiguity (variables are the most +common) will explain how to resolve the ambiguity. + +In this case, an atom-style variable references per-atom data if it +exists. If access to an element of a global vector is needed (as in +this example), an equal-style variable which references the value can +be defined and used in the atom-style variable formula instead. + +Similarly, :doc:`thermodynamic output ` can only +reference global data from a compute or fix. But you can indirectly +access per-atom data as follows. The reference ``c_ID[245][2]`` for +the ID of a :doc:`compute displace/atom ` +command refers to the y-component of displacement for the atom with +ID 245. While you cannot use that reference directly in the +:doc:`thermo_style ` command, you can use it in an +equal-style variable formula, and then reference the variable in +thermodynamic output. .. _thermo: @@ -389,7 +435,7 @@ output and input data types must match, e.g. global/per-atom/local data and scalar/vector/array data. Also note that, as described above, when a command takes a scalar as -input, that could be an element of a vector or array. Likewise a +input, that could also be an element of a vector or array. Likewise a vector input could be a column of an array. 
+--------------------------------------------------------+----------------------------------------------+----------------------------------------------------+ From 0d739439c7cebf64716561022151921c04cd35c8 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Thu, 17 Aug 2023 12:47:48 -0600 Subject: [PATCH 032/107] changes to compute voronoi/atom --- doc/src/compute_voronoi_atom.rst | 126 ++++++++++++--------------- src/VORONOI/compute_voronoi_atom.cpp | 37 ++++---- 2 files changed, 75 insertions(+), 88 deletions(-) diff --git a/doc/src/compute_voronoi_atom.rst b/doc/src/compute_voronoi_atom.rst index 274be1b702..3e67bb6cbf 100644 --- a/doc/src/compute_voronoi_atom.rst +++ b/doc/src/compute_voronoi_atom.rst @@ -13,7 +13,7 @@ Syntax * ID, group-ID are documented in :doc:`compute ` command * voronoi/atom = style name of this compute command * zero or more keyword/value pairs may be appended -* keyword = *only_group* or *occupation* or *surface* or *radius* or *edge_histo* or *edge_threshold* or *face_threshold* or *neighbors* or *peratom* +* keyword = *only_group* or *occupation* or *surface* or *radius* or *edge_histo* or *edge_threshold* or *face_threshold* or *neighbors* .. parsed-literal:: @@ -31,7 +31,6 @@ Syntax *face_threshold* arg = minarea minarea = minimum area for a face to be counted *neighbors* value = *yes* or *no* = store list of all neighbors or no - *peratom* value = *yes* or *no* = per-atom quantities accessible or no Examples """""""" @@ -53,14 +52,12 @@ atoms in the simulation box. The tessellation is calculated using all atoms in the simulation, but non-zero values are only stored for atoms in the group. -By default two per-atom quantities are calculated by this compute. -The first is the volume of the Voronoi cell around each atom. Any -point in an atom's Voronoi cell is closer to that atom than any other. -The second is the number of faces of the Voronoi cell. 
This is -equal to the number of nearest neighbors of the central atom, -plus any exterior faces (see note below). If the *peratom* keyword -is set to "no", the per-atom quantities are still calculated, -but they are not accessible. +Two per-atom quantities are calculated by this compute. The first is +the volume of the Voronoi cell around each atom. Any point in an +atom's Voronoi cell is closer to that atom than any other. The second +is the number of faces of the Voronoi cell. This is equal to the +number of nearest neighbors of the central atom, plus any exterior +faces (see note below). ---------- @@ -97,13 +94,13 @@ present in atom_style sphere for granular models. The *edge_histo* keyword activates the compilation of a histogram of number of edges on the faces of the Voronoi cells in the compute -group. The argument *maxedge* of the this keyword is the largest number -of edges on a single Voronoi cell face expected to occur in the -sample. This keyword adds the generation of a global vector with -*maxedge*\ +1 entries. The last entry in the vector contains the number of -faces with more than *maxedge* edges. Since the polygon with the -smallest amount of edges is a triangle, entries 1 and 2 of the vector -will always be zero. +group. The argument *maxedge* of this keyword is the largest +number of edges on a single Voronoi cell face expected to occur in the +sample. This keyword generates output of a global vector by this +compute with *maxedge*\ +1 entries. The last entry in the vector +contains the number of faces with more than *maxedge* edges. Since the +polygon with the smallest number of edges is a triangle, entries 1 and +2 of the vector will always be zero. 
The *edge_threshold* and *face_threshold* keywords allow the suppression of edges below a given minimum length and faces below a @@ -127,8 +124,8 @@ to locate vacancies (the coordinates are given by the atom coordinates at the time step when the compute was first invoked), while column two data can be used to identify interstitial atoms. -If the *neighbors* value is set to yes, then this compute creates a -local array with 3 columns. There is one row for each face of each +If the *neighbors* value is set to yes, then this compute also creates +a local array with 3 columns. There is one row for each face of each Voronoi cell. The 3 columns are the atom ID of the atom that owns the cell, the atom ID of the atom in the neighboring cell (or zero if the face is external), and the area of the face. The array can be @@ -143,8 +140,8 @@ containing all the Voronoi neighbors in a system: compute 6 all voronoi/atom neighbors yes dump d2 all local 1 dump.neighbors index c_6[1] c_6[2] c_6[3] -If the *face_threshold* keyword is used, then only faces -with areas greater than the threshold are stored. +If the *face_threshold* keyword is used, then only faces with areas +greater than the threshold are stored. ---------- @@ -158,48 +155,48 @@ Voro++ software in the src/VORONOI/README file. .. note:: - The calculation of Voronoi volumes is performed by each processor for - the atoms it owns, and includes the effect of ghost atoms stored by - the processor. This assumes that the Voronoi cells of owned atoms - are not affected by atoms beyond the ghost atom cut-off distance. - This is usually a good assumption for liquid and solid systems, but - may lead to underestimation of Voronoi volumes in low density - systems. By default, the set of ghost atoms stored by each processor - is determined by the cutoff used for :doc:`pair_style ` - interactions. The cutoff can be set explicitly via the - :doc:`comm_modify cutoff ` command. 
The Voronoi cells - for atoms adjacent to empty regions will extend into those regions up - to the communication cutoff in :math:`x`, :math:`y`, or :math:`z`. - In that situation, an exterior face is created at the cutoff distance - normal to the :math:`x`, :math:`y`, or :math:`z` direction. For - triclinic systems, the exterior face is parallel to the corresponding - reciprocal lattice vector. + The calculation of Voronoi volumes is performed by each processor + for the atoms it owns, and includes the effect of ghost atoms + stored by the processor. This assumes that the Voronoi cells of + owned atoms are not affected by atoms beyond the ghost atom cut-off + distance. This is usually a good assumption for liquid and solid + systems, but may lead to underestimation of Voronoi volumes in low + density systems. By default, the set of ghost atoms stored by each + processor is determined by the cutoff used for :doc:`pair_style + ` interactions. The cutoff can be set explicitly via + the :doc:`comm_modify cutoff ` command. The Voronoi + cells for atoms adjacent to empty regions will extend into those + regions up to the communication cutoff in :math:`x`, :math:`y`, or + :math:`z`. In that situation, an exterior face is created at the + cutoff distance normal to the :math:`x`, :math:`y`, or :math:`z` + direction. For triclinic systems, the exterior face is parallel to + the corresponding reciprocal lattice vector. .. note:: - The Voro++ package performs its calculation in 3d. This will - still work for a 2d LAMMPS simulation, provided all the atoms have the - same :math:`z`-coordinate. The Voronoi cell of each atom will be a columnar - polyhedron with constant cross-sectional area along the :math:`z`-direction - and two exterior faces at the top and bottom of the simulation box. If - the atoms do not all have the same :math:`z`-coordinate, then the columnar - cells will be accordingly distorted. 
The cross-sectional area of each - Voronoi cell can be obtained by dividing its volume by the :math:`z` extent - of the simulation box. Note that you define the :math:`z` extent of the - simulation box for 2d simulations when using the - :doc:`create_box ` or :doc:`read_data ` commands. + The Voro++ package performs its calculation in 3d. This will still + work for a 2d LAMMPS simulation, provided all the atoms have the + same :math:`z`-coordinate. The Voronoi cell of each atom will be a + columnar polyhedron with constant cross-sectional area along the + :math:`z`-direction and two exterior faces at the top and bottom of + the simulation box. If the atoms do not all have the same + :math:`z`-coordinate, then the columnar cells will be accordingly + distorted. The cross-sectional area of each Voronoi cell can be + obtained by dividing its volume by the :math:`z` extent of the + simulation box. Note that you define the :math:`z` extent of the + simulation box for 2d simulations when using the :doc:`create_box + ` or :doc:`read_data ` commands. Output info """"""""""" -By default, this compute calculates a per-atom array with two -columns. In regular dynamic tessellation mode the first column is the -Voronoi volume, the second is the neighbor count, as described above -(read above for the output data in case the *occupation* keyword is -specified). These values can be accessed by any command that uses -per-atom values from a compute as input. See the :doc:`Howto output ` page for an overview of LAMMPS output -options. If the *peratom* keyword is set to "no", the per-atom array -is still created, but it is not accessible. +This compute calculates a per-atom array with two columns. In regular +dynamic tessellation mode the first column is the Voronoi volume, the +second is the neighbor count, as described above (read above for the +output data in case the *occupation* keyword is specified). 
These +values can be accessed by any command that uses per-atom values from a +compute as input. See the :doc:`Howto output ` page for +an overview of LAMMPS output options. If the *edge_histo* keyword is used, then this compute generates a global vector of length *maxedge*\ +1, containing a histogram of the @@ -209,17 +206,6 @@ If the *neighbors* value is set to *yes*, then this compute calculates a local array with three columns. There is one row for each face of each Voronoi cell. -.. note:: - - Some LAMMPS commands such as the :doc:`compute reduce ` - command can accept either a per-atom or local quantity. If this compute - produces both quantities, the command - may access the per-atom quantity, even if you want to access the local - quantity. This effect can be eliminated by using the *peratom* - keyword to turn off the production of the per-atom quantities. For - the default value *yes* both quantities are produced. For the value - *no*, only the local array is produced. - The Voronoi cell volume will be in distance :doc:`units ` cubed. The Voronoi face area will be in distance :doc:`units ` squared. @@ -227,7 +213,8 @@ Restrictions """""""""""" This compute is part of the VORONOI package. It is only enabled if -LAMMPS was built with that package. See the :doc:`Build package ` page for more info. +LAMMPS was built with that package. See the :doc:`Build package +` page for more info. It also requires you have a copy of the Voro++ library built and installed on your system. See instructions on obtaining and @@ -241,5 +228,4 @@ Related commands Default """"""" -*neighbors* no, *peratom* yes - +The default for the *neighbors* keyword is no. 
diff --git a/src/VORONOI/compute_voronoi_atom.cpp b/src/VORONOI/compute_voronoi_atom.cpp index 28bab271a2..eb4f53986f 100644 --- a/src/VORONOI/compute_voronoi_atom.cpp +++ b/src/VORONOI/compute_voronoi_atom.cpp @@ -111,12 +111,7 @@ ComputeVoronoi::ComputeVoronoi(LAMMPS *lmp, int narg, char **arg) : if (iarg + 2 > narg) error->all(FLERR,"Illegal compute voronoi/atom command"); faces_flag = utils::logical(FLERR,arg[iarg+1],false,lmp); iarg += 2; - } else if (strcmp(arg[iarg], "peratom") == 0) { - if (iarg + 2 > narg) error->all(FLERR,"Illegal compute voronoi/atom command"); - peratom_flag = utils::logical(FLERR,arg[iarg+1],false,lmp); - iarg += 2; - } - else error->all(FLERR,"Illegal compute voronoi/atom command"); + } else error->all(FLERR,"Illegal compute voronoi/atom command"); } if (occupation && ( surface!=VOROSURF_NONE || maxedge>0 ) ) @@ -394,27 +389,29 @@ void ComputeVoronoi::checkOccupation() // clear occupation vector memset(occvec, 0, oldnatoms*sizeof(*occvec)); - int i, j, k, - nlocal = atom->nlocal, - nall = atom->nghost + nlocal; - double rx, ry, rz, - **x = atom->x; + int i, j, k; + double rx, ry, rz; + + int nlocal = atom->nlocal; + int nall = atom->nghost + nlocal; + double **x = atom->x; // prepare destination buffer for variable evaluation + if (atom->nmax > lmax) { memory->destroy(lnext); lmax = atom->nmax; memory->create(lnext,lmax,"voronoi/atom:lnext"); } - // clear lroot - for (i=0; ifind_voronoi_cell(x[i][0], x[i][1], x[i][2], rx, ry, rz, k)) || @@ -435,6 +432,7 @@ void ComputeVoronoi::checkOccupation() } // MPI sum occupation + #ifdef NOTINPLACE memcpy(sendocc, occvec, oldnatoms*sizeof(*occvec)); MPI_Allreduce(sendocc, occvec, oldnatoms, MPI_INT, MPI_SUM, world); @@ -443,6 +441,7 @@ void ComputeVoronoi::checkOccupation() #endif // determine the total number of atoms in this atom's currently occupied cell + int c; for (i=0; itag[i]; if (mytag > oldmaxtag) voro[i][0] = 0; @@ -479,6 +479,7 @@ void ComputeVoronoi::checkOccupation() void 
ComputeVoronoi::loopCells() { // invoke voro++ and fetch results for owned atoms in group + voronoicell_neighbor c; int i; if (faces_flag) nfaces = 0; From 299eda8ca36eb4f1eda63f31bd021dca479a4ba3 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Thu, 17 Aug 2023 16:12:14 -0600 Subject: [PATCH 033/107] have compute_reduce require either peratom or local inputs --- doc/src/compute_reduce.rst | 134 +++++++++++++++++++--------------- doc/src/fix_rigid.rst | 5 +- src/compute_reduce.cpp | 95 +++++++++++++++--------- src/compute_reduce.h | 3 +- src/compute_reduce_region.cpp | 18 +++-- 5 files changed, 149 insertions(+), 106 deletions(-) diff --git a/doc/src/compute_reduce.rst b/doc/src/compute_reduce.rst index 204f1c090d..31591d4419 100644 --- a/doc/src/compute_reduce.rst +++ b/doc/src/compute_reduce.rst @@ -37,13 +37,16 @@ Syntax v_name = per-atom vector calculated by an atom-style variable with name * zero or more keyword/args pairs may be appended -* keyword = *replace* +* keyword = *replace* or *inputs* .. parsed-literal:: *replace* args = vec1 vec2 vec1 = reduced value from this input vector will be replaced vec2 = replace it with vec1[N] where N is index of max/min value from vec2 + *inputs* arg = peratom or local + peratom = all inputs are per-atom quantities (default) + local = all inputs are local quantities Examples """""""" @@ -61,26 +64,30 @@ Description Define a calculation that "reduces" one or more vector inputs into scalar values, one per listed input. The inputs can be per-atom or -local quantities; they cannot be global quantities. Atom attributes -are per-atom quantities, :doc:`computes ` and :doc:`fixes ` -may generate any of the three kinds of quantities, and :doc:`atom-style variables ` generate per-atom quantities. See the -:doc:`variable ` command and its special functions which can -perform the same operations as the compute reduce command on global -vectors. 
+local quantities and must all be the same kind (per-atom or local); +see discussion of the optional *inputs* keyword below. + +Atom attributes are per-atom quantities, :doc:`computes ` and +:doc:`fixes ` can generate either per-atom or local quantities, +and :doc:`atom-style variables ` generate per-atom +quantities. See the :doc:`variable ` command and its +special functions which can perform the same reduction operations as +the compute reduce command on global vectors. The reduction operation is specified by the *mode* setting. The *sum* option adds the values in the vector into a global total. The *min* or *max* options find the minimum or maximum value across all vector values. The *minabs* or *maxabs* options find the minimum or maximum value across all absolute vector values. The *ave* setting adds the -vector values into a global total, then divides by the number of values -in the vector. The *sumsq* option sums the square of the values in the -vector into a global total. The *avesq* setting does the same as *sumsq*, -then divides the sum of squares by the number of values. The last two options -can be useful for calculating the variance of some quantity (e.g., variance = -sumsq :math:`-` ave\ :math:`^2`). The *sumabs* option sums the absolute -values in the vector into a global total. The *aveabs* setting does the same -as *sumabs*, then divides the sum of absolute values by the number of +vector values into a global total, then divides by the number of +values in the vector. The *sumsq* option sums the square of the +values in the vector into a global total. The *avesq* setting does +the same as *sumsq*, then divides the sum of squares by the number of +values. The last two options can be useful for calculating the +variance of some quantity (e.g., variance = sumsq :math:`-` ave\ +:math:`^2`). The *sumabs* option sums the absolute values in the +vector into a global total. 
The *aveabs* setting does the same as +*sumabs*, then divides the sum of absolute values by the number of values. Each listed input is operated on independently. For per-atom inputs, @@ -123,52 +130,54 @@ array with six columns: ---------- -The atom attribute values (*x*, *y*, *z*, *vx*, *vy*, *vz*, *fx*, *fy*, and -*fz*) are self-explanatory. Note that other atom attributes can be used as -inputs to this fix by using the -:doc:`compute property/atom ` command and then specifying -an input value from that compute. +The atom attribute values (*x*, *y*, *z*, *vx*, *vy*, *vz*, *fx*, +*fy*, and *fz*) are self-explanatory. Note that other atom attributes +can be used as inputs to this compute by using the :doc:`compute +property/atom ` command and then specifying an +input value from that compute. If a value begins with "c\_", a compute ID must follow which has been -previously defined in the input script. Computes can generate -per-atom or local quantities. See the individual -:doc:`compute ` page for details. If no bracketed integer -is appended, the vector calculated by the compute is used. If a -bracketed integer is appended, the Ith column of the array calculated -by the compute is used. Users can also write code for their own -compute styles and :doc:`add them to LAMMPS `. See the -discussion above for how :math:`I` can be specified with a wildcard asterisk -to effectively specify multiple values. +previously defined in the input script. Valid computes can generate +per-atom or local quantities. See the individual :doc:`compute +` page for details. If no bracketed integer is appended, the +vector calculated by the compute is used. If a bracketed integer is +appended, the Ith column of the array calculated by the compute is +used. Users can also write code for their own compute styles and +:doc:`add them to LAMMPS `. See the discussion above for how +:math:`I` can be specified with a wildcard asterisk to effectively +specify multiple values. 
If a value begins with "f\_", a fix ID must follow which has been -previously defined in the input script. Fixes can generate per-atom -or local quantities. See the individual :doc:`fix ` page for -details. Note that some fixes only produce their values on certain -timesteps, which must be compatible with when compute reduce +previously defined in the input script. Valid fixes can generate +per-atom or local quantities. See the individual :doc:`fix ` +page for details. Note that some fixes only produce their values on +certain timesteps, which must be compatible with when compute reduce references the values, else an error results. If no bracketed integer is appended, the vector calculated by the fix is used. If a bracketed integer is appended, the Ith column of the array calculated by the fix is used. Users can also write code for their own fix style and :doc:`add them to LAMMPS `. See the discussion above for how -:math:`I` can be specified with a wildcard asterisk to effectively specify -multiple values. +:math:`I` can be specified with a wildcard asterisk to effectively +specify multiple values. If a value begins with "v\_", a variable name must follow which has been previously defined in the input script. It must be an :doc:`atom-style variable `. Atom-style variables can reference thermodynamic keywords and various per-atom attributes, or invoke other computes, fixes, or variables when they are evaluated, so -this is a very general means of generating per-atom quantities to reduce. +this is a very general means of generating per-atom quantities to +reduce. ---------- If the *replace* keyword is used, two indices *vec1* and *vec2* are -specified, where each index ranges from 1 to the number of input values. -The replace keyword can only be used if the *mode* is *min* or *max*\ . -It works as follows. A min/max is computed as usual on the *vec2* -input vector. The index :math:`N` of that value within *vec2* is also stored. 
-Then, instead of performing a min/max on the *vec1* input vector, the -stored index is used to select the :math:`N`\ th element of the *vec1* vector. +specified, where each index ranges from 1 to the number of input +values. The replace keyword can only be used if the *mode* is *min* +or *max*\ . It works as follows. A min/max is computed as usual on +the *vec2* input vector. The index :math:`N` of that value within +*vec2* is also stored. Then, instead of performing a min/max on the +*vec1* input vector, the stored index is used to select the :math:`N`\ +th element of the *vec1* vector. Thus, for example, if you wish to use this compute to find the bond with maximum stretch, you can do it as follows: @@ -190,6 +199,14 @@ information in this context, the *replace* keywords will extract the atom IDs for the two atoms in the bond of maximum stretch. These atom IDs and the bond stretch will be printed with thermodynamic output. +The *inputs* keyword allows selection of whether all the inputs are +per-atom or local quantities. As noted above, all the inputs must be +the same kind (per-atom or local). Per-atom is the default setting. +If a compute or fix is specified as an input, it must produce per-atom +or local data to match this setting. If it produces both, e.g. for +the :doc:`compute voronoi/atom ` command, then +this keyword selects between them. + ---------- If a single input is specified this compute produces a global scalar @@ -197,34 +214,35 @@ value. If multiple inputs are specified, this compute produces a global vector of values, the length of which is equal to the number of inputs specified. -As discussed below, for the *sum*, *sumabs*, and *sumsq* modes, the value(s) -produced by this compute are all "extensive", meaning their value -scales linearly with the number of atoms involved. 
If normalized -values are desired, this compute can be accessed by the +As discussed below, for the *sum*, *sumabs*, and *sumsq* modes, the +value(s) produced by this compute are all "extensive", meaning their +value scales linearly with the number of atoms involved. If +normalized values are desired, this compute can be accessed by the :doc:`thermo_style custom ` command with -:doc:`thermo_modify norm yes ` set as an option. -Or it can be accessed by a -:doc:`variable ` that divides by the appropriate atom count. +:doc:`thermo_modify norm yes ` set as an option. Or it +can be accessed by a :doc:`variable ` that divides by the +appropriate atom count. ---------- Output info """"""""""" -This compute calculates a global scalar if a single input value is specified -or a global vector of length :math:`N`, where :math:`N` is the number of -inputs, and which can be accessed by indices 1 to :math:`N`. These values can -be used by any command that uses global scalar or vector values from a -compute as input. See the :doc:`Howto output ` doc page -for an overview of LAMMPS output options. +This compute calculates a global scalar if a single input value is +specified or a global vector of length :math:`N`, where :math:`N` is +the number of inputs, and which can be accessed by indices 1 to +:math:`N`. These values can be used by any command that uses global +scalar or vector values from a compute as input. See the :doc:`Howto +output ` doc page for an overview of LAMMPS output +options. All the scalar or vector values calculated by this compute are "intensive", except when the *sum*, *sumabs*, or *sumsq* modes are used on per-atom or local vectors, in which case the calculated values are "extensive". -The scalar or vector values will be in whatever :doc:`units ` the -quantities being reduced are in. +The scalar or vector values will be in whatever :doc:`units ` +the quantities being reduced are in. 
Restrictions """""""""""" @@ -238,4 +256,4 @@ Related commands Default """"""" -none +The default value for the *inputs* keyword is peratom. diff --git a/doc/src/fix_rigid.rst b/doc/src/fix_rigid.rst index 89759da817..a50e215681 100644 --- a/doc/src/fix_rigid.rst +++ b/doc/src/fix_rigid.rst @@ -843,7 +843,7 @@ stress/atom ` commands. The former can be accessed by :doc:`thermodynamic output `. The default setting for this fix is :doc:`fix_modify virial yes `. -All of the *rigid* styles (not the *rigid/small* styles) compute a +All of the *rigid* styles (but not the *rigid/small* styles) compute a global array of values which can be accessed by various :doc:`output commands `. Similar information about the bodies defined by the *rigid/small* styles can be accessed via the @@ -887,7 +887,8 @@ Restrictions """""""""""" These fixes are all part of the RIGID package. It is only enabled if -LAMMPS was built with that package. See the :doc:`Build package ` page for more info. +LAMMPS was built with that package. See the :doc:`Build package +` page for more info. 
Assigning a temperature via the :doc:`velocity create ` command to a system with :doc:`rigid bodies ` may not have diff --git a/src/compute_reduce.cpp b/src/compute_reduce.cpp index 6b27498eb7..8565ddb1c9 100644 --- a/src/compute_reduce.cpp +++ b/src/compute_reduce.cpp @@ -31,12 +31,16 @@ using namespace LAMMPS_NS; +enum{UNDECIDED,PERATOM,LOCAL}; // same as in ComputeReduceRegion + #define BIG 1.0e20 //---------------------------------------------------------------- + void abs_max(void *in, void *inout, int * /*len*/, MPI_Datatype * /*type*/) { // r is the already reduced value, n is the new value + double n = std::fabs(*(double *) in), r = *(double *) inout; double m; @@ -47,9 +51,11 @@ void abs_max(void *in, void *inout, int * /*len*/, MPI_Datatype * /*type*/) } *(double *) inout = m; } + void abs_min(void *in, void *inout, int * /*len*/, MPI_Datatype * /*type*/) { // r is the already reduced value, n is the new value + double n = std::fabs(*(double *) in), r = *(double *) inout; double m; @@ -68,6 +74,7 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : owner(nullptr), idregion(nullptr), region(nullptr), varatom(nullptr) { int iarg = 0; + if (strcmp(style, "reduce") == 0) { if (narg < 5) utils::missing_cmd_args(FLERR, "compute reduce", error); iarg = 3; @@ -128,42 +135,52 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : // parse values + input_mode = UNDECIDED; + values.clear(); nvalues = 0; for (int iarg = 0; iarg < nargnew; ++iarg) { value_t val; val.id = ""; - val.flavor = 0; val.val.c = nullptr; if (strcmp(arg[iarg], "x") == 0) { + input_mode = PERATOM; val.which = ArgInfo::X; val.argindex = 0; } else if (strcmp(arg[iarg], "y") == 0) { + input_mode = PERATOM; val.which = ArgInfo::X; val.argindex = 1; } else if (strcmp(arg[iarg], "z") == 0) { + input_mode = PERATOM; val.which = ArgInfo::X; val.argindex = 2; } else if (strcmp(arg[iarg], "vx") == 0) { + input_mode = PERATOM; val.which = ArgInfo::V; val.argindex = 0; } else 
if (strcmp(arg[iarg], "vy") == 0) { + input_mode = PERATOM; val.which = ArgInfo::V; val.argindex = 1; } else if (strcmp(arg[iarg], "vz") == 0) { + input_mode = PERATOM; val.which = ArgInfo::V; val.argindex = 2; } else if (strcmp(arg[iarg], "fx") == 0) { + input_mode = PERATOM; val.which = ArgInfo::F; val.argindex = 0; } else if (strcmp(arg[iarg], "fy") == 0) { + input_mode = PERATOM; val.which = ArgInfo::F; val.argindex = 1; } else if (strcmp(arg[iarg], "fz") == 0) { + input_mode = PERATOM; val.which = ArgInfo::F; val.argindex = 2; @@ -207,6 +224,14 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : error->all(FLERR, "Compute {} replace column already used for another replacement"); replace[col1] = col2; iarg += 2; + } else if (strcmp(arg[iarg], "inputs") == 0) { + if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, mycmd + " inputs", error); + if (strcmp(arg[iarg+1], "peratom") == 0) input_mode = PERATOM; + else if (strcmp(arg[iarg+1], "local") == 0) { + if (input_mode == PERATOM) + error->all(FLERR,"Compute {} inputs must be all peratom or all local"); + input_mode = LOCAL; + } } else error->all(FLERR, "Unknown compute {} keyword: {}", style, arg[iarg]); } @@ -231,66 +256,64 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : // setup and error check for (auto &val : values) { - if (val.which == ArgInfo::X || val.which == ArgInfo::V || val.which == ArgInfo::F) - val.flavor = PERATOM; - - else if (val.which == ArgInfo::COMPUTE) { + if (val.which == ArgInfo::COMPUTE) { val.val.c = modify->get_compute_by_id(val.id); if (!val.val.c) error->all(FLERR, "Compute ID {} for compute {} does not exist", val.id, style); - if (val.val.c->peratom_flag) { - val.flavor = PERATOM; + + if (input_mode == PERATOM) { + if (!val.val.c->peratom_flag) + error->all(FLERR, "Compute {} compute {} does not calculate per-atom values", style, val.id); if (val.argindex == 0 && val.val.c->size_peratom_cols != 0) - error->all(FLERR, "Compute {} compute {} does 
not calculate a per-atom vector", style, - val.id); + error->all(FLERR, "Compute {} compute {} does not calculate a per-atom vector", style, val.id); if (val.argindex && val.val.c->size_peratom_cols == 0) - error->all(FLERR, "Compute {} compute {} does not calculate a per-atom array", style, - val.id); + error->all(FLERR, "Compute {} compute {} does not calculate a per-atom array", style, val.id); if (val.argindex && val.argindex > val.val.c->size_peratom_cols) error->all(FLERR, "Compute {} compute {} array is accessed out-of-range", style, val.id); - } else if (val.val.c->local_flag) { - val.flavor = LOCAL; + + } else if (input_mode == LOCAL) { + if (!val.val.c->peratom_flag) + error->all(FLERR, "Compute {} compute {} does not calculate local values", style, val.id); if (val.argindex == 0 && val.val.c->size_local_cols != 0) - error->all(FLERR, "Compute {} compute {} does not calculate a local vector", style, - val.id); + error->all(FLERR, "Compute {} compute {} does not calculate a local vector", style, val.id); if (val.argindex && val.val.c->size_local_cols == 0) - error->all(FLERR, "Compute {} compute {} does not calculate a local array", style, - val.id); + error->all(FLERR, "Compute {} compute {} does not calculate a local array", style, val.id); if (val.argindex && val.argindex > val.val.c->size_local_cols) error->all(FLERR, "Compute {} compute {} array is accessed out-of-range", style, val.id); - } else - error->all(FLERR, "Compute {} compute {} calculates global values", style, val.id); + } } else if (val.which == ArgInfo::FIX) { val.val.f = modify->get_fix_by_id(val.id); if (!val.val.f) error->all(FLERR, "Fix ID {} for compute {} does not exist", val.id, style); - if (val.val.f->peratom_flag) { - val.flavor = PERATOM; + + if (input_mode == PERATOM) { + if (!val.val.f->peratom_flag) + error->all(FLERR, "Compute {} fix {} does not calculate per-atom values", style, val.id); if (val.argindex == 0 && (val.val.f->size_peratom_cols != 0)) - error->all(FLERR, 
"Compute {} fix {} does not calculate a per-atom vector", style, - val.id); + error->all(FLERR, "Compute {} fix {} does not calculate a per-atom vector", style, val.id); if (val.argindex && (val.val.f->size_peratom_cols == 0)) error->all(FLERR, "Compute {} fix {} does not calculate a per-atom array", style, val.id); if (val.argindex && (val.argindex > val.val.f->size_peratom_cols)) error->all(FLERR, "Compute {} fix {} array is accessed out-of-range", style, val.id); - } else if (val.val.f->local_flag) { - val.flavor = LOCAL; + + } else if (input_mode == LOCAL) { + if (!val.val.f->local_flag) + error->all(FLERR, "Compute {} fix {} does not calculate local values", style, val.id); if (val.argindex == 0 && (val.val.f->size_local_cols != 0)) error->all(FLERR, "Compute {} fix {} does not calculate a local vector", style, val.id); if (val.argindex && (val.val.f->size_local_cols == 0)) error->all(FLERR, "Compute {} fix {} does not calculate a local array", style, val.id); if (val.argindex && (val.argindex > val.val.f->size_local_cols)) error->all(FLERR, "Compute {} fix {} array is accessed out-of-range", style, val.id); - } else - error->all(FLERR, "Compute {} fix {} calculates global values", style, val.id); + } } else if (val.which == ArgInfo::VARIABLE) { + if (input_mode == LOCAL) error->all(FLERR,"Compute {} inputs must be all local"); val.val.v = input->variable->find(val.id.c_str()); if (val.val.v < 0) error->all(FLERR, "Variable name {} for compute {} does not exist", val.id, style); if (input->variable->atomstyle(val.val.v) == 0) error->all(FLERR, "Compute {} variable {} is not atom-style variable", style, val.id); - val.flavor = PERATOM; } } @@ -512,7 +535,7 @@ double ComputeReduce::compute_one(int m, int flag) } else if (val.which == ArgInfo::COMPUTE) { - if (val.flavor == PERATOM) { + if (input_mode == PERATOM) { if (!(val.val.c->invoked_flag & Compute::INVOKED_PERATOM)) { val.val.c->compute_peratom(); val.val.c->invoked_flag |= Compute::INVOKED_PERATOM; @@ 
-537,7 +560,7 @@ double ComputeReduce::compute_one(int m, int flag) one = carray_atom[flag][aidxm1]; } - } else if (val.flavor == LOCAL) { + } else if (input_mode == LOCAL) { if (!(val.val.c->invoked_flag & Compute::INVOKED_LOCAL)) { val.val.c->compute_local(); val.val.c->invoked_flag |= Compute::INVOKED_LOCAL; @@ -567,7 +590,7 @@ double ComputeReduce::compute_one(int m, int flag) if (update->ntimestep % val.val.f->peratom_freq) error->all(FLERR, "Fix {} used in compute {} not computed at compatible time", val.id, style); - if (val.flavor == PERATOM) { + if (input_mode == PERATOM) { if (aidx == 0) { double *fix_vector = val.val.f->vector_atom; if (flag < 0) { @@ -585,7 +608,7 @@ double ComputeReduce::compute_one(int m, int flag) one = fix_array[flag][aidxm1]; } - } else if (val.flavor == LOCAL) { + } else if (input_mode == LOCAL) { if (aidx == 0) { double *fix_vector = val.val.f->vector_local; int n = val.val.f->size_local_rows; @@ -632,18 +655,18 @@ bigint ComputeReduce::count(int m) if ((val.which == ArgInfo::X) || (val.which == ArgInfo::V) || (val.which == ArgInfo::F)) return group->count(igroup); else if (val.which == ArgInfo::COMPUTE) { - if (val.flavor == PERATOM) { + if (input_mode == PERATOM) { return group->count(igroup); - } else if (val.flavor == LOCAL) { + } else if (input_mode == LOCAL) { bigint ncount = val.val.c->size_local_rows; bigint ncountall; MPI_Allreduce(&ncount, &ncountall, 1, MPI_LMP_BIGINT, MPI_SUM, world); return ncountall; } } else if (val.which == ArgInfo::FIX) { - if (val.flavor == PERATOM) { + if (input_mode == PERATOM) { return group->count(igroup); - } else if (val.flavor == LOCAL) { + } else if (input_mode == LOCAL) { bigint ncount = val.val.f->size_local_rows; bigint ncountall; MPI_Allreduce(&ncount, &ncountall, 1, MPI_LMP_BIGINT, MPI_SUM, world); diff --git a/src/compute_reduce.h b/src/compute_reduce.h index f8f73cb17a..f8b652e00c 100644 --- a/src/compute_reduce.h +++ b/src/compute_reduce.h @@ -37,12 +37,11 @@ class ComputeReduce 
: public Compute { double memory_usage() override; protected: - int mode, nvalues; + int mode, nvalues, input_mode; struct value_t { int which; int argindex; std::string id; - int flavor; union { class Compute *c; class Fix *f; diff --git a/src/compute_reduce_region.cpp b/src/compute_reduce_region.cpp index efce00ff66..2f5a3de675 100644 --- a/src/compute_reduce_region.cpp +++ b/src/compute_reduce_region.cpp @@ -26,6 +26,8 @@ using namespace LAMMPS_NS; +enum{UNDECIDED,PERATOM,LOCAL}; // same as in ComputeReduce + static constexpr double BIG = 1.0e20; /* ---------------------------------------------------------------------- */ @@ -97,7 +99,7 @@ double ComputeReduceRegion::compute_one(int m, int flag) // invoke compute if not previously invoked } else if (val.which == ArgInfo::COMPUTE) { - if (val.flavor == PERATOM) { + if (input_mode == PERATOM) { if (!(val.val.c->invoked_flag & Compute::INVOKED_PERATOM)) { val.val.c->compute_peratom(); val.val.c->invoked_flag |= Compute::INVOKED_PERATOM; @@ -122,7 +124,7 @@ double ComputeReduceRegion::compute_one(int m, int flag) one = compute_array[flag][aidxm1]; } - } else if (val.flavor == LOCAL) { + } else if (input_mode == LOCAL) { if (!(val.val.c->invoked_flag & Compute::INVOKED_LOCAL)) { val.val.c->compute_local(); val.val.c->invoked_flag |= Compute::INVOKED_LOCAL; @@ -151,7 +153,7 @@ double ComputeReduceRegion::compute_one(int m, int flag) if (update->ntimestep % val.val.f->peratom_freq) error->all(FLERR, "Fix {} used in compute {} not computed at compatible time", val.id, style); - if (val.flavor == PERATOM) { + if (input_mode == PERATOM) { if (aidx == 0) { double *fix_vector = val.val.f->vector_atom; if (flag < 0) { @@ -171,7 +173,7 @@ double ComputeReduceRegion::compute_one(int m, int flag) one = fix_array[flag][aidxm1]; } - } else if (val.flavor == LOCAL) { + } else if (input_mode == LOCAL) { if (aidx == 0) { double *fix_vector = val.val.f->vector_local; if (flag < 0) @@ -219,18 +221,18 @@ bigint 
ComputeReduceRegion::count(int m) if (val.which == ArgInfo::X || val.which == ArgInfo::V || val.which == ArgInfo::F) return group->count(igroup, region); else if (val.which == ArgInfo::COMPUTE) { - if (val.flavor == PERATOM) { + if (input_mode == PERATOM) { return group->count(igroup, region); - } else if (val.flavor == LOCAL) { + } else if (input_mode == LOCAL) { bigint ncount = val.val.c->size_local_rows; bigint ncountall; MPI_Allreduce(&ncount, &ncountall, 1, MPI_DOUBLE, MPI_SUM, world); return ncountall; } } else if (val.which == ArgInfo::FIX) { - if (val.flavor == PERATOM) { + if (input_mode == PERATOM) { return group->count(igroup, region); - } else if (val.flavor == LOCAL) { + } else if (input_mode == LOCAL) { bigint ncount = val.val.f->size_local_rows; bigint ncountall; MPI_Allreduce(&ncount, &ncountall, 1, MPI_DOUBLE, MPI_SUM, world); From f2901827e6cc74bdacfe8cfb06d1ba62322ed007 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Thu, 17 Aug 2023 17:25:27 -0600 Subject: [PATCH 034/107] updates to variable doc page to clarify compute/fix options --- doc/src/compute.rst | 4 +- doc/src/thermo_style.rst | 53 +++++----- doc/src/variable.rst | 218 +++++++++++++++++++++------------------ 3 files changed, 148 insertions(+), 127 deletions(-) diff --git a/doc/src/compute.rst b/doc/src/compute.rst index 317efa4f41..226dc6373b 100644 --- a/doc/src/compute.rst +++ b/doc/src/compute.rst @@ -43,8 +43,8 @@ underscores. ---------- -Computes calculate one or more of four styles of quantities: global, -per-atom, local, or per-atom. A global quantity is one or more +Computes calculate and store any of four styles of quantities: global, +per-atom, local, or per-grid. A global quantity is one or more system-wide values, e.g. the temperature of the system. A per-atom quantity is one or more values per atom, e.g. the kinetic energy of each atom. 
Per-atom values are set to 0.0 for atoms not in the diff --git a/doc/src/thermo_style.rst b/doc/src/thermo_style.rst index 63ad59e553..c3c607a479 100644 --- a/doc/src/thermo_style.rst +++ b/doc/src/thermo_style.rst @@ -385,19 +385,20 @@ creates a global vector with 6 values. The *c_ID* and *c_ID[I]* and *c_ID[I][J]* keywords allow global values calculated by a compute to be output. As discussed on the :doc:`compute ` doc page, computes can calculate global, -per-atom, or local values. Only global values can be referenced by -this command. However, per-atom compute values for an individual atom -can be referenced in a :doc:`variable ` and the variable -referenced by thermo_style custom, as discussed below. See the -discussion above for how the I in *c_ID[I]* can be specified with a -wildcard asterisk to effectively specify multiple values from a global -compute vector. +per-atom, local, and per-grid values. Only global values can be +referenced by this command. However, per-atom compute values for an +individual atom can be referenced in a :doc:`equal-style variable +` and the variable referenced by thermo_style custom, as +discussed below. See the discussion above for how the I in *c_ID[I]* +can be specified with a wildcard asterisk to effectively specify +multiple values from a global compute vector. The ID in the keyword should be replaced by the actual ID of a compute that has been defined elsewhere in the input script. See the -:doc:`compute ` command for details. If the compute calculates -a global scalar, vector, or array, then the keyword formats with 0, 1, -or 2 brackets will reference a scalar value from the compute. +:doc:`compute ` command for details. If the compute +calculates a global scalar, vector, or array, then the keyword formats +with 0, 1, or 2 brackets will reference a scalar value from the +compute. 
Note that some computes calculate "intensive" global quantities like temperature; others calculate "extensive" global quantities like @@ -410,13 +411,14 @@ norm ` option being used. The *f_ID* and *f_ID[I]* and *f_ID[I][J]* keywords allow global values calculated by a fix to be output. As discussed on the :doc:`fix -` doc page, fixes can calculate global, per-atom, or local -values. Only global values can be referenced by this command. -However, per-atom fix values can be referenced for an individual atom -in a :doc:`variable ` and the variable referenced by -thermo_style custom, as discussed below. See the discussion above for -how the I in *f_ID[I]* can be specified with a wildcard asterisk to -effectively specify multiple values from a global fix vector. +` doc page, fixes can calculate global, per-atom, local, and +per-grid values. Only global values can be referenced by this +command. However, per-atom fix values can be referenced for an +individual atom in a :doc:`equal-style variable ` and the +variable referenced by thermo_style custom, as discussed below. See +the discussion above for how the I in *f_ID[I]* can be specified with +a wildcard asterisk to effectively specify multiple values from a +global fix vector. The ID in the keyword should be replaced by the actual ID of a fix that has been defined elsewhere in the input script. See the @@ -438,14 +440,15 @@ output. The name in the keyword should be replaced by the variable name that has been defined elsewhere in the input script. Only equal-style and vector-style variables can be referenced; the latter requires a bracketed term to specify the Ith element of the vector -calculated by the variable. However, an atom-style variable can be -referenced for an individual atom by an equal-style variable and that -variable referenced. See the :doc:`variable ` command for -details. 
Variables of style *equal* and *vector* and *atom* define a -formula which can reference per-atom properties or thermodynamic -keywords, or they can invoke other computes, fixes, or variables when -evaluated, so this is a very general means of creating thermodynamic -output. +calculated by the variable. However, an equal-style variable can use +an atom-style variable in its formula indexed by the ID of an +individual atom. This is a way to output a specific atom's per-atom +coordinates or other per-atom properties in thermo output. See the +:doc:`variable ` command for details. Note that variables +of style *equal* and *vector* and *atom* define a formula which can +reference per-atom properties or thermodynamic keywords, or they can +invoke other computes, fixes, or variables when evaluated, so this is +a very general means of creating thermodynamic output. Note that equal-style and vector-style variables are assumed to produce "intensive" global quantities, which are thus printed as-is, diff --git a/doc/src/variable.rst b/doc/src/variable.rst index 28c0d29799..38e423b632 100644 --- a/doc/src/variable.rst +++ b/doc/src/variable.rst @@ -550,12 +550,11 @@ variables. Most of the formula elements produce a scalar value. Some produce a global or per-atom vector of values. Global vectors can be produced by computes or fixes or by other vector-style variables. Per-atom -vectors are produced by atom vectors, compute references that -represent a per-atom vector, fix references that represent a per-atom -vector, and variables that are atom-style variables. Math functions -that operate on scalar values produce a scalar value; math function -that operate on global or per-atom vectors do so element-by-element -and produce a global or per-atom vector. +vectors are produced by atom vectors, computes or fixes which output a +per-atom vector or array, and variables that are atom-style variables.
+Math functions that operate on scalar values produce a scalar value; +math functions that operate on global or per-atom vectors do so +element-by-element and produce a global or per-atom vector. A formula for equal-style variables cannot use any formula element that produces a global or per-atom vector. A formula for a @@ -564,12 +563,13 @@ scalar value or a global vector value, but cannot use a formula element that produces a per-atom vector. A formula for an atom-style variable can use formula elements that produce either a scalar value or a per-atom vector, but not one that produces a global vector. + Atom-style variables are evaluated by other commands that define a -:doc:`group ` on which they operate, e.g. a :doc:`dump ` or -:doc:`compute ` or :doc:`fix ` command. When they invoke -the atom-style variable, only atoms in the group are included in the -formula evaluation. The variable evaluates to 0.0 for atoms not in -the group. +:doc:`group ` on which they operate, e.g. a :doc:`dump ` +or :doc:`compute ` or :doc:`fix ` command. When they +invoke the atom-style variable, only atoms in the group are included +in the formula evaluation. The variable evaluates to 0.0 for atoms +not in the group. ---------- @@ -1138,69 +1138,74 @@ only defined if an :doc:`atom_style ` is being used that defines molecule IDs. Note that many other atom attributes can be used as inputs to a -variable by using the :doc:`compute property/atom ` command and then specifying -a quantity from that compute. +variable by using the :doc:`compute property/atom +` command and then specifying a quantity from +that compute. ---------- Compute References ------------------ -Compute references access quantities calculated by a -:doc:`compute `. The ID in the reference should be replaced by -the ID of a compute defined elsewhere in the input script. As -discussed in the page for the :doc:`compute ` command, -computes can produce global, per-atom, or local values.
Only global -and per-atom values can be used in a variable. Computes can also -produce a scalar, vector, or array. +Compute references access quantities calculated by a :doc:`compute +`. The ID in the reference should be replaced by the ID of a +compute defined elsewhere in the input script. -An equal-style variable can only use scalar values, which means a -global scalar, or an element of a global or per-atom vector or array. -A vector-style variable can use scalar values or a global vector of -values, or a column of a global array of values. Atom-style variables -can use global scalar values. They can also use per-atom vector -values, or a column of a per-atom array. See the doc pages for -individual computes to see what kind of values they produce. +As discussed on the page for the :doc:`compute ` command, +computes can produce global, per-atom, local, and per-grid values. +Only global and per-atom values can be used in a variable. Computes +can also produce scalars (global only), vectors, and arrays. See the +doc pages for individual computes to see what different kinds of data +they produce. -Examples of different kinds of compute references are as follows. -There is typically no ambiguity (see exception below) as to what a -reference means, since computes only produce either global or per-atom -quantities, never both. +An equal-style variable can only use scalar values, either from global +or per-atom data. In the case of per-atom data, this would be a value +for a specific atom. 
-+-------------+-------------------------------------------------------------------------------------------------------+ -| c_ID | global scalar, or per-atom vector | -+-------------+-------------------------------------------------------------------------------------------------------+ -| c_ID[I] | Ith element of global vector, or atom I's value in per-atom vector, or Ith column from per-atom array | -+-------------+-------------------------------------------------------------------------------------------------------+ -| c_ID[I][J] | I,J element of global array, or atom I's Jth value in per-atom array | -+-------------+-------------------------------------------------------------------------------------------------------+ +A vector-style variable can use scalar values (same as for equal-style +variables), or global vectors of values. The latter can also be a +column of a global array. -For I and J indices, integers can be specified or a variable name, -specified as v_name, where name is the name of the variable. The -rules for this syntax are the same as for the "Atom Values and -Vectors" discussion above. +Atom-style variables can use scalar values (same as for equal-style +variables), or per-atom vectors of values. The latter can also be a +column of a per-atom array. -One source of ambiguity for compute references is when a vector-style -variable refers to a compute that produces both a global scalar and a -global vector. Consider a compute with ID "foo" that does this, -referenced as follows by variable "a", where "myVec" is another -vector-style variable: +The various allowed compute references in the variable formulas for +equal-, vector-, and atom-style variables are listed in the following +table: -..
code-block:: LAMMPS ++--------+------------+--------------------------------------------+ +| equal | c_ID | global scalar | +| equal | c_ID[I] | element of global vector | +| equal | c_ID[I][J] | element of global array | +| equal | C_ID[I] | element of per-atom vector, I = ID of atom | +| equal | C_ID[I][J] | element of per-atom array, I = ID of atom | ++--------+------------+--------------------------------------------+ +| vector | c_ID | global vector | +| vector | c_ID[I] | column of global array | ++--------+------------+--------------------------------------------+ +| atom | c_ID | per-atom vector | +| atom | c_ID[I] | column of per-atom array | ++--------+------------+--------------------------------------------+ - variable a vector c_foo*v_myVec +Note that if an equal-style variable formula wishes to access per-atom +data from a compute, it must use capital "C" as the ID prefix and not +lower-case "c". -The reference "c_foo" could refer to either the global scalar or -global vector produced by compute "foo". In this case, "c_foo" will -always refer to the global scalar, and "C_foo" can be used to -reference the global vector. Similarly if the compute produces both a -global vector and global array, then "c_foo[I]" will always refer to -an element of the global vector, and "C_foo[I]" can be used to -reference the Ith column of the global array. +Also note that if a vector- or atom-style variable formula needs to +access a scalar value from a compute (i.e. the 5 kinds of values in +the first 5 lines of the table), it can not do so directly. Instead, +it can use a reference to an equal-style variable which stores the +scalar value from the compute. -Note that if a variable containing a compute is evaluated directly in -an input script (not during a run), then the values accessed by the -compute must be current.
See the discussion below about "Variable +The I and J indices in these compute references can be integers or can +be a variable name, specified as v_name, where name is the name of the +variable. The rules for this syntax are the same as for indices in +the "Atom Values and Vectors" discussion above. + +If a variable containing a compute is evaluated directly in an input +script (not during a run), then the values accessed by the compute +should be current. See the discussion below about "Variable Accuracy". ---------- @@ -1208,51 +1213,60 @@ Accuracy". Fix References -------------- -Fix references access quantities calculated by a :doc:`fix `. +Fix references access quantities calculated by a :doc:`fix `. The ID in the reference should be replaced by the ID of a fix defined -elsewhere in the input script. As discussed in the page for the -:doc:`fix ` command, fixes can produce global, per-atom, or local -values. Only global and per-atom values can be used in a variable. -Fixes can also produce a scalar, vector, or array. An equal-style -variable can only use scalar values, which means a global scalar, or -an element of a global or per-atom vector or array. Atom-style -variables can use the same scalar values. They can also use per-atom -vector values. A vector value can be a per-atom vector itself, or a -column of an per-atom array. See the doc pages for individual fixes -to see what kind of values they produce. +elsewhere in the input script. -The different kinds of fix references are exactly the same as the -compute references listed in the above table, where "c\_" is replaced -by "f\_". Again, there is typically no ambiguity (see exception below) -as to what a reference means, since fixes only produce either global -or per-atom quantities, never both. +As discussed on the page for the :doc:`fix ` command, fixes can +produce global, per-atom, local, and per-grid values. Only global and +per-atom values can be used in a variable. 
Fixes can also produce +scalars (global only), vectors, and arrays. See the doc pages for +individual fixes to see what different kinds of data they produce. -+-------------+-------------------------------------------------------------------------------------------------------+ -| f_ID | global scalar, or per-atom vector | -+-------------+-------------------------------------------------------------------------------------------------------+ -| f_ID[I] | Ith element of global vector, or atom I's value in per-atom vector, or Ith column from per-atom array | -+-------------+-------------------------------------------------------------------------------------------------------+ -| f_ID[I][J] | I,J element of global array, or atom I's Jth value in per-atom array | -+-------------+-------------------------------------------------------------------------------------------------------+ +An equal-style variable can only use scalar values, either from global +or per-atom data. In the case of per-atom data, this would be a value +for a specific atom. -For I and J indices, integers can be specified or a variable name, -specified as v_name, where name is the name of the variable. The -rules for this syntax are the same as for the "Atom Values and -Vectors" discussion above. +A vector-style variable can use scalar values (same as for equal-style +variables), or global vectors of values. The latter can also be a +column of a global array. -One source of ambiguity for fix references is the same ambiguity -discussed for compute references above. Namely when a vector-style -variable refers to a fix that produces both a global scalar and a -global vector. The solution is the same as for compute references. -For a fix with ID "foo", "f_foo" will always refer to the global -scalar, and "F_foo" can be used to reference the global vector. And -similarly for distinguishing between a fix's global vector versus -global array with "f_foo[I]" versus "F_foo[I]". 
+Atom-style variables can use scalar values (same as for equal-style +variables), or per-atom vectors of values. The latter can also be a +column of a per-atom array. -Note that if a variable containing a fix is evaluated directly in an -input script (not during a run), then the values accessed by the fix -should be current. See the discussion below about "Variable -Accuracy". +The various allowed fix references in the variable formulas for +equal-, vector-, and atom-style variables are listed in the following +table: + ++--------+------------+--------------------------------------------+ +| equal | f_ID | global scalar | +| equal | f_ID[I] | element of global vector | +| equal | f_ID[I][J] | element of global array | +| equal | F_ID[I] | element of per-atom vector, I = ID of atom | +| equal | F_ID[I][J] | element of per-atom array, I = ID of atom | ++--------+------------+--------------------------------------------+ +| vector | f_ID | global vector | +| vector | f_ID[I] | column of global array | ++--------+------------+--------------------------------------------+ +| atom | f_ID | per-atom vector | +| atom | f_ID[I] | column of per-atom array | ++--------+------------+--------------------------------------------+ + +Note that if an equal-style variable formula wishes to access per-atom +data from a fix, it must use capital "F" as the ID prefix and not +lower-case "f". + +Also note that if a vector- or atom-style variable formula needs to +access a scalar value from a fix (i.e. the 5 kinds of values in the +first 5 lines of the table), it can not do so directly. Instead, it +can use a reference to an equal-style variable which stores the scalar +value from the fix. + +The I and J indices in these fix references can be integers or can be +a variable name, specified as v_name, where name is the name of the +variable. The rules for this syntax are the same as for indices in +the "Atom Values and Vectors" discussion above.
Note that some fixes only generate quantities on certain timesteps. If a variable attempts to access the fix on non-allowed timesteps, an @@ -1260,6 +1274,10 @@ error is generated. For example, the :doc:`fix ave/time ` command may only generate averaged quantities every 100 steps. See the doc pages for individual fix commands for details. +If a variable containing a fix is evaluated directly in an input +script (not during a run), then the values accessed by the fix should +be current. See the discussion below about "Variable Accuracy". + ---------- Variable References @@ -1312,8 +1330,8 @@ produce only a global scalar or global vector or per-atom vector. For the I index, an integer can be specified or a variable name, specified as v_name, where name is the name of the variable. The -rules for this syntax are the same as for the "Atom Values and -Vectors" discussion above. +rules for this syntax are the same as for indices in the "Atom Values +and Vectors" discussion above. ---------- From 95e9e6549f6a2658e7f361c4e40616478f274856 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Fri, 18 Aug 2023 09:28:58 -0600 Subject: [PATCH 035/107] simply variable.cpp --- src/variable.cpp | 347 ++++++++++++++++++++++++----------------------- 1 file changed, 180 insertions(+), 167 deletions(-) diff --git a/src/variable.cpp b/src/variable.cpp index cf2e5c3b6f..5013f3ce55 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -1469,8 +1469,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) if (domain->box_exist == 0) print_var_error(FLERR,"Variable evaluation before simulation box is defined",ivar); - // uppercase used to force access of - // global vector vs global scalar, and global array vs global vector + // uppercase used to access of peratom data by equal-style var int lowercase = 1; if (word[0] == 'C') lowercase = 0; @@ -1479,7 +1478,6 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) if (!compute) print_var_error(FLERR,fmt::format("Invalid 
compute ID '{}' in variable formula", word+2),ivar); - // parse zero or one or two trailing brackets // point i beyond last bracket // nbracket = # of bracket pairs @@ -1501,107 +1499,203 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) } } - // c_ID = scalar from global scalar, must be lowercase + // equal-style variable is being evaluated - if (nbracket == 0 && compute->scalar_flag && lowercase) { + if (style[ivar] == EQUAL) { + + // c_ID = scalar from global scalar - if (!compute->is_initialized()) - print_var_error(FLERR,"Variable formula compute cannot be invoked before " - "initialization by a run",ivar); - if (!(compute->invoked_flag & Compute::INVOKED_SCALAR)) { - compute->compute_scalar(); - compute->invoked_flag |= Compute::INVOKED_SCALAR; - } + if (lowercase && nbracket == 0) { - value1 = compute->scalar; - if (tree) { - auto newtree = new Tree(); - newtree->type = VALUE; - newtree->value = value1; - treestack[ntreestack++] = newtree; - } else argstack[nargstack++] = value1; + if (!compute->scalar_flag) + print_var_error(FLERR,"Mismatched compute in variable formula",ivar); - // c_ID[i] = scalar from global vector, must be lowercase + if (!compute->is_initialized()) + print_var_error(FLERR,"Variable formula compute cannot be invoked before " + "initialization by a run",ivar); + if (!(compute->invoked_flag & Compute::INVOKED_SCALAR)) { + compute->compute_scalar(); + compute->invoked_flag |= Compute::INVOKED_SCALAR; + } - } else if (nbracket == 1 && compute->vector_flag && lowercase) { + value1 = compute->scalar; + argstack[nargstack++] = value1; - if (index1 > compute->size_vector && - compute->size_vector_variable == 0) - print_var_error(FLERR,"Variable formula compute vector is accessed out-of-range",ivar,0); - if (!compute->is_initialized()) - print_var_error(FLERR,"Variable formula compute cannot be invoked before " - "initialization by a run",ivar); - if (!(compute->invoked_flag & Compute::INVOKED_VECTOR)) { - compute->compute_vector(); 
- compute->invoked_flag |= Compute::INVOKED_VECTOR; - } + // c_ID[i] = scalar from global vector + + } else if (lowercase && nbracket == 1) { + + if (!compute->vector_flag) + print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + if (index1 > compute->size_vector && + compute->size_vector_variable == 0) + print_var_error(FLERR,"Variable formula compute vector is accessed out-of-range",ivar,0); + if (!compute->is_initialized()) + print_var_error(FLERR,"Variable formula compute cannot be invoked before " + "initialization by a run",ivar); + if (!(compute->invoked_flag & Compute::INVOKED_VECTOR)) { + compute->compute_vector(); + compute->invoked_flag |= Compute::INVOKED_VECTOR; + } if (compute->size_vector_variable && index1 > compute->size_vector) value1 = 0.0; else value1 = compute->vector[index1-1]; - if (tree) { + argstack[nargstack++] = value1; + + // c_ID[i][j] = scalar from global array + + } else if (lowercase && nbracket == 2) { + + if (!compute->array_flag) + print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + if (index1 > compute->size_array_rows && + compute->size_array_rows_variable == 0) + print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0); + if (index2 > compute->size_array_cols) + print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0); + if (!compute->is_initialized()) + print_var_error(FLERR,"Variable formula compute cannot be invoked before " + "initialization by a run",ivar); + if (!(compute->invoked_flag & Compute::INVOKED_ARRAY)) { + compute->compute_array(); + compute->invoked_flag |= Compute::INVOKED_ARRAY; + } + + if (compute->size_array_rows_variable && + index1 > compute->size_array_rows) value1 = 0.0; + else value1 = compute->array[index1-1][index2-1]; + argstack[nargstack++] = value1; + + // C_ID[i] = scalar element of per-atom vector + + } else if (!lowercase && nbracket == 1) { + + if (!compute->peratom_flag) + 
print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + if (compute->size_peratom_cols) + print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + if (!compute->is_initialized()) + print_var_error(FLERR,"Variable formula compute cannot be invoked before " + "initialization by a run",ivar); + if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) { + compute->compute_peratom(); + compute->invoked_flag |= Compute::INVOKED_PERATOM; + } + + peratom2global(1,nullptr,compute->vector_atom,1,index1,tree, + treestack,ntreestack,argstack,nargstack); + + // C_ID[i][j] = scalar element of per-atom array + + } else if (!lowercase && nbracket == 2) { + + if (!compute->peratom_flag) + print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + if (!compute->size_peratom_cols) + print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + if (index2 > compute->size_peratom_cols) + print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0); + if (!compute->is_initialized()) + print_var_error(FLERR,"Variable formula compute cannot be invoked before " + "initialization by a run",ivar); + if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) { + compute->compute_peratom(); + compute->invoked_flag |= Compute::INVOKED_PERATOM; + } + + if (compute->array_atom) + peratom2global(1,nullptr,&compute->array_atom[0][index2-1], + compute->size_peratom_cols,index1, + tree,treestack,ntreestack,argstack,nargstack); + else + peratom2global(1,nullptr,nullptr,compute->size_peratom_cols,index1, + tree,treestack,ntreestack,argstack,nargstack); + + // no other possibilities for equal-style variable, so error + + } else print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + + // vector-style variable is being evaluated + + } else if (style[ivar] == VECTOR) { + + // c_ID = vector from global vector + + if (lowercase && nbracket == 0) { + + if (!compute->vector_flag) + print_var_error(FLERR,"Mismatched 
compute in variable formula",ivar); + + // c_ID[i] = vector from global array + + } else if (lowercase && nbracket == 1) { + + if (!compute->array_flag) + print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + + // no other possibilities for vector-style variable, so error + + } else print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + + // atom-style variable is being evaluated + + } else if (style[ivar] == ATOM) { + + // c_ID = vector from per-atom vector + + if (lowercase && nbracket == 0) { + + if (!compute->peratom_flag) + print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + if (compute->size_peratom_cols) + print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + if (!compute->is_initialized()) + print_var_error(FLERR,"Variable formula compute cannot be invoked before " + "initialization by a run",ivar); + if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) { + compute->compute_peratom(); + compute->invoked_flag |= Compute::INVOKED_PERATOM; + } + auto newtree = new Tree(); - newtree->type = VALUE; - newtree->value = value1; + newtree->type = ATOMARRAY; + newtree->array = compute->vector_atom; + newtree->nstride = 1; treestack[ntreestack++] = newtree; - } else argstack[nargstack++] = value1; + + // c_ID[i] = vector from per-atom array - // c_ID[i][j] = scalar from global array, must be lowercase + } else if (lowercase && nbracket == 1) { - } else if (nbracket == 2 && compute->array_flag && lowercase) { + if (!compute->peratom_flag) + print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + if (!compute->size_peratom_cols) + print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + if (index1 > compute->size_peratom_cols) + print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0); + if (!compute->is_initialized()) + print_var_error(FLERR,"Variable formula compute cannot be invoked before " + "initialization by a run",ivar); + if 
(!(compute->invoked_flag & Compute::INVOKED_PERATOM)) { + compute->compute_peratom(); + compute->invoked_flag |= Compute::INVOKED_PERATOM; + } - if (index1 > compute->size_array_rows && - compute->size_array_rows_variable == 0) - print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0); - if (index2 > compute->size_array_cols) - print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0); - if (!compute->is_initialized()) - print_var_error(FLERR,"Variable formula compute cannot be invoked before " - "initialization by a run",ivar); - if (!(compute->invoked_flag & Compute::INVOKED_ARRAY)) { - compute->compute_array(); - compute->invoked_flag |= Compute::INVOKED_ARRAY; - } - - if (compute->size_array_rows_variable && - index1 > compute->size_array_rows) value1 = 0.0; - else value1 = compute->array[index1-1][index2-1]; - if (tree) { auto newtree = new Tree(); - newtree->type = VALUE; - newtree->value = value1; + newtree->type = ATOMARRAY; + newtree->array = nullptr; + if (compute->array_atom) + newtree->array = &compute->array_atom[0][index1-1]; + newtree->nstride = compute->size_peratom_cols; treestack[ntreestack++] = newtree; - } else argstack[nargstack++] = value1; - // c_ID = vector from global vector, lowercase or uppercase - - } else if (nbracket == 0 && compute->vector_flag) { - - if (tree == nullptr) - print_var_error(FLERR,"Compute global vector in equal-style variable formula",ivar); - if (treetype == ATOM) - print_var_error(FLERR,"Compute global vector in atom-style variable formula",ivar); - if (compute->size_vector == 0) - print_var_error(FLERR,"Variable formula compute vector is zero length",ivar); - if (!compute->is_initialized()) - print_var_error(FLERR,"Variable formula compute cannot be invoked before " - "initialization by a run",ivar); - if (!(compute->invoked_flag & Compute::INVOKED_VECTOR)) { - compute->compute_vector(); - compute->invoked_flag |= Compute::INVOKED_VECTOR; - } - - auto 
newtree = new Tree(); - newtree->type = VECTORARRAY; - newtree->array = compute->vector; - newtree->nvector = compute->size_vector; - newtree->nstride = 1; - treestack[ntreestack++] = newtree; - - // c_ID[i] = vector from global array, lowercase or uppercase - - } else if (nbracket == 1 && compute->array_flag) { + // no other possibilities for atom-style variable, so error + + } else print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + } + if (tree == nullptr) print_var_error(FLERR,"Compute global vector in equal-style variable formula",ivar); if (treetype == ATOM) @@ -1623,97 +1717,16 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) newtree->nstride = compute->size_array_cols; treestack[ntreestack++] = newtree; - // c_ID[i] = scalar from per-atom vector - } else if (nbracket == 1 && compute->peratom_flag && - compute->size_peratom_cols == 0) { - if (!compute->is_initialized()) - print_var_error(FLERR,"Variable formula compute cannot be invoked before " - "initialization by a run",ivar); - if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) { - compute->compute_peratom(); - compute->invoked_flag |= Compute::INVOKED_PERATOM; - } - peratom2global(1,nullptr,compute->vector_atom,1,index1,tree, - treestack,ntreestack,argstack,nargstack); - // c_ID[i][j] = scalar from per-atom array - } else if (nbracket == 2 && compute->peratom_flag && - compute->size_peratom_cols > 0) { - if (index2 > compute->size_peratom_cols) - print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0); - if (!compute->is_initialized()) - print_var_error(FLERR,"Variable formula compute cannot be invoked before " - "initialization by a run",ivar); - if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) { - compute->compute_peratom(); - compute->invoked_flag |= Compute::INVOKED_PERATOM; - } - if (compute->array_atom) - peratom2global(1,nullptr,&compute->array_atom[0][index2-1],compute->size_peratom_cols,index1, - 
tree,treestack,ntreestack,argstack,nargstack); - else - peratom2global(1,nullptr,nullptr,compute->size_peratom_cols,index1, - tree,treestack,ntreestack,argstack,nargstack); - // c_ID = vector from per-atom vector - - } else if (nbracket == 0 && compute->peratom_flag && - compute->size_peratom_cols == 0) { - - if (tree == nullptr) - print_var_error(FLERR,"Per-atom compute in equal-style variable formula",ivar); - if (treetype == VECTOR) - print_var_error(FLERR,"Per-atom compute in vector-style variable formula",ivar); - if (!compute->is_initialized()) - print_var_error(FLERR,"Variable formula compute cannot be invoked before " - "initialization by a run",ivar); - if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) { - compute->compute_peratom(); - compute->invoked_flag |= Compute::INVOKED_PERATOM; - } - - auto newtree = new Tree(); - newtree->type = ATOMARRAY; - newtree->array = compute->vector_atom; - newtree->nstride = 1; - treestack[ntreestack++] = newtree; - - // c_ID[i] = vector from per-atom array - - } else if (nbracket == 1 && compute->peratom_flag && - compute->size_peratom_cols > 0) { - - if (tree == nullptr) - print_var_error(FLERR,"Per-atom compute in equal-style variable formula",ivar); - if (treetype == VECTOR) - print_var_error(FLERR,"Per-atom compute in vector-style variable formula",ivar); - if (index1 > compute->size_peratom_cols) - print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0); - if (!compute->is_initialized()) - print_var_error(FLERR,"Variable formula compute cannot be invoked before " - "initialization by a run",ivar); - if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) { - compute->compute_peratom(); - compute->invoked_flag |= Compute::INVOKED_PERATOM; - } - - auto newtree = new Tree(); - newtree->type = ATOMARRAY; - if (compute->array_atom) - newtree->array = &compute->array_atom[0][index1-1]; - newtree->nstride = compute->size_peratom_cols; - treestack[ntreestack++] = newtree; - - } else 
if (nbracket == 1 && compute->local_flag) { - print_var_error(FLERR,"Cannot access local data via indexing",ivar); - } else print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + // ---------------- // fix // ---------------- From 91d826a5d660e2e00f5a016dfb8db189cbde1985 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Fri, 18 Aug 2023 09:34:46 -0600 Subject: [PATCH 036/107] changed compute section of variable formulas --- src/variable.cpp | 62 +++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/src/variable.cpp b/src/variable.cpp index 5013f3ce55..a41c12d111 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -1626,6 +1626,22 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) if (!compute->vector_flag) print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + if (compute->size_vector == 0) + print_var_error(FLERR,"Variable formula compute vector is zero length",ivar); + if (!compute->is_initialized()) + print_var_error(FLERR,"Variable formula compute cannot be invoked before " + "initialization by a run",ivar); + if (!(compute->invoked_flag & Compute::INVOKED_VECTOR)) { + compute->compute_vector(); + compute->invoked_flag |= Compute::INVOKED_VECTOR; + } + + auto newtree = new Tree(); + newtree->type = VECTORARRAY; + newtree->array = compute->vector; + newtree->nvector = compute->size_vector; + newtree->nstride = 1; + treestack[ntreestack++] = newtree; // c_ID[i] = vector from global array @@ -1633,6 +1649,24 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) if (!compute->array_flag) print_var_error(FLERR,"Mismatched compute in variable formula",ivar); + if (compute->size_array_rows == 0) + print_var_error(FLERR,"Variable formula compute array is zero length",ivar); + if (index1 > compute->size_array_cols) + print_var_error(FLERR,"Variable formula compute array is accessed out-of-range",ivar,0); + if (!compute->is_initialized()) + 
print_var_error(FLERR,"Variable formula compute cannot be invoked before " + "initialization by a run",ivar); + if (!(compute->invoked_flag & Compute::INVOKED_ARRAY)) { + compute->compute_array(); + compute->invoked_flag |= Compute::INVOKED_ARRAY; + } + + auto newtree = new Tree(); + newtree->type = VECTORARRAY; + newtree->array = &compute->array[0][index1-1]; + newtree->nvector = compute->size_array_rows; + newtree->nstride = compute->size_array_cols; + treestack[ntreestack++] = newtree; // no other possibilities for vector-style variable, so error @@ -1695,38 +1729,12 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) } else print_var_error(FLERR,"Mismatched compute in variable formula",ivar); } - - if (tree == nullptr) - print_var_error(FLERR,"Compute global vector in equal-style variable formula",ivar); - if (treetype == ATOM) - print_var_error(FLERR,"Compute global vector in atom-style variable formula",ivar); - if (compute->size_array_rows == 0) - print_var_error(FLERR,"Variable formula compute array is zero length",ivar); - if (!compute->is_initialized()) - print_var_error(FLERR,"Variable formula compute cannot be invoked before " - "initialization by a run",ivar); - if (!(compute->invoked_flag & Compute::INVOKED_ARRAY)) { - compute->compute_array(); - compute->invoked_flag |= Compute::INVOKED_ARRAY; - } - - auto newtree = new Tree(); - newtree->type = VECTORARRAY; - newtree->array = &compute->array[0][index1-1]; - newtree->nvector = compute->size_array_rows; - newtree->nstride = compute->size_array_cols; - treestack[ntreestack++] = newtree; - - - - - - + // ---------------- // fix // ---------------- From 6e1529ddff9068529f70e8aa29f16c0e49df36a6 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Fri, 18 Aug 2023 13:18:50 -0600 Subject: [PATCH 037/107] finish changes to variables --- doc/src/variable.rst | 91 +++---- src/variable.cpp | 553 +++++++++++++++++++++++-------------------- 2 files changed, 338 insertions(+), 306 deletions(-) diff --git 
a/doc/src/variable.rst b/doc/src/variable.rst index 38e423b632..4541de5fa2 100644 --- a/doc/src/variable.rst +++ b/doc/src/variable.rst @@ -1174,19 +1174,19 @@ The various allowed compute references in the variable formulas for equal-, vector-, and atom-style variables are listed in the following table: -+--------+------------+--------------------------------------------+ -| equal | c_ID | global scalar | -| equal | c_ID[I] | element of global vector | -| equal | c_ID[I][J] | element of global array | -| equal | C_ID[I] | element of per-atom vector, I = ID of atom | -| equal | C_ID{i}[J] | element of per-atom array, I = ID of atom | -+--------+------------+--------------------------------------------| -| vector | c_ID | global vector | -| vector | c_ID[I] | column of global array | ----------+------------+--------------------------------------------+ -| atom | c_ID | per-atom vector | -| atom | c_ID[I] | column of per-atom array | -+--------+------------+--------------------------------------------+ ++--------+------------+------------------------------------------+ +| equal | c_ID | global scalar | +| equal | c_ID[I] | element of global vector | +| equal | c_ID[I][J] | element of global array | +| equal | C_ID[I] | element of per-atom vector (I = atom ID) | +| equal | C_ID[I][J] | element of per-atom array (I = atom ID) | ++--------+------------+------------------------------------------+ +| vector | c_ID | global vector | +| vector | c_ID[I] | column of global array | ++--------+------------+------------------------------------------+ +| atom | c_ID | per-atom vector | +| atom | c_ID[I] | column of per-atom array | ++--------+------------+------------------------------------------+ Note that if an equal-style variable formula wishes to access per-atom data from a compute, it must use capital "C" as the ID prefix and not @@ -1235,23 +1235,22 @@ Atom-style variables can use scalar values (same as for equal-style varaibles), or per-atom vectors of values.
The latter can also be a column of a per-atom array. -The various allowed fix references in the variable formulas for -equal-, vector-, and atom-style variables are listed in the following -table: +The allowed fix references in variable formulas for equal-, vector-, +and atom-style variables are listed in the following table: -+--------+------------+--------------------------------------------+ -| equal | f_ID | global scalar | -| equal | f_ID[I] | element of global vector | -| equal | f_ID[I][J] | element of global array | -| equal | F_ID[I] | element of per-atom vector, I = ID of atom | -| equal | F_ID{i}[J] | element of per-atom array, I = ID of atom | -+--------+------------+--------------------------------------------| -| vector | f_ID | global vector | -| vector | f_ID[I] | column of global array | ----------+------------+--------------------------------------------+ -| atom | f_ID | per-atom vector | -| atom | f_ID[I] | column of per-atom array | -+--------+------------+--------------------------------------------+ ++--------+------------+------------------------------------------+ +| equal | f_ID | global scalar | +| equal | f_ID[I] | element of global vector | +| equal | f_ID[I][J] | element of global array | +| equal | F_ID[I] | element of per-atom vector (I = atom ID) | +| equal | F_ID[I][J] | element of per-atom array (I = atom ID) | ++--------+------------+------------------------------------------+ +| vector | f_ID | global vector | +| vector | f_ID[I] | column of global array | ++--------+------------+------------------------------------------+ +| atom | f_ID | per-atom vector | +| atom | f_ID[I] | column of per-atom array | ++--------+------------+------------------------------------------+ Note that if an equal-style variable formula wishes to access per-atom data from a fix, it must use capital "F" as the ID prefix and not @@ -1312,21 +1311,27 @@ including other atom-style or atomfile-style variables.
If it uses a vector-style variable, a subscript must be used to access a single value from the vector-style variable. -Examples of different kinds of variable references are as follows. -There is no ambiguity as to what a reference means, since variables -produce only a global scalar or global vector or per-atom vector. +The allowed variable references in variable formulas for equal-, +vector-, and atom-style variables are listed in the following table. +Note that there is no ambiguity as to what a reference means, since +referenced variables produce only a global scalar or global vector or +per-atom vector. -+------------+----------------------------------------------------------------------+ -| v_name | global scalar from equal-style variable | -+------------+----------------------------------------------------------------------+ -| v_name | global vector from vector-style variable | -+------------+----------------------------------------------------------------------+ -| v_name | per-atom vector from atom-style or atomfile-style variable | -+------------+----------------------------------------------------------------------+ -| v_name[I] | Ith element of a global vector from vector-style variable | -+------------+----------------------------------------------------------------------+ -| v_name[I] | value of atom with ID = I from atom-style or atomfile-style variable | -+------------+----------------------------------------------------------------------+ ++--------+-----------+-----------------------------------------------------------------------------------+ +| equal | v_name | global scalar from an equal-style variable | +| equal | v_name[I] | element of global vector from a vector-style variable | +| equal | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable | ++--------+-----------+-----------------------------------------------------------------------------------+ +| vector | v_name | global scalar from an 
equal-style variable | +| vector | v_name | global vector from a vector-style variable | +| vector | v_name[I] | element of global vector from a vector-style variable | +| vector | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable | ++--------+-----------+-----------------------------------------------------------------------------------+ +| atom | v_name | global scalar from an equal-style variable | +| atom | v_name | per-atom vector from an atom-style or atomfile-style variable | +| atom | v_name[I] | element of global vector from a vector-style variable | +| atom | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable | ++--------+-----------+-----------------------------------------------------------------------------------+ For the I index, an integer can be specified or a variable name, specified as v_name, where name is the name of the variable. The diff --git a/src/variable.cpp b/src/variable.cpp index a41c12d111..ce8f16cd68 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -1509,7 +1509,6 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) if (!compute->scalar_flag) print_var_error(FLERR,"Mismatched compute in variable formula",ivar); - if (!compute->is_initialized()) print_var_error(FLERR,"Variable formula compute cannot be invoked before " "initialization by a run",ivar); @@ -1567,7 +1566,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) else value1 = compute->array[index1-1][index2-1]; argstack[nargstack++] = value1; - // C_ID[i] = scalar element of per-atom vector + // C_ID[i] = scalar element of per-atom vector, note uppercase "C" } else if (!lowercase && nbracket == 1) { @@ -1586,7 +1585,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) peratom2global(1,nullptr,compute->vector_atom,1,index1,tree, treestack,ntreestack,argstack,nargstack); - // C_ID[i][j] = scalar element of per-atom array + // C_ID[i][j] = scalar element 
of per-atom array, note uppercase "C" } else if (!lowercase && nbracket == 2) { @@ -1728,12 +1727,6 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) } else print_var_error(FLERR,"Mismatched compute in variable formula",ivar); } - - - - - - // ---------------- // fix @@ -1753,7 +1746,6 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) if (!fix) print_var_error(FLERR,fmt::format("Invalid fix ID '{}' in variable formula",word+2),ivar); - // parse zero or one or two trailing brackets // point i beyond last bracket // nbracket = # of bracket pairs @@ -1775,181 +1767,200 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) } } - // f_ID = scalar from global scalar, must be lowercase + // equal-style variable is being evaluated - if (nbracket == 0 && fix->scalar_flag && lowercase) { + if (style[ivar] == EQUAL) { + + // f_ID = scalar from global scalar - if (update->whichflag > 0 && update->ntimestep % fix->global_freq) - print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar); + if (lowercase && nbracket == 0) { - value1 = fix->compute_scalar(); - if (tree) { + if (!fix->scalar_flag) + print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + if (update->whichflag > 0 && update->ntimestep % fix->global_freq) + print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar); + + value1 = fix->compute_scalar(); + argstack[nargstack++] = value1; + + // f_ID[i] = scalar from global vector + + } else if (lowercase && nbracket == 1) { + + if (!fix->vector_flag) + print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + if (index1 > fix->size_vector && + fix->size_vector_variable == 0) + print_var_error(FLERR,"Variable formula fix vector is accessed out-of-range",ivar,0); + if (update->whichflag > 0 && update->ntimestep % fix->global_freq) + print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar); + + value1 = fix->compute_vector(index1-1); + 
argstack[nargstack++] = value1; + + // f_ID[i][j] = scalar from global array + + } else if (lowercase && nbracket == 2) { + + if (!fix->array_flag) + print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + if (index1 > fix->size_array_rows && + fix->size_array_rows_variable == 0) + print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0); + if (index2 > fix->size_array_cols) + print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0); + if (update->whichflag > 0 && update->ntimestep % fix->global_freq) + print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar); + + value1 = fix->compute_array(index1-1,index2-1); + argstack[nargstack++] = value1; + + // F_ID[i] = scalar element of per-atom vector, note uppercase "F" + + } else if (!lowercase && nbracket == 1) { + + if (!fix->peratom_flag) + print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + if (fix->size_peratom_cols) + print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + if (update->whichflag > 0 && + update->ntimestep % fix->peratom_freq) + print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar); + + peratom2global(1,nullptr,fix->vector_atom,1,index1,tree, + treestack,ntreestack,argstack,nargstack); + + // F_ID[i][j] = scalar element of per-atom array, note uppercase "F" + + } else if (!lowercase && nbracket == 2) { + + if (!fix->peratom_flag) + print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + if (!fix->size_peratom_cols) + print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + if (index2 > fix->size_peratom_cols) + print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0); + if (update->whichflag > 0 && update->ntimestep % fix->peratom_freq) + print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar); + + if (fix->array_atom) + peratom2global(1,nullptr,&fix->array_atom[0][index2-1], + 
fix->size_peratom_cols,index1, + tree,treestack,ntreestack,argstack,nargstack); + else + peratom2global(1,nullptr,nullptr,fix->size_peratom_cols,index1, + tree,treestack,ntreestack,argstack,nargstack); + + // no other possibilities for equal-style variable, so error + + } else print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + + // vector-style variable is being evaluated + + } else if (style[ivar] == VECTOR) { + + // f_ID = vector from global vector + + if (lowercase && nbracket == 0) { + + if (!fix->vector_flag) + print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + if (fix->size_vector == 0) + print_var_error(FLERR,"Variable formula fix vector is zero length",ivar); + if (update->whichflag > 0 && update->ntimestep % fix->global_freq) + print_var_error(FLERR,"Fix in variable not computed at compatible time",ivar); + + int nvec = fix->size_vector; + double *vec; + memory->create(vec,nvec,"variable:values"); + for (int m = 0; m < nvec; m++) + vec[m] = fix->compute_vector(m); + auto newtree = new Tree(); - newtree->type = VALUE; - newtree->value = value1; + newtree->type = VECTORARRAY; + newtree->array = vec; + newtree->nvector = nvec; + newtree->nstride = 1; + newtree->selfalloc = 1; treestack[ntreestack++] = newtree; - } else argstack[nargstack++] = value1; + + // f_ID[i] = vector from global array - // f_ID[i] = scalar from global vector, must be lowercase + } else if (lowercase && nbracket == 1) { - } else if (nbracket == 1 && fix->vector_flag && lowercase) { + if (!fix->array_flag) + print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + if (fix->size_array_rows == 0) + print_var_error(FLERR,"Variable formula fix array is zero length",ivar); + if (index1 > fix->size_array_cols) + print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0); + if (update->whichflag > 0 && update->ntimestep % fix->global_freq) + print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar); - if 
(index1 > fix->size_vector && - fix->size_vector_variable == 0) - print_var_error(FLERR,"Variable formula fix vector is accessed out-of-range",ivar,0); - if (update->whichflag > 0 && update->ntimestep % fix->global_freq) - print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar); + int nvec = fix->size_array_rows; + double *vec; + memory->create(vec,nvec,"variable:values"); + for (int m = 0; m < nvec; m++) + vec[m] = fix->compute_array(m,index1-1); - value1 = fix->compute_vector(index1-1); - if (tree) { auto newtree = new Tree(); - newtree->type = VALUE; - newtree->value = value1; + newtree->type = VECTORARRAY; + newtree->array = vec; + newtree->nvector = nvec; + newtree->nstride = 1; + newtree->selfalloc = 1; treestack[ntreestack++] = newtree; - } else argstack[nargstack++] = value1; + + // no other possibilities for vector-style variable, so error + + } else print_var_error(FLERR,"Mismatched fix in variable formula",ivar); - // f_ID[i][j] = scalar from global array, must be lowercase + // atom-style variable is being evaluated - } else if (nbracket == 2 && fix->array_flag && lowercase) { + } else if (style[ivar] == ATOM) { + + // f_ID = vector from per-atom vector - if (index1 > fix->size_array_rows && - fix->size_array_rows_variable == 0) - print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0); - if (index2 > fix->size_array_cols) - print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0); - if (update->whichflag > 0 && update->ntimestep % fix->global_freq) - print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar); + if (lowercase && nbracket == 0) { + + if (!fix->peratom_flag) + print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + if (fix->size_peratom_cols) + print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + if (update->whichflag > 0 && update->ntimestep % fix->peratom_freq) + print_var_error(FLERR,"Fix in variable not 
computed at compatible time",ivar); - value1 = fix->compute_array(index1-1,index2-1); - if (tree) { auto newtree = new Tree(); - newtree->type = VALUE; - newtree->value = value1; + newtree->type = ATOMARRAY; + newtree->array = fix->vector_atom; + newtree->nstride = 1; treestack[ntreestack++] = newtree; - } else argstack[nargstack++] = value1; + + // f_ID[i] = vector from per-atom array - // f_ID = vector from global vector, lowercase or uppercase + } else if (lowercase && nbracket == 1) { - } else if (nbracket == 0 && fix->vector_flag) { + if (!fix->peratom_flag) + print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + if (!fix->size_peratom_cols) + print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + if (index1 > fix->size_peratom_cols) + print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0); + if (update->whichflag > 0 && update->ntimestep % fix->peratom_freq) + print_var_error(FLERR,"Fix in variable not computed at compatible time",ivar); - if (update->whichflag > 0 && update->ntimestep % fix->global_freq) - print_var_error(FLERR,"Fix in variable not computed at compatible time",ivar); - if (tree == nullptr) - print_var_error(FLERR,"Fix global vector in equal-style variable formula",ivar); - if (treetype == ATOM) - print_var_error(FLERR,"Fix global vector in atom-style variable formula",ivar); - if (fix->size_vector == 0) - print_var_error(FLERR,"Variable formula fix vector is zero length",ivar); + auto newtree = new Tree(); + newtree->type = ATOMARRAY; + newtree->array = nullptr; + if (fix->array_atom) + newtree->array = &fix->array_atom[0][index1-1]; + newtree->nstride = fix->size_peratom_cols; + treestack[ntreestack++] = newtree; - int nvec = fix->size_vector; - double *vec; - memory->create(vec,nvec,"variable:values"); - for (int m = 0; m < nvec; m++) - vec[m] = fix->compute_vector(m); - - auto newtree = new Tree(); - newtree->type = VECTORARRAY; - newtree->array = vec; - newtree->nvector = nvec; - 
newtree->nstride = 1; - newtree->selfalloc = 1; - treestack[ntreestack++] = newtree; - - // f_ID[i] = vector from global array, lowercase or uppercase - - } else if (nbracket == 1 && fix->array_flag) { - - if (update->whichflag > 0 && update->ntimestep % fix->global_freq) - print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar); - if (tree == nullptr) - print_var_error(FLERR,"Fix global vector in equal-style variable formula",ivar); - if (treetype == ATOM) - print_var_error(FLERR,"Fix global vector in atom-style variable formula",ivar); - if (fix->size_array_rows == 0) - print_var_error(FLERR,"Variable formula fix array is zero length",ivar); - - int nvec = fix->size_array_rows; - double *vec; - memory->create(vec,nvec,"variable:values"); - for (int m = 0; m < nvec; m++) - vec[m] = fix->compute_array(m,index1-1); - - auto newtree = new Tree(); - newtree->type = VECTORARRAY; - newtree->array = vec; - newtree->nvector = nvec; - newtree->nstride = 1; - newtree->selfalloc = 1; - treestack[ntreestack++] = newtree; - - // f_ID[i] = scalar from per-atom vector - - } else if (nbracket == 1 && fix->peratom_flag && - fix->size_peratom_cols == 0) { - - if (update->whichflag > 0 && - update->ntimestep % fix->peratom_freq) - print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar); - - peratom2global(1,nullptr,fix->vector_atom,1,index1, - tree,treestack,ntreestack,argstack,nargstack); - - // f_ID[i][j] = scalar from per-atom array - - } else if (nbracket == 2 && fix->peratom_flag && - fix->size_peratom_cols > 0) { - - if (index2 > fix->size_peratom_cols) - print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0); - if (update->whichflag > 0 && - update->ntimestep % fix->peratom_freq) - print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar); - - if (fix->array_atom) - peratom2global(1,nullptr,&fix->array_atom[0][index2-1],fix->size_peratom_cols,index1, - 
tree,treestack,ntreestack,argstack,nargstack); - else - peratom2global(1,nullptr,nullptr,fix->size_peratom_cols,index1, - tree,treestack,ntreestack,argstack,nargstack); - - // f_ID = vector from per-atom vector - - } else if (nbracket == 0 && fix->peratom_flag && - fix->size_peratom_cols == 0) { - - if (tree == nullptr) - print_var_error(FLERR,"Per-atom fix in equal-style variable formula",ivar); - if (update->whichflag > 0 && - update->ntimestep % fix->peratom_freq) - print_var_error(FLERR,"Fix in variable not computed at compatible time",ivar); - - auto newtree = new Tree(); - newtree->type = ATOMARRAY; - newtree->array = fix->vector_atom; - newtree->nstride = 1; - treestack[ntreestack++] = newtree; - - // f_ID[i] = vector from per-atom array - - } else if (nbracket == 1 && fix->peratom_flag && - fix->size_peratom_cols > 0) { - - if (tree == nullptr) - print_var_error(FLERR,"Per-atom fix in equal-style variable formula",ivar); - if (index1 > fix->size_peratom_cols) - print_var_error(FLERR,"Variable formula fix array is accessed out-of-range",ivar,0); - if (update->whichflag > 0 && - update->ntimestep % fix->peratom_freq) - print_var_error(FLERR,"Fix in variable not computed at compatible time",ivar); - - auto newtree = new Tree(); - newtree->type = ATOMARRAY; - if (fix->array_atom) - newtree->array = &fix->array_atom[0][index1-1]; - newtree->nstride = fix->size_peratom_cols; - treestack[ntreestack++] = newtree; - - } else print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + // no other possibilities for atom-style variable, so error + + } else print_var_error(FLERR,"Mismatched fix in variable formula",ivar); + } // ---------------- // variable @@ -1979,124 +1990,140 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) i = ptr-str+1; } - // v_name = scalar from internal-style variable - // access value directly + // vname with no bracket - if (nbracket == 0 && style[ivar] == INTERNAL) { + if (nbracket == 0) { - value1 = dvalue[ivar]; - if 
(tree) { - auto newtree = new Tree(); - newtree->type = VALUE; - newtree->value = value1; - treestack[ntreestack++] = newtree; - } else argstack[nargstack++] = value1; + // scalar from internal-style variable + // access value directly - // v_name = scalar from non atom/atomfile & non vector-style variable - // access value via retrieve() + if (style[ivar] == INTERNAL) { - } else if (nbracket == 0 && style[ivar] != ATOM && - style[ivar] != ATOMFILE && style[ivar] != VECTOR) { + value1 = dvalue[ivar]; + if (tree) { + auto newtree = new Tree(); + newtree->type = VALUE; + newtree->value = value1; + treestack[ntreestack++] = newtree; + } else argstack[nargstack++] = value1; - char *var = retrieve(word+2); - if (var == nullptr) - print_var_error(FLERR,"Invalid variable evaluation in variable formula",ivar); - if (utils::is_double(var)) { + // scalar from any style variable except VECTOR, ATOM, ATOMFILE + // access value via retrieve() + + } else if (style[ivar] != ATOM && style[ivar] != ATOMFILE && style[ivar] != VECTOR) { + + char *var = retrieve(word+2); + if (var == nullptr) + print_var_error(FLERR,"Invalid variable evaluation in variable formula",ivar); + if (!utils::is_double(var)) + print_var_error(FLERR,"Non-numeric variable value in variable formula",ivar); if (tree) { auto newtree = new Tree(); newtree->type = VALUE; newtree->value = atof(var); treestack[ntreestack++] = newtree; } else argstack[nargstack++] = atof(var); - } else print_var_error(FLERR,"Non-numeric variable value in variable formula",ivar); - // v_name = per-atom vector from atom-style variable - // evaluate the atom-style variable as newtree + // vector from vector-style variable + // evaluate the vector-style variable, put result in newtree - } else if (nbracket == 0 && style[ivar] == ATOM) { + } else if (style[ivar] == VECTOR) { - if (tree == nullptr) - print_var_error(FLERR,"Atom-style variable in equal-style variable formula",ivar); - if (treetype == VECTOR) -
print_var_error(FLERR,"Atom-style variable in vector-style variable formula",ivar); + if (tree == nullptr) + print_var_error(FLERR,"Vector-style variable in equal-style variable formula",ivar); + if (treetype == ATOM) + print_var_error(FLERR,"Vector-style variable in atom-style variable formula",ivar); - Tree *newtree = nullptr; - evaluate(data[ivar][0],&newtree,ivar); - treestack[ntreestack++] = newtree; + double *vec; + int nvec = compute_vector(ivar,&vec); - // v_name = per-atom vector from atomfile-style variable - - } else if (nbracket == 0 && style[ivar] == ATOMFILE) { - - if (tree == nullptr) - print_var_error(FLERR,"Atomfile-style variable in equal-style variable formula",ivar); - if (treetype == VECTOR) - print_var_error(FLERR,"Atomfile-style variable in vector-style variable formula",ivar); - - auto newtree = new Tree(); - newtree->type = ATOMARRAY; - newtree->array = reader[ivar]->fixstore->vstore; - newtree->nstride = 1; - treestack[ntreestack++] = newtree; - - // v_name = vector from vector-style variable - // evaluate the vector-style variable, put result in newtree - - } else if (nbracket == 0 && style[ivar] == VECTOR) { - - if (tree == nullptr) - print_var_error(FLERR,"Vector-style variable in equal-style variable formula",ivar); - if (treetype == ATOM) - print_var_error(FLERR,"Vector-style variable in atom-style variable formula",ivar); - - double *vec; - int nvec = compute_vector(ivar,&vec); - - auto newtree = new Tree(); - newtree->type = VECTORARRAY; - newtree->array = vec; - newtree->nvector = nvec; - newtree->nstride = 1; - treestack[ntreestack++] = newtree; - - // v_name[N] = scalar from atom-style variable - // compute the per-atom variable in result - // use peratom2global to extract single value from result - - } else if (nbracket && style[ivar] == ATOM) { - - double *result; - memory->create(result,atom->nlocal,"variable:result"); - compute_atom(ivar,0,result,1,0); - 
peratom2global(1,nullptr,result,1,index,tree,treestack,ntreestack,argstack,nargstack); - memory->destroy(result); - - // v_name[N] = scalar from atomfile-style variable - - } else if (nbracket && style[ivar] == ATOMFILE) { - - peratom2global(1,nullptr,reader[ivar]->fixstore->vstore,1,index, - tree,treestack,ntreestack,argstack,nargstack); - - // v_name[N] = scalar from vector-style variable - // compute the vector-style variable, extract single value - - } else if (nbracket && style[ivar] == VECTOR) { - - double *vec; - int nvec = compute_vector(ivar,&vec); - if (index <= 0 || index > nvec) - print_var_error(FLERR,"Invalid index into vector-style variable",ivar); - int m = index; // convert from tagint to int - - if (tree) { auto newtree = new Tree(); - newtree->type = VALUE; - newtree->value = vec[m-1]; + newtree->type = VECTORARRAY; + newtree->array = vec; + newtree->nvector = nvec; + newtree->nstride = 1; treestack[ntreestack++] = newtree; - } else argstack[nargstack++] = vec[m-1]; - } else print_var_error(FLERR,"Mismatched variable in variable formula",ivar); + // vector from atom-style variable + // evaluate the atom-style variable as newtree + + } else if (style[ivar] == ATOM) { + + if (tree == nullptr) + print_var_error(FLERR,"Atom-style variable in equal-style variable formula",ivar); + if (treetype == VECTOR) + print_var_error(FLERR,"Atom-style variable in vector-style variable formula",ivar); + + Tree *newtree = nullptr; + evaluate(data[ivar][0],&newtree,ivar); + treestack[ntreestack++] = newtree; + + // vector from atomfile-style variable + // point to the values in FixStore instance + + } else if (style[ivar] == ATOMFILE) { + + if (tree == nullptr) + print_var_error(FLERR,"Atomfile-style variable in equal-style variable formula",ivar); + if (treetype == VECTOR) + print_var_error(FLERR,"Atomfile-style variable in vector-style variable formula",ivar); + + auto newtree = new Tree(); + newtree->type = ATOMARRAY; + newtree->array = 
reader[ivar]->fixstore->vstore; + newtree->nstride = 1; + treestack[ntreestack++] = newtree; + + // no other possibilities for variable with no bracket + + } else print_var_error(FLERR,"Mismatched variable in variable formula",ivar); + + // vname[i] with one bracket + + } else if (nbracket == 1) { + + // scalar from vector-style variable + // compute the vector-style variable, extract single value + + if (style[ivar] == VECTOR) { + + double *vec; + int nvec = compute_vector(ivar,&vec); + if (index <= 0 || index > nvec) + print_var_error(FLERR,"Invalid index into vector-style variable",ivar); + int m = index; // convert from tagint to int + + if (tree) { + auto newtree = new Tree(); + newtree->type = VALUE; + newtree->value = vec[m-1]; + treestack[ntreestack++] = newtree; + } else argstack[nargstack++] = vec[m-1]; + + // scalar from atom-style variable + // compute the per-atom variable in result + // use peratom2global to extract single value from result + + } else if (style[ivar] == ATOM) { + + double *result; + memory->create(result,atom->nlocal,"variable:result"); + compute_atom(ivar,0,result,1,0); + peratom2global(1,nullptr,result,1,index,tree,treestack,ntreestack,argstack,nargstack); + memory->destroy(result); + + // scalar from atomfile-style variable + // use peratom2global to extract single value from FixStore instance + + } else if (style[ivar] == ATOMFILE) { + + peratom2global(1,nullptr,reader[ivar]->fixstore->vstore,1,index, + tree,treestack,ntreestack,argstack,nargstack); + + // no other possibilities for variable with one bracket + + } else print_var_error(FLERR,"Mismatched variable in variable formula",ivar); + } // ---------------- // math/group/special/labelmap function or atom value/vector or From c6233547a531cee52049780592e8c3589960633d Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Mon, 21 Aug 2023 09:39:00 -0600 Subject: [PATCH 038/107] update compute and fix doc pages for new generality --- doc/src/compute.rst | 114 
++++++++++++++++++++++++-------------------- doc/src/fix.rst | 89 +++++++++++++++++++--------------- 2 files changed, 113 insertions(+), 90 deletions(-) diff --git a/doc/src/compute.rst b/doc/src/compute.rst index 226dc6373b..2780cac368 100644 --- a/doc/src/compute.rst +++ b/doc/src/compute.rst @@ -27,58 +27,62 @@ Examples Description """"""""""" -Define a computation that will be performed on a group of atoms. -Quantities calculated by a compute are instantaneous values, meaning -they are calculated from information about atoms on the current -timestep or iteration, though a compute may internally store some -information about a previous state of the system. Defining a compute -does not perform a computation. Instead computes are invoked by other -LAMMPS commands as needed (e.g., to calculate a temperature needed for -a thermostat fix or to generate thermodynamic or dump file output). -See the :doc:`Howto output ` page for a summary of -various LAMMPS output options, many of which involve computes. +Define a diagnostic computation that will be performed on a group of +atoms. Quantities calculated by a compute are instantaneous values, +meaning they are calculated from information about atoms on the +current timestep or iteration, though internally a compute may store +some information about a previous state of the system. Defining a +compute does not perform the computation. Instead computes are +invoked by other LAMMPS commands as needed (e.g., to calculate a +temperature needed for a thermostat fix or to generate thermodynamic +or dump file output). See the :doc:`Howto output ` page +for a summary of various LAMMPS output options, many of which involve +computes. The ID of a compute can only contain alphanumeric characters and underscores. ---------- -Computes calculate and store any of four styles of quantities: global, -per-atom, local, or per-grid. A global quantity is one or more -system-wide values, e.g. the temperature of the system. 
A per-atom -quantity is one or more values per atom, e.g. the kinetic energy of -each atom. Per-atom values are set to 0.0 for atoms not in the -specified compute group. Local quantities are calculated by each -processor based on the atoms it owns, but there may be zero or more -per atom, e.g. a list of bond distances. Per-grid quantities are -calculated on a regular 2d or 3d grid which overlays a 2d or 3d -simulation domain. The grid points and the data they store are -distributed across processors; each processor owns the grid points -which fall within its subdomain. +Computes calculate and store any of four *styles* of quantities: +global, per-atom, local, or per-grid. -Computes that produce per-atom quantities have the word "atom" at the -end of their style, e.g. *ke/atom*\ . Computes that produce local -quantities have the word "local" at the end of their style, -e.g. *bond/local*\ . Computes that produce per-grid quantities have -the word "grid" at the end of their style, e.g. *property/grid*\ . -Styles with neither "atom" or "local" or "grid" at the end of their -style name produce global quantities. +A global quantity is one or more system-wide values, e.g. the +temperature of the system. A per-atom quantity is one or more values +per atom, e.g. the kinetic energy of each atom. Per-atom values are +set to 0.0 for atoms not in the specified compute group. Local +quantities are calculated by each processor based on the atoms it +owns, but there may be zero or more per atom, e.g. a list of bond +distances. Per-grid quantities are calculated on a regular 2d or 3d +grid which overlays a 2d or 3d simulation domain. The grid points and +the data they store are distributed across processors; each processor +owns the grid points which fall within its subdomain. -Note that a single compute typically produces either global or -per-atom or local or per-grid values. It does not compute both global -and per-atom values. 
It can produce local values or per-grid values -in tandem with global or per-atom quantities. The compute doc page -will explain the details. +As a general rule of thumb, computes that produce per-atom quantities +have the word "atom" at the end of their style, e.g. *ke/atom*\ . +Computes that produce local quantities have the word "local" at the +end of their style, e.g. *bond/local*\ . Computes that produce +per-grid quantities have the word "grid" at the end of their style, +e.g. *property/grid*\ . And styles with neither "atom" or "local" or +"grid" at the end of their style name produce global quantities. -Global, per-atom, local, and per-grid quantities come in three kinds: -a single scalar value, a vector of values, or a 2d array of values. -The doc page for each compute describes the style and kind of values -it produces, e.g. a per-atom vector. Some computes produce more than -one kind of a single style, e.g. a global scalar and a global vector. +Global, per-atom, local, and per-grid quantities can also be of three +*kinds*: a single scalar value (global only), a vector of values, or a +2d array of values. For per-atom, local, and per-grid quantities, a +"vector" means a single value for each atom, each local entity +(e.g. bond), or grid cell. Likewise an "array", means multiple values +for each atom, each local entity, or each grid cell. -When a compute quantity is accessed, as in many of the output commands -discussed below, it can be referenced via the following bracket -notation, where ID is the ID of the compute: +Note that a single compute can produce any combination of global, +per-atom, local, or per-grid values. Likewise it can prouduce any +combination of scalar, vector, or array output for each style. The +exception is that for per-atom, local, and per-grid output, either a +vector or array can be produced, but not both. The doc page for each +compute explains the values it produces. 
+ +When a compute output is accessed by another input script command it +is referenced via the following bracket notation, where ID is the ID +of the compute: +-------------+--------------------------------------------+ | c_ID | entire scalar, vector, or array | @@ -89,17 +93,23 @@ notation, where ID is the ID of the compute: +-------------+--------------------------------------------+ In other words, using one bracket reduces the dimension of the -quantity once (vector :math:`\to` scalar, array :math:`\to` vector). Using two -brackets reduces the dimension twice (array :math:`\to` scalar). Thus a -command that uses scalar compute values as input can also process elements of a -vector or array. +quantity once (vector :math:`\to` scalar, array :math:`\to` vector). +Using two brackets reduces the dimension twice (array :math:`\to` +scalar). Thus, for example, a command that uses global scalar compute +values as input can also process elements of a vector or array. +Depending on the command, this can either be done directly using the +syntax in the table, or by first defining a :doc:`variable ` +of the appropriate style to store the quantity, then using the +variable as an input to the command. -Note that commands and :doc:`variables ` which use compute -quantities typically do not allow for all kinds (e.g., a command may -require a vector of values, not a scalar). This means there is no -ambiguity about referring to a compute quantity as c_ID even if it -produces, for example, both a scalar and vector. The doc pages for -various commands explain the details. +Note that commands and :doc:`variables ` which take compute +outputs as input typically do not allow for all styles and kinds of +data (e.g., a command may require global but not per-atom values, or +it may require a vector of values, not a scalar). This means there is +typically no ambiguity about referring to a compute output as c_ID +even if it produces, for example, both a scalar and vector. 
The doc +pages for various commands explain the details, including how any +ambiguities are resolved. ---------- diff --git a/doc/src/fix.rst b/doc/src/fix.rst index 09fc05d500..a879a45e05 100644 --- a/doc/src/fix.rst +++ b/doc/src/fix.rst @@ -77,35 +77,44 @@ for individual fixes for info on which ones can be restarted. ---------- -Some fixes calculate one or more of four styles of quantities: global, -per-atom, local, or per-grid, which can be used by other commands or -output as described below. A global quantity is one or more -system-wide values, e.g. the energy of a wall interacting with -particles. A per-atom quantity is one or more values per atom, -e.g. the displacement vector for each atom since time 0. Per-atom -values are set to 0.0 for atoms not in the specified fix group. Local -quantities are calculated by each processor based on the atoms it -owns, but there may be zero or more per atoms. Per-grid quantities -are calculated on a regular 2d or 3d grid which overlays a 2d or 3d -simulation domain. The grid points and the data they store are -distributed across processors; each processor owns the grid points -which fall within its subdomain. +Some fixes calculate and store any of four *styles* of quantities: +global, per-atom, local, or per-grid. -Note that a single fix typically produces either global or per-atom or -local or per-grid values (or none at all). It does not produce both -global and per-atom. It can produce local or per-grid values in -tandem with global or per-atom values. The fix doc page will explain -the details. +A global quantity is one or more system-wide values, e.g. the energy +of a wall interacting with particles. A per-atom quantity is one or +more values per atom, e.g. the original coordinates of each atom at +time 0. Per-atom values are set to 0.0 for atoms not in the specified +fix group. Local quantities are calculated by each processor based on +the atoms it owns, but there may be zero or more per atom, e.g. 
values +for each bond. Per-grid quantities are calculated on a regular 2d or +3d grid which overlays a 2d or 3d simulation domain. The grid points +and the data they store are distributed across processors; each +processor owns the grid points which fall within its subdomain. -Global, per-atom, local, and per-grid quantities come in three kinds: -a single scalar value, a vector of values, or a 2d array of values. -The doc page for each fix describes the style and kind of values it -produces, e.g. a per-atom vector. Some fixes produce more than one -kind of a single style, e.g. a global scalar and a global vector. +As a general rule of thumb, fixes that produce per-atom quantities +have the word "atom" at the end of their style, e.g. *ave/atom*\ . +Fixes that produce local quantities have the word "local" at the end +of their style, e.g. *store/local*\ . Fixes that produce per-grid +quantities have the word "grid" at the end of their style, +e.g. *ave/grid*\ . -When a fix quantity is accessed, as in many of the output commands -discussed below, it can be referenced via the following bracket -notation, where ID is the ID of the fix: +Global, per-atom, local, and per-grid quantities can also be of three +*kinds*: a single scalar value (global only), a vector of values, or a +2d array of values. For per-atom, local, and per-grid quantities, a +"vector" means a single value for each atom, each local entity +(e.g. bond), or grid cell. Likewise an "array" means multiple values +for each atom, each local entity, or each grid cell. + +Note that a single fix can produce any combination of global, +per-atom, local, or per-grid values. Likewise it can produce any +combination of scalar, vector, or array output for each style. The +exception is that for per-atom, local, and per-grid output, either a +vector or array can be produced, but not both. The doc page for each +fix explains the values it produces, if any.
+ +When a fix output is accessed by another input script command it is +referenced via the following bracket notation, where ID is the ID of +the fix: +-------------+--------------------------------------------+ | f_ID | entire scalar, vector, or array | @@ -116,19 +125,23 @@ notation, where ID is the ID of the fix: +-------------+--------------------------------------------+ In other words, using one bracket reduces the dimension of the -quantity once (vector :math:`\to` scalar, array :math:`\to` vector). Using two -brackets reduces the dimension twice (array :math:`\to` scalar). Thus, a -command that uses scalar fix values as input can also process elements of a -vector or array. +quantity once (vector :math:`\to` scalar, array :math:`\to` vector). +Using two brackets reduces the dimension twice (array :math:`\to` +scalar). Thus, for example, a command that uses global scalar fix +values as input can also process elements of a vector or array. +Depending on the command, this can either be done directly using the +syntax in the table, or by first defining a :doc:`variable ` +of the appropriate style to store the quantity, then using the +variable as an input to the command. -Note that commands and :doc:`variables ` that use fix -quantities typically do not allow for all kinds (e.g., a command may -require a vector of values, not a scalar), and even if they do, the context -in which they are called can be used to resolve which output is being -requested. This means there is no -ambiguity about referring to a fix quantity as f_ID even if it -produces, for example, both a scalar and vector. The doc pages for -various commands explain the details. +Note that commands and :doc:`variables ` which take fix +outputs as input typically do not allow for all styles and kinds of +data (e.g., a command may require global but not per-atom values, or +it may require a vector of values, not a scalar). 
This means there is +typically no ambiguity about referring to a fix output as f_ID even if +it produces, for example, both a scalar and vector. The doc pages for +various commands explain the details, including how any ambiguities +are resolved. ---------- From ab2b83f65430892d1f45a928248f01507b7ddfed Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Mon, 21 Aug 2023 10:54:42 -0600 Subject: [PATCH 039/107] clarify doc for fix ave/histo command --- doc/src/compute_reduce.rst | 26 +++--- doc/src/fix_ave_histo.rst | 42 +++++----- src/compute_reduce_region.cpp | 145 +++++++++++----------------------- src/fix_ave_histo.cpp | 2 +- 4 files changed, 86 insertions(+), 129 deletions(-) diff --git a/doc/src/compute_reduce.rst b/doc/src/compute_reduce.rst index 31591d4419..4692e161b4 100644 --- a/doc/src/compute_reduce.rst +++ b/doc/src/compute_reduce.rst @@ -63,9 +63,11 @@ Description """"""""""" Define a calculation that "reduces" one or more vector inputs into -scalar values, one per listed input. The inputs can be per-atom or -local quantities and must all be the same kind (per-atom or local); -see discussion of the optional *inputs* keyword below. +scalar values, one per listed input. For the compute reduce command, +the inputs can be either per-atom or local quantities and must all be +of the same kind (per-atom or local); see discussion of the optional +*inputs* keyword below. The compute reduce/region command can only be +used with per-atom inputs. Atom attributes are per-atom quantities, :doc:`computes ` and :doc:`fixes ` can generate either per-atom or local quantities, @@ -92,13 +94,13 @@ values. Each listed input is operated on independently. For per-atom inputs, the group specified with this command means only atoms within the -group contribute to the result. For per-atom inputs, if the compute -reduce/region command is used, the atoms must also currently be within -the region.
Note that an input that produces per-atom quantities may -define its own group which affects the quantities it returns. For -example, if a compute is used as an input which generates a per-atom -vector, it will generate values of 0.0 for atoms that are not in the -group specified for that compute. +group contribute to the result. Likewise for per-atom inputs, if the +compute reduce/region command is used, the atoms must also currently +be within the region. Note that an input that produces per-atom +quantities may define its own group which affects the quantities it +returns. For example, if a compute is used as an input which +generates a per-atom vector, it will generate values of 0.0 for atoms +that are not in the group specified for that compute. Each listed input can be an atom attribute (position, velocity, force component) or can be the result of a :doc:`compute ` or @@ -246,7 +248,9 @@ the quantities being reduced are in. Restrictions """""""""""" - none + +As noted above, the compute reduce/region command can only be used +with per-atom inputs. Related commands """""""""""""""" diff --git a/doc/src/fix_ave_histo.rst b/doc/src/fix_ave_histo.rst index 8bb66f0615..31e5476f9e 100644 --- a/doc/src/fix_ave_histo.rst +++ b/doc/src/fix_ave_histo.rst @@ -79,9 +79,10 @@ Description Use one or more values as inputs every few timesteps to create a single histogram. The histogram can then be averaged over longer -timescales. The resulting histogram can be used by other :doc:`output commands `, and can also be written to a file. The -fix ave/histo/weight command has identical syntax to fix ave/histo, -except that exactly two values must be specified. See details below. +timescales. The resulting histogram can be used by other :doc:`output +commands `, and can also be written to a file. The fix +ave/histo/weight command has identical syntax to fix ave/histo, except +that exactly two values must be specified. See details below. 
The group specified with this command is ignored for global and local input values. For per-atom input values, only atoms in the group @@ -96,14 +97,18 @@ different ways; see the discussion of the *beyond* keyword below. Each input value can be an atom attribute (position, velocity, force component) or can be the result of a :doc:`compute ` or -:doc:`fix ` or the evaluation of an equal-style or vector-style or -atom-style :doc:`variable `. The set of input values can be -either all global, all per-atom, or all local quantities. Inputs of -different kinds (e.g. global and per-atom) cannot be mixed. Atom -attributes are per-atom vector values. See the page for -individual "compute" and "fix" commands to see what kinds of -quantities they generate. See the optional *kind* keyword below for -how to force the fix ave/histo command to disambiguate if necessary. +:doc:`fix ` or the evaluation of an equal-style or vector-style +or atom-style :doc:`variable `. The set of input values can +be either all global, all per-atom, or all local quantities. Inputs +of different kinds (e.g. global and per-atom) cannot be mixed. Atom +attributes are per-atom vector values. See the page for individual +"compute" and "fix" commands to see what kinds of quantities they +generate. + +Note that a compute or fix can produce multiple kinds of data (global, +per-atom, local). If LAMMPS cannot unambiguously determine which kind +of data to use, the optional *kind* keyword discussed below can force +the desired disambiguation. Note that the output of this command is a single histogram for all input values combined together, not one histogram per input value. @@ -258,13 +263,14 @@ keyword is set to *vector*, then all input values must be global or per-atom or local vectors, or columns of global or per-atom or local arrays. -The *kind* keyword only needs to be set if a compute or fix produces -more than one kind of output (global, per-atom, local).
If this is -not the case, then LAMMPS will determine what kind of input is -provided and whether all the input arguments are consistent. If a -compute or fix produces more than one kind of output, the *kind* -keyword should be used to specify which output will be used. The -remaining input arguments must still be consistent. +The *kind* keyword only needs to be used if any of the specified input +computes or fixes produce more than one kind of output (global, +per-atom, local). If not, LAMMPS will determine the kind of data all +the inputs produce and verify it is all the same kind. If not, an +error will be triggered. If a compute or fix produces more than one +kind of output, the *kind* keyword should be used to specify which +output will be used. The other input arguments must still be +consistent. The *beyond* keyword determines how input values that fall outside the *lo* to *hi* bounds are treated. Values such that *lo* :math:`\le` value diff --git a/src/compute_reduce_region.cpp b/src/compute_reduce_region.cpp index 2f5a3de675..d0a32b8adf 100644 --- a/src/compute_reduce_region.cpp +++ b/src/compute_reduce_region.cpp @@ -35,13 +35,15 @@ static constexpr double BIG = 1.0e20; ComputeReduceRegion::ComputeReduceRegion(LAMMPS *lmp, int narg, char **arg) : ComputeReduce(lmp, narg, arg) { + if (input_mode == LOCAL) + error->all(FLERR,"Compute reduce/region cannot use local data as input"); } /* ---------------------------------------------------------------------- calculate reduced value for one input M and return it if flag = -1: sum/min/max/ave all values in vector - for per-atom quantities, limit to atoms in group and region + limit to atoms in group and region if mode = MIN or MAX, also set index to which vector value wins if flag >= 0: simply return vector[flag] ------------------------------------------------------------------------- */ @@ -59,6 +61,7 @@ double ComputeReduceRegion::compute_one(int m, int flag) // initialization in case it has not yet been run,
e.g. when // the compute was invoked right after it has been created + if ((val.which == ArgInfo::COMPUTE) || (val.which == ArgInfo::FIX)) { if (val.val.c == nullptr) init(); } @@ -99,52 +102,29 @@ double ComputeReduceRegion::compute_one(int m, int flag) // invoke compute if not previously invoked } else if (val.which == ArgInfo::COMPUTE) { - if (input_mode == PERATOM) { - if (!(val.val.c->invoked_flag & Compute::INVOKED_PERATOM)) { - val.val.c->compute_peratom(); - val.val.c->invoked_flag |= Compute::INVOKED_PERATOM; - } - if (aidx == 0) { - double *compute_vector = val.val.c->vector_atom; - if (flag < 0) { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2])) - combine(one, compute_vector[i], i); - } else - one = compute_vector[flag]; - } else { - double **compute_array = val.val.c->array_atom; - int aidxm1 = aidx - 1; - if (flag < 0) { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2])) - combine(one, compute_array[i][aidxm1], i); - } else - one = compute_array[flag][aidxm1]; - } + if (!(val.val.c->invoked_flag & Compute::INVOKED_PERATOM)) { + val.val.c->compute_peratom(); + val.val.c->invoked_flag |= Compute::INVOKED_PERATOM; + } - } else if (input_mode == LOCAL) { - if (!(val.val.c->invoked_flag & Compute::INVOKED_LOCAL)) { - val.val.c->compute_local(); - val.val.c->invoked_flag |= Compute::INVOKED_LOCAL; - } - - if (aidx == 0) { - double *compute_vector = val.val.c->vector_local; - if (flag < 0) - for (int i = 0; i < val.val.c->size_local_rows; i++) combine(one, compute_vector[i], i); - else - one = compute_vector[flag]; - } else { - double **compute_array = val.val.c->array_local; - int aidxm1 = aidx - 1; - if (flag < 0) - for (int i = 0; i < val.val.c->size_local_rows; i++) + if (aidx == 0) { + double *compute_vector = val.val.c->vector_atom; + if (flag < 0) { + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit && region->match(x[i][0], x[i][1], 
x[i][2])) + combine(one, compute_vector[i], i); + } else + one = compute_vector[flag]; + } else { + double **compute_array = val.val.c->array_atom; + int aidxm1 = aidx - 1; + if (flag < 0) { + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2])) combine(one, compute_array[i][aidxm1], i); - else - one = compute_array[flag][aidxm1]; - } + } else + one = compute_array[flag][aidxm1]; } // check if fix frequency is a match @@ -153,45 +133,26 @@ double ComputeReduceRegion::compute_one(int m, int flag) if (update->ntimestep % val.val.f->peratom_freq) error->all(FLERR, "Fix {} used in compute {} not computed at compatible time", val.id, style); - if (input_mode == PERATOM) { - if (aidx == 0) { - double *fix_vector = val.val.f->vector_atom; - if (flag < 0) { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2])) - combine(one, fix_vector[i], i); - } else - one = fix_vector[flag]; - } else { - double **fix_array = val.val.f->array_atom; - int aidxm1 = aidx - 1; - if (flag < 0) { - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2])) - combine(one, fix_array[i][aidxm1], i); - } else - one = fix_array[flag][aidxm1]; - } - - } else if (input_mode == LOCAL) { - if (aidx == 0) { - double *fix_vector = val.val.f->vector_local; - if (flag < 0) - for (int i = 0; i < val.val.f->size_local_rows; i++) combine(one, fix_vector[i], i); - else - one = fix_vector[flag]; - } else { - double **fix_array = val.val.f->array_local; - int aidxm1 = aidx - 1; - if (flag < 0) - for (int i = 0; i < val.val.f->size_local_rows; i++) + if (aidx == 0) { + double *fix_vector = val.val.f->vector_atom; + if (flag < 0) { + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2])) + combine(one, fix_vector[i], i); + } else + one = fix_vector[flag]; + } else { + double **fix_array = val.val.f->array_atom; + int aidxm1 
= aidx - 1; + if (flag < 0) { + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit && region->match(x[i][0], x[i][1], x[i][2])) combine(one, fix_array[i][aidxm1], i); - else - one = fix_array[flag][aidxm1]; - } + } else + one = fix_array[flag][aidxm1]; } - // evaluate atom-style variable + // evaluate atom-style variable } else if (val.which == ArgInfo::VARIABLE) { if (atom->nmax > maxatom) { @@ -220,25 +181,11 @@ bigint ComputeReduceRegion::count(int m) if (val.which == ArgInfo::X || val.which == ArgInfo::V || val.which == ArgInfo::F) return group->count(igroup, region); - else if (val.which == ArgInfo::COMPUTE) { - if (input_mode == PERATOM) { - return group->count(igroup, region); - } else if (input_mode == LOCAL) { - bigint ncount = val.val.c->size_local_rows; - bigint ncountall; - MPI_Allreduce(&ncount, &ncountall, 1, MPI_DOUBLE, MPI_SUM, world); - return ncountall; - } - } else if (val.which == ArgInfo::FIX) { - if (input_mode == PERATOM) { - return group->count(igroup, region); - } else if (input_mode == LOCAL) { - bigint ncount = val.val.f->size_local_rows; - bigint ncountall; - MPI_Allreduce(&ncount, &ncountall, 1, MPI_DOUBLE, MPI_SUM, world); - return ncountall; - } - } else if (val.which == ArgInfo::VARIABLE) + else if (val.which == ArgInfo::COMPUTE) + return group->count(igroup, region); + else if (val.which == ArgInfo::FIX) + return group->count(igroup, region); + else if (val.which == ArgInfo::VARIABLE) return group->count(igroup, region); bigint dummy = 0; diff --git a/src/fix_ave_histo.cpp b/src/fix_ave_histo.cpp index 0a2975bb2e..4503ad56f4 100644 --- a/src/fix_ave_histo.cpp +++ b/src/fix_ave_histo.cpp @@ -164,7 +164,7 @@ FixAveHisto::FixAveHisto(LAMMPS *lmp, int narg, char **arg) : } // check input args for kind consistency - // all inputs must all be global, per-atom, or local + // inputs must all be all either global, per-atom, or local if (nevery <= 0) error->all(FLERR,"Illegal {} nevery value: {}", mycmd, nevery); From 
aad232ffc64240a24f5421c46df0a5d84aceff0a Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 22 Aug 2023 11:46:57 -0400 Subject: [PATCH 040/107] fix typo --- src/variable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/variable.cpp b/src/variable.cpp index ce8f16cd68..f3c987f00c 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -1997,7 +1997,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) // scalar from internal-style variable // access value directly - if (style[ivar] = INTERNAL) { + if (style[ivar] == INTERNAL) { value1 = dvalue[ivar]; if (tree) { From 7d9c068da09d8135c2e672f2de21b1ed20f4510d Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 22 Aug 2023 11:50:54 -0400 Subject: [PATCH 041/107] whitespace --- src/VORONOI/compute_voronoi_atom.cpp | 12 +++---- src/compute_reduce.cpp | 8 ++--- src/compute_reduce_region.cpp | 2 +- src/variable.cpp | 54 ++++++++++++++-------------- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/src/VORONOI/compute_voronoi_atom.cpp b/src/VORONOI/compute_voronoi_atom.cpp index eb4f53986f..b4f1aa3055 100644 --- a/src/VORONOI/compute_voronoi_atom.cpp +++ b/src/VORONOI/compute_voronoi_atom.cpp @@ -391,13 +391,13 @@ void ComputeVoronoi::checkOccupation() int i, j, k; double rx, ry, rz; - + int nlocal = atom->nlocal; int nall = atom->nghost + nlocal; double **x = atom->x; // prepare destination buffer for variable evaluation - + if (atom->nmax > lmax) { memory->destroy(lnext); lmax = atom->nmax; @@ -432,7 +432,7 @@ void ComputeVoronoi::checkOccupation() } // MPI sum occupation - + #ifdef NOTINPLACE memcpy(sendocc, occvec, oldnatoms*sizeof(*occvec)); MPI_Allreduce(sendocc, occvec, oldnatoms, MPI_INT, MPI_SUM, world); @@ -441,7 +441,7 @@ void ComputeVoronoi::checkOccupation() #endif // determine the total number of atoms in this atom's currently occupied cell - + int c; for (i=0; itag[i]; if (mytag > oldmaxtag) @@ -479,7 +479,7 @@ void ComputeVoronoi::checkOccupation() 
void ComputeVoronoi::loopCells() { // invoke voro++ and fetch results for owned atoms in group - + voronoicell_neighbor c; int i; if (faces_flag) nfaces = 0; diff --git a/src/compute_reduce.cpp b/src/compute_reduce.cpp index 8565ddb1c9..24fdc4a991 100644 --- a/src/compute_reduce.cpp +++ b/src/compute_reduce.cpp @@ -40,7 +40,7 @@ enum{UNDECIDED,PERATOM,LOCAL}; // same as in ComputeReduceRegion void abs_max(void *in, void *inout, int * /*len*/, MPI_Datatype * /*type*/) { // r is the already reduced value, n is the new value - + double n = std::fabs(*(double *) in), r = *(double *) inout; double m; @@ -55,7 +55,7 @@ void abs_max(void *in, void *inout, int * /*len*/, MPI_Datatype * /*type*/) void abs_min(void *in, void *inout, int * /*len*/, MPI_Datatype * /*type*/) { // r is the already reduced value, n is the new value - + double n = std::fabs(*(double *) in), r = *(double *) inout; double m; @@ -270,7 +270,7 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : error->all(FLERR, "Compute {} compute {} does not calculate a per-atom array", style, val.id); if (val.argindex && val.argindex > val.val.c->size_peratom_cols) error->all(FLERR, "Compute {} compute {} array is accessed out-of-range", style, val.id); - + } else if (input_mode == LOCAL) { if (!val.val.c->peratom_flag) error->all(FLERR, "Compute {} compute {} does not calculate local values", style, val.id); @@ -295,7 +295,7 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : error->all(FLERR, "Compute {} fix {} does not calculate a per-atom array", style, val.id); if (val.argindex && (val.argindex > val.val.f->size_peratom_cols)) error->all(FLERR, "Compute {} fix {} array is accessed out-of-range", style, val.id); - + } else if (input_mode == LOCAL) { if (!val.val.f->local_flag) error->all(FLERR, "Compute {} fix {} does not calculate local values", style, val.id); diff --git a/src/compute_reduce_region.cpp b/src/compute_reduce_region.cpp index d0a32b8adf..15280af544 100644 --- 
a/src/compute_reduce_region.cpp +++ b/src/compute_reduce_region.cpp @@ -61,7 +61,7 @@ double ComputeReduceRegion::compute_one(int m, int flag) // initialization in case it has not yet been run, e.g. when // the compute was invoked right after it has been created - + if ((val.which == ArgInfo::COMPUTE) || (val.which == ArgInfo::FIX)) { if (val.val.c == nullptr) init(); } diff --git a/src/variable.cpp b/src/variable.cpp index f3c987f00c..264dcf6258 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -1502,7 +1502,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) // equal-style variable is being evaluated if (style[ivar] == EQUAL) { - + // c_ID = scalar from global scalar if (lowercase && nbracket == 0) { @@ -1588,7 +1588,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) // C_ID[i][j] = scalar element of per-atom array, note uppercase "C" } else if (!lowercase && nbracket == 2) { - + if (!compute->peratom_flag) print_var_error(FLERR,"Mismatched compute in variable formula",ivar); if (!compute->size_peratom_cols) @@ -1612,13 +1612,13 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) tree,treestack,ntreestack,argstack,nargstack); // no other possibilities for equal-style variable, so error - + } else print_var_error(FLERR,"Mismatched compute in variable formula",ivar); // vector-style variable is being evaluated } else if (style[ivar] == VECTOR) { - + // c_ID = vector from global vector if (lowercase && nbracket == 0) { @@ -1641,7 +1641,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) newtree->nvector = compute->size_vector; newtree->nstride = 1; treestack[ntreestack++] = newtree; - + // c_ID[i] = vector from global array } else if (lowercase && nbracket == 1) { @@ -1666,15 +1666,15 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) newtree->nvector = compute->size_array_rows; newtree->nstride = compute->size_array_cols; treestack[ntreestack++] = newtree; - + // no other possibilities for 
vector-style variable, so error - + } else print_var_error(FLERR,"Mismatched compute in variable formula",ivar); // atom-style variable is being evaluated } else if (style[ivar] == ATOM) { - + // c_ID = vector from per-atom vector if (lowercase && nbracket == 0) { @@ -1696,7 +1696,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) newtree->array = compute->vector_atom; newtree->nstride = 1; treestack[ntreestack++] = newtree; - + // c_ID[i] = vector from per-atom array } else if (lowercase && nbracket == 1) { @@ -1724,10 +1724,10 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) treestack[ntreestack++] = newtree; // no other possibilities for atom-style variable, so error - + } else print_var_error(FLERR,"Mismatched compute in variable formula",ivar); } - + // ---------------- // fix // ---------------- @@ -1770,7 +1770,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) // equal-style variable is being evaluated if (style[ivar] == EQUAL) { - + // f_ID = scalar from global scalar if (lowercase && nbracket == 0) { @@ -1826,14 +1826,14 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) if (update->whichflag > 0 && update->ntimestep % fix->peratom_freq) print_var_error(FLERR,"Fix in variable not computed at a compatible time",ivar); - + peratom2global(1,nullptr,fix->vector_atom,1,index1,tree, treestack,ntreestack,argstack,nargstack); // F_ID[i][j] = scalar element of per-atom array, note uppercase "F" } else if (!lowercase && nbracket == 2) { - + if (!fix->peratom_flag) print_var_error(FLERR,"Mismatched fix in variable formula",ivar); if (!fix->size_peratom_cols) @@ -1852,13 +1852,13 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) tree,treestack,ntreestack,argstack,nargstack); // no other possibilities for equal-style variable, so error - + } else print_var_error(FLERR,"Mismatched fix in variable formula",ivar); // vector-style variable is being evaluated } else if (style[ivar] == VECTOR) { - + // f_ID 
= vector from global vector if (lowercase && nbracket == 0) { @@ -1875,7 +1875,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) memory->create(vec,nvec,"variable:values"); for (int m = 0; m < nvec; m++) vec[m] = fix->compute_vector(m); - + auto newtree = new Tree(); newtree->type = VECTORARRAY; newtree->array = vec; @@ -1883,7 +1883,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) newtree->nstride = 1; newtree->selfalloc = 1; treestack[ntreestack++] = newtree; - + // f_ID[i] = vector from global array } else if (lowercase && nbracket == 1) { @@ -1910,15 +1910,15 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) newtree->nstride = 1; newtree->selfalloc = 1; treestack[ntreestack++] = newtree; - + // no other possibilities for vector-style variable, so error - + } else print_var_error(FLERR,"Mismatched fix in variable formula",ivar); // atom-style variable is being evaluated } else if (style[ivar] == ATOM) { - + // f_ID = vector from per-atom vector if (lowercase && nbracket == 0) { @@ -1935,7 +1935,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) newtree->array = fix->vector_atom; newtree->nstride = 1; treestack[ntreestack++] = newtree; - + // f_ID[i] = vector from per-atom array } else if (lowercase && nbracket == 1) { @@ -1958,7 +1958,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) treestack[ntreestack++] = newtree; // no other possibilities for atom-style variable, so error - + } else print_var_error(FLERR,"Mismatched fix in variable formula",ivar); } @@ -2053,21 +2053,21 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) print_var_error(FLERR,"Atom-style variable in equal-style variable formula",ivar); if (treetype == VECTOR) print_var_error(FLERR,"Atom-style variable in vector-style variable formula",ivar); - + Tree *newtree = nullptr; evaluate(data[ivar][0],&newtree,ivar); treestack[ntreestack++] = newtree; // vector from atomfile-style variable // point to the values in 
FixStore instance - + } else if (style[ivar] == ATOMFILE) { if (tree == nullptr) print_var_error(FLERR,"Atomfile-style variable in equal-style variable formula",ivar); if (treetype == VECTOR) print_var_error(FLERR,"Atomfile-style variable in vector-style variable formula",ivar); - + auto newtree = new Tree(); newtree->type = ATOMARRAY; newtree->array = reader[ivar]->fixstore->vstore; @@ -2121,7 +2121,7 @@ double Variable::evaluate(char *str, Tree **tree, int ivar) tree,treestack,ntreestack,argstack,nargstack); // no other possibilities for variable with one bracket - + } else print_var_error(FLERR,"Mismatched variable in variable formula",ivar); } From ffe291b7934188dc3901534b5ce9d75bf04042c6 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 22 Aug 2023 15:28:28 -0500 Subject: [PATCH 042/107] Fixed bugs with the memory allocation for xoriginal --- src/KOKKOS/fix_spring_self_kokkos.cpp | 20 ++++++++++++++------ src/KOKKOS/fix_spring_self_kokkos.h | 2 ++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index 8a576e2dea..e8aa07240f 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -39,19 +39,29 @@ FixSpringSelfKokkos::FixSpringSelfKokkos(LAMMPS *lmp, int narg, char { kokkosable = 1; exchange_comm_device = 1; - maxexchange = 6; atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; - memory->destroy(xoriginal); + xoriginal_tmp = xoriginal; + xoriginal = nullptr; int nmax = atom->nmax; grow_arrays(nmax); - d_count = typename AT::t_int_scalar("fix_shake:count"); + for (int i = 0; i < atom->nlocal; i++) { + k_xoriginal.h_view(i,0) = xoriginal_tmp[i][0]; + k_xoriginal.h_view(i,1) = xoriginal_tmp[i][1]; + k_xoriginal.h_view(i,2) = xoriginal_tmp[i][2]; + } + + k_xoriginal.modify_host(); + + d_count = typename AT::t_int_scalar("spring/self:count"); 
h_count = Kokkos::create_mirror_view(d_count); + + memory->destroy(xoriginal_tmp); } /* ---------------------------------------------------------------------- */ @@ -81,18 +91,16 @@ void FixSpringSelfKokkos::init() template void FixSpringSelfKokkos::post_force(int /*vflag*/) { - atomKK->sync(execution_space, X_MASK | F_MASK | MASK_MASK); + atomKK->sync(execution_space, X_MASK | F_MASK | IMAGE_MASK | MASK_MASK); x = atomKK->k_x.view(); f = atomKK->k_f.view(); image = atomKK->k_image.view(); mask = atomKK->k_mask.view(); - int nlocal = atom->nlocal; double espring_kk; - copymode = 1; //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this, espring_kk); { diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h index 30b9eaf40a..99fe435d88 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.h +++ b/src/KOKKOS/fix_spring_self_kokkos.h @@ -84,6 +84,8 @@ class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase { typename AT::t_int_scalar d_count; HAT::t_int_scalar h_count; + double **xoriginal_tmp; // original coords of atoms + }; template From dd6b847a5c6f88b24904c74f1a84ae4354400cb5 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 22 Aug 2023 16:29:14 -0400 Subject: [PATCH 043/107] mention that "peratom" is no longer required and was removed --- doc/src/compute_voronoi_atom.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/src/compute_voronoi_atom.rst b/doc/src/compute_voronoi_atom.rst index 3e67bb6cbf..37e5386341 100644 --- a/doc/src/compute_voronoi_atom.rst +++ b/doc/src/compute_voronoi_atom.rst @@ -190,6 +190,10 @@ Voro++ software in the src/VORONOI/README file. Output info """"""""""" +.. deprecated:: TBD + + The *peratom* keyword was removed as it is no longer required. + This compute calculates a per-atom array with two columns. 
In regular dynamic tessellation mode the first column is the Voronoi volume, the second is the neighbor count, as described above (read above for the From ad33a018f48f228f4d4b9f5815c85a93bd25663f Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Tue, 22 Aug 2023 15:52:47 -0600 Subject: [PATCH 044/107] update variable syntax in several example input scripts --- examples/snap/in.snap.compute | 2 +- examples/snap/in.snap.compute.quadratic | 2 +- examples/voronoi/in.voronoi | 8 ++++---- examples/voronoi/in.voronoi.data | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/snap/in.snap.compute b/examples/snap/in.snap.compute index b0c7314882..8d2ffe8b96 100644 --- a/examples/snap/in.snap.compute +++ b/examples/snap/in.snap.compute @@ -70,7 +70,7 @@ compute bsum2 snapgroup2 reduce sum c_b[*] # fix bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector compute vbsum all reduce sum c_vb[*] # fix vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector -variable db_2_25 equal c_db[2][25] +variable db_2_25 equal C_db[2][25] # set up compute snap generating global array diff --git a/examples/snap/in.snap.compute.quadratic b/examples/snap/in.snap.compute.quadratic index e03d4af3bf..20d5ed3039 100644 --- a/examples/snap/in.snap.compute.quadratic +++ b/examples/snap/in.snap.compute.quadratic @@ -70,7 +70,7 @@ compute bsum2 snapgroup2 reduce sum c_b[*] # fix bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector compute vbsum all reduce sum c_vb[*] # fix vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector -variable db_2_100 equal c_db[2][100] +variable db_2_100 equal C_db[2][100] # set up compute snap generating global array diff --git a/examples/voronoi/in.voronoi b/examples/voronoi/in.voronoi index 5254969fbd..79b6c6efec 100644 --- a/examples/voronoi/in.voronoi +++ b/examples/voronoi/in.voronoi @@ -146,10 +146,10 @@ variable i2 equal 257 compute v1 all voronoi/atom occupation compute r0 all reduce sum c_v1[1] compute r1 all 
reduce sum c_v1[2] -variable d5a equal c_v1[${i1}][1] -variable d5b equal c_v1[${i2}][1] -variable d5c equal c_v1[${i1}][2] -variable d5d equal c_v1[${i2}][2] +variable d5a equal C_v1[${i1}][1] +variable d5b equal C_v1[${i2}][1] +variable d5c equal C_v1[${i1}][2] +variable d5d equal C_v1[${i2}][2] thermo_style custom c_r0 c_r1 v_d5a v_d5b v_d5c v_d5d run 0 diff --git a/examples/voronoi/in.voronoi.data b/examples/voronoi/in.voronoi.data index 853c2c2bd1..da00b44e09 100644 --- a/examples/voronoi/in.voronoi.data +++ b/examples/voronoi/in.voronoi.data @@ -67,7 +67,7 @@ undump dlocal # local and global quantities, but # not per-atom quantities -compute v2 all voronoi/atom neighbors yes edge_histo 6 peratom no +compute v2 all voronoi/atom neighbors yes edge_histo 6 # write voronoi local quantities to a file @@ -75,7 +75,7 @@ dump d2 all local 1 dump.neighbors2 index c_v2[1] c_v2[2] c_v2[3] # sum up a voronoi local quantity -compute sumarea all reduce sum c_v2[3] +compute sumarea all reduce sum c_v2[3] inputs local # output voronoi global quantities From 71ca6ee47ca24e0db97e0137b2ad293d33e95647 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Tue, 22 Aug 2023 16:02:28 -0600 Subject: [PATCH 045/107] fix one more example input script --- examples/voronoi/in.voronoi.data | 7 +------ src/compute_reduce.cpp | 3 +-- src/compute_reduce.h | 2 +- src/compute_reduce_region.cpp | 2 -- 4 files changed, 3 insertions(+), 11 deletions(-) diff --git a/examples/voronoi/in.voronoi.data b/examples/voronoi/in.voronoi.data index da00b44e09..e5d925c498 100644 --- a/examples/voronoi/in.voronoi.data +++ b/examples/voronoi/in.voronoi.data @@ -63,9 +63,7 @@ undump dlocal # TEST 2: # -# This compute voronoi generates -# local and global quantities, but -# not per-atom quantities +# This compute voronoi generates peratom and local and global quantities compute v2 all voronoi/atom neighbors yes edge_histo 6 @@ -83,6 +81,3 @@ thermo_style custom c_sumarea c_v2[3] c_v2[4] c_v2[5] c_v2[6] c_v2[7] 
thermo 1 run 0 - - - diff --git a/src/compute_reduce.cpp b/src/compute_reduce.cpp index 24fdc4a991..5983445517 100644 --- a/src/compute_reduce.cpp +++ b/src/compute_reduce.cpp @@ -31,8 +31,6 @@ using namespace LAMMPS_NS; -enum{UNDECIDED,PERATOM,LOCAL}; // same as in ComputeReduceRegion - #define BIG 1.0e20 //---------------------------------------------------------------- @@ -232,6 +230,7 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : error->all(FLERR,"Compute {} inputs must be all peratom or all local"); input_mode = LOCAL; } + iarg += 2; } else error->all(FLERR, "Unknown compute {} keyword: {}", style, arg[iarg]); } diff --git a/src/compute_reduce.h b/src/compute_reduce.h index f8b652e00c..64322bc6ac 100644 --- a/src/compute_reduce.h +++ b/src/compute_reduce.h @@ -27,7 +27,7 @@ namespace LAMMPS_NS { class ComputeReduce : public Compute { public: enum { SUM, SUMSQ, SUMABS, MINN, MAXX, AVE, AVESQ, AVEABS, MINABS, MAXABS }; - enum { PERATOM, LOCAL }; + enum { UNDECIDED, PERATOM, LOCAL }; ComputeReduce(class LAMMPS *, int, char **); ~ComputeReduce() override; diff --git a/src/compute_reduce_region.cpp b/src/compute_reduce_region.cpp index 15280af544..bd850e902c 100644 --- a/src/compute_reduce_region.cpp +++ b/src/compute_reduce_region.cpp @@ -26,8 +26,6 @@ using namespace LAMMPS_NS; -enum{UNDECIDED,PERATOM,LOCAL}; // same as in ComputeReduce - static constexpr double BIG = 1.0e20; /* ---------------------------------------------------------------------- */ From 17dd04b4dea143fe271188b235edbe13e473956e Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Tue, 22 Aug 2023 16:22:57 -0600 Subject: [PATCH 046/107] tweak variable doc page --- doc/src/variable.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/src/variable.rst b/doc/src/variable.rst index 4541de5fa2..f1a316da1f 100644 --- a/doc/src/variable.rst +++ b/doc/src/variable.rst @@ -1179,7 +1179,7 @@ table: | equal | c_ID[I] | element of global vector | | equal | 
c_ID[I][J] | element of global array | | equal | C_ID[I] | element of per-atom vector (I = atom ID) | -| equal | C_ID{i}[J] | element of per-atom array (I = atom ID) | +| equal | C_ID[I][J] | element of per-atom array (I = atom ID) | +--------+------------+------------------------------------------+ | vector | c_ID | global vector | | vector | c_ID[I] | column of global array | @@ -1243,7 +1243,7 @@ and atom-style variables are listed in the following table: | equal | f_ID[I] | element of global vector | | equal | f_ID[I][J] | element of global array | | equal | F_ID[I] | element of per-atom vector (I = atom ID) | -| equal | F_ID{i}[J] | element of per-atom array (I = atom ID) | +| equal | F_ID[I][J] | element of per-atom array (I = atom ID) | +--------+------------+------------------------------------------+ | vector | f_ID | global vector | | vector | f_ID[I] | column of global array | From 3e22eb83555a484388a433b20be14203dcbb4269 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Tue, 22 Aug 2023 16:40:25 -0600 Subject: [PATCH 047/107] adjust version date --- src/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/version.h b/src/version.h index 572a274053..35780aa785 100644 --- a/src/version.h +++ b/src/version.h @@ -1,2 +1,2 @@ -#define LAMMPS_VERSION "2 Aug 2023" +#define LAMMPS_VERSION "3 Aug 2023" #define LAMMPS_UPDATE "Development" From e6b98f5942856c11ec34b3206ee59cb175a8dc83 Mon Sep 17 00:00:00 2001 From: Steve Plimpton Date: Wed, 23 Aug 2023 09:47:36 -0600 Subject: [PATCH 048/107] fix logic issue in compute reduce --- src/compute_reduce.cpp | 23 ++++++----------------- src/compute_reduce.h | 2 +- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/src/compute_reduce.cpp b/src/compute_reduce.cpp index 5983445517..5385554f33 100644 --- a/src/compute_reduce.cpp +++ b/src/compute_reduce.cpp @@ -133,8 +133,6 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : // parse values - input_mode = UNDECIDED; - 
values.clear(); nvalues = 0; for (int iarg = 0; iarg < nargnew; ++iarg) { @@ -144,41 +142,32 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : val.val.c = nullptr; if (strcmp(arg[iarg], "x") == 0) { - input_mode = PERATOM; val.which = ArgInfo::X; val.argindex = 0; } else if (strcmp(arg[iarg], "y") == 0) { - input_mode = PERATOM; val.which = ArgInfo::X; val.argindex = 1; } else if (strcmp(arg[iarg], "z") == 0) { - input_mode = PERATOM; val.which = ArgInfo::X; val.argindex = 2; } else if (strcmp(arg[iarg], "vx") == 0) { - input_mode = PERATOM; val.which = ArgInfo::V; val.argindex = 0; } else if (strcmp(arg[iarg], "vy") == 0) { - input_mode = PERATOM; val.which = ArgInfo::V; val.argindex = 1; } else if (strcmp(arg[iarg], "vz") == 0) { - input_mode = PERATOM; val.which = ArgInfo::V; val.argindex = 2; } else if (strcmp(arg[iarg], "fx") == 0) { - input_mode = PERATOM; val.which = ArgInfo::F; val.argindex = 0; } else if (strcmp(arg[iarg], "fy") == 0) { - input_mode = PERATOM; val.which = ArgInfo::F; val.argindex = 1; } else if (strcmp(arg[iarg], "fz") == 0) { - input_mode = PERATOM; val.which = ArgInfo::F; val.argindex = 2; @@ -203,6 +192,7 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : nvalues = values.size(); replace = new int[nvalues]; for (int i = 0; i < nvalues; ++i) replace[i] = -1; + input_mode = PERATOM; std::string mycmd = "compute "; mycmd += style; @@ -225,11 +215,7 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : } else if (strcmp(arg[iarg], "inputs") == 0) { if (iarg + 2 > narg) utils::missing_cmd_args(FLERR, mycmd + " inputs", error); if (strcmp(arg[iarg+1], "peratom") == 0) input_mode = PERATOM; - else if (strcmp(arg[iarg+1], "local") == 0) { - if (input_mode == PERATOM) - error->all(FLERR,"Compute {} inputs must be all peratom or all local"); - input_mode = LOCAL; - } + else if (strcmp(arg[iarg+1], "local") == 0) input_mode = LOCAL; iarg += 2; } else error->all(FLERR, "Unknown compute {} keyword: 
{}", style, arg[iarg]); @@ -255,7 +241,10 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : // setup and error check for (auto &val : values) { - if (val.which == ArgInfo::COMPUTE) { + if (val.which == ArgInfo::X || val.which == ArgInfo::V || val.which == ArgInfo::F) { + if (input_mode == LOCAL) error->all(FLERR,"Compute {} inputs must be all local"); + + } else if (val.which == ArgInfo::COMPUTE) { val.val.c = modify->get_compute_by_id(val.id); if (!val.val.c) error->all(FLERR, "Compute ID {} for compute {} does not exist", val.id, style); diff --git a/src/compute_reduce.h b/src/compute_reduce.h index 64322bc6ac..f8b652e00c 100644 --- a/src/compute_reduce.h +++ b/src/compute_reduce.h @@ -27,7 +27,7 @@ namespace LAMMPS_NS { class ComputeReduce : public Compute { public: enum { SUM, SUMSQ, SUMABS, MINN, MAXX, AVE, AVESQ, AVEABS, MINABS, MAXABS }; - enum { UNDECIDED, PERATOM, LOCAL }; + enum { PERATOM, LOCAL }; ComputeReduce(class LAMMPS *, int, char **); ~ComputeReduce() override; From b4e7d5f0b9a2a8ce329dc5ca9fb383b9b1390861 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 23 Aug 2023 20:11:32 -0400 Subject: [PATCH 049/107] fix whitespace (again) --- src/compute_reduce.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compute_reduce.cpp b/src/compute_reduce.cpp index 5385554f33..3feabf2ec3 100644 --- a/src/compute_reduce.cpp +++ b/src/compute_reduce.cpp @@ -243,7 +243,7 @@ ComputeReduce::ComputeReduce(LAMMPS *lmp, int narg, char **arg) : for (auto &val : values) { if (val.which == ArgInfo::X || val.which == ArgInfo::V || val.which == ArgInfo::F) { if (input_mode == LOCAL) error->all(FLERR,"Compute {} inputs must be all local"); - + } else if (val.which == ArgInfo::COMPUTE) { val.val.c = modify->get_compute_by_id(val.id); if (!val.val.c) From 6ccccb5d13bbb9f53033ba64c8beb646eba5ae3f Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 24 Aug 2023 09:27:17 -0400 Subject: [PATCH 050/107] add versionadded tag to new 
inputs keyword docs --- doc/src/compute_reduce.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/src/compute_reduce.rst b/doc/src/compute_reduce.rst index 4692e161b4..6820d2ee04 100644 --- a/doc/src/compute_reduce.rst +++ b/doc/src/compute_reduce.rst @@ -201,6 +201,8 @@ information in this context, the *replace* keywords will extract the atom IDs for the two atoms in the bond of maximum stretch. These atom IDs and the bond stretch will be printed with thermodynamic output. +.. versionadded:: TBD + The *inputs* keyword allows selection of whether all the inputs are per-atom or local quantities. As noted above, all the inputs must be the same kind (per-atom or local). Per-atom is the default setting. From 187bebb515081bf94e3e1b6ab431a8a4902ab3db Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Thu, 24 Aug 2023 12:17:37 -0500 Subject: [PATCH 051/107] Working on fix spring/self/kk, something with missing host-device sync that causes force blowup --- src/KOKKOS/fix_efield_kokkos.cpp | 6 +++--- src/KOKKOS/fix_spring_self_kokkos.cpp | 28 +++++++++++++++++++++------ src/KOKKOS/fix_spring_self_kokkos.h | 7 ++++--- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index bbf106f515..8c4469095f 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -218,11 +218,11 @@ void FixEfieldKokkos::post_force(int /*vflag*/) auto fx = qtmp * l_ex; auto fy = qtmp * l_ey; auto fz = qtmp * l_ez; - if (l_xstyle == ATOM) l_f(i,0) += l_d_efield(i,0); + if (l_xstyle == ATOM) l_f(i,0) += qtmp * l_d_efield(i,0); else if (l_xstyle) l_f(i,0) += fx; - if (l_ystyle == ATOM) l_f(i,1) += l_d_efield(i,1); + if (l_ystyle == ATOM) l_f(i,1) += qtmp * l_d_efield(i,1); else if (l_ystyle) l_f(i,1) += fy; - if (l_zstyle == ATOM) l_f(i,2) += l_d_efield(i,2); + if (l_zstyle == ATOM) l_f(i,2) += qtmp * l_d_efield(i,2); else if (l_zstyle) l_f(i,2) += fz; fsum_kk.d0 -= fx * unwrap[0] + fy * 
unwrap[1] + fz * unwrap[2]; fsum_kk.d1 += fx; diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index e8aa07240f..da1576f3ef 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -108,16 +108,17 @@ void FixSpringSelfKokkos::post_force(int /*vflag*/) auto prd = Few(domain->prd); auto h = Few(domain->h); auto triclinic = domain->triclinic; - auto l_xflag = xflag; - auto l_yflag = yflag; - auto l_zflag = zflag; auto l_k = k; - auto l_x = x; auto l_xoriginal = d_xoriginal; + + auto l_x = x; auto l_f = f; auto l_mask = mask; auto l_image = image; auto l_groupbit = groupbit; + auto l_xflag = xflag; + auto l_yflag = yflag; + auto l_zflag = zflag; Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double& espring_kk) { if (l_mask[i] & l_groupbit) { @@ -154,10 +155,25 @@ void FixSpringSelfKokkos::post_force(int /*vflag*/) template void FixSpringSelfKokkos::grow_arrays(int nmax) { - memoryKK->grow_kokkos(k_xoriginal,xoriginal,nmax,3,"spring/self:xoriginal"); + memoryKK->grow_kokkos(k_xoriginal,xoriginal,nmax,"spring/self:xoriginal"); d_xoriginal = k_xoriginal.view(); } +/* ---------------------------------------------------------------------- + copy values within local atom-based arrays +------------------------------------------------------------------------- */ + +template +void FixSpringSelfKokkos::copy_arrays(int i, int j, int delflag) +{ + k_xoriginal.sync_host(); + + FixSpringSelf::copy_arrays(i,j,delflag); + + k_xoriginal.modify_host(); +} + + /* ---------------------------------------------------------------------- */ template @@ -202,7 +218,7 @@ int FixSpringSelfKokkos::pack_exchange_kokkos( d_exchange_sendlist = k_exchange_sendlist.view(); this->nsend = nsend; - + k_xoriginal.template sync(); Kokkos::deep_copy(d_count,0); diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h index 99fe435d88..49233c4dcc 100644 --- 
a/src/KOKKOS/fix_spring_self_kokkos.h +++ b/src/KOKKOS/fix_spring_self_kokkos.h @@ -42,6 +42,7 @@ class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase { ~FixSpringSelfKokkos() override; void init() override; void grow_arrays(int) override; + void copy_arrays(int, int, int) override; void post_force(int) override; KOKKOS_INLINE_FUNCTION @@ -64,14 +65,14 @@ class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase { int unpack_exchange(int, double *) override; protected: - DAT::tdual_ffloat_2d k_xoriginal; - typename AT::t_ffloat_2d d_xoriginal; + DAT::tdual_x_array k_xoriginal; + typename AT::t_x_array d_xoriginal; typename AT::t_x_array_randomread x; typename AT::t_f_array f; typename AT::t_imageint_1d_randomread image; typename AT::t_int_1d_randomread mask; - + int nsend; typename AT::t_int_2d d_sendlist; From 58d60dfea04b7d3d2012d0c8639bc07a3ca819e0 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Sat, 26 Aug 2023 16:08:59 -0500 Subject: [PATCH 052/107] Fixed bugs with device sync for xoriginal --- src/KOKKOS/fix_spring_self_kokkos.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index da1576f3ef..13ebb7de6f 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -101,8 +101,11 @@ void FixSpringSelfKokkos::post_force(int /*vflag*/) double espring_kk; + k_xoriginal.modify(); + k_xoriginal.sync(); + copymode = 1; - //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this, espring_kk); + { // local variables for lambda capture auto prd = Few(domain->prd); @@ -173,7 +176,6 @@ void FixSpringSelfKokkos::copy_arrays(int i, int j, int delflag) k_xoriginal.modify_host(); } - /* ---------------------------------------------------------------------- */ template From 4ef9f70bfe0ef7a408eff0584c793e2fb3ba64dc Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 28 Aug 2023 09:43:00 -0500 Subject: 
[PATCH 053/107] Fixed whitespaces, added author info --- src/KOKKOS/fix_efield_kokkos.cpp | 4 ++++ src/KOKKOS/fix_spring_self_kokkos.cpp | 4 ++++ src/KOKKOS/fix_spring_self_kokkos.h | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index 8c4469095f..ffe1c34e97 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -12,6 +12,10 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Contributing author: Trung Nguyen (U Chicago) +------------------------------------------------------------------------- */ + #include "fix_efield_kokkos.h" #include "atom_kokkos.h" diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index 13ebb7de6f..efd8a652ff 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -12,6 +12,10 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Contributing author: Trung Nguyen (U Chicago) +------------------------------------------------------------------------- */ + #include "fix_spring_self_kokkos.h" #include "atom_kokkos.h" diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h index 49233c4dcc..b23e92249b 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.h +++ b/src/KOKKOS/fix_spring_self_kokkos.h @@ -72,7 +72,7 @@ class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase { typename AT::t_f_array f; typename AT::t_imageint_1d_randomread image; typename AT::t_int_1d_randomread mask; - + int nsend; typename AT::t_int_2d d_sendlist; From b08abd4a809df5b12bf9e9abca796873742e5d75 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 28 Aug 2023 11:06:07 -0500 Subject: [PATCH 054/107] Updated Install.sh and cleaned up --- src/KOKKOS/Install.sh | 4 ++++ src/KOKKOS/pair_yukawa_colloid_kokkos.cpp | 9 ++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 766daaff19..0a5bb398aa 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -127,6 +127,8 @@ action fix_dt_reset_kokkos.cpp action fix_dt_reset_kokkos.h action fix_enforce2d_kokkos.cpp action fix_enforce2d_kokkos.h +action fix_efield_kokkos.cpp +action fix_efield_kokkos.h action fix_eos_table_rx_kokkos.cpp fix_eos_table_rx.cpp action fix_eos_table_rx_kokkos.h fix_eos_table_rx.h action fix_freeze_kokkos.cpp fix_freeze.cpp @@ -171,6 +173,8 @@ action fix_shake_kokkos.cpp fix_shake.cpp action fix_shake_kokkos.h fix_shake.h action fix_shardlow_kokkos.cpp fix_shardlow.cpp action fix_shardlow_kokkos.h fix_shardlow.h +action fix_spring_self_kokkos.cpp +action fix_spring_self_kokkos.h action fix_viscous_kokkos.cpp action fix_viscous_kokkos.h action fix_wall_gran_kokkos.cpp fix_wall_gran.cpp diff 
--git a/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp b/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp index ca491a3800..04eb5ab657 100644 --- a/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp +++ b/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp @@ -210,7 +210,7 @@ void PairYukawaColloidKokkos::compute(int eflag_in, int vflag_in) } } - +/* ---------------------------------------------------------------------- */ template template @@ -227,11 +227,10 @@ compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const F_FLOAT aa = STACKPARAMS ? m_params[itype][jtype].a : params(itype,jtype).a; - // U = a * exp(-kappa*r-(radi+radj)) / kappa - // f = a * exp(-kappa*r) + // U = a * exp(-kappa*(r-(radi+radj))) / kappa + // f = -dU/dr = a * exp(-kappa*r) // f/r = a * exp(-kappa*r) / r const F_FLOAT rinv = 1.0 / rr; - const F_FLOAT rinv2 = rinv*rinv; const F_FLOAT screening = exp(-kappa*(rr-(radi+radj))); const F_FLOAT forceyukawa = aa * screening; const F_FLOAT fpair = forceyukawa * rinv; @@ -255,7 +254,7 @@ compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const F_FLOAT offset = STACKPARAMS ? m_params[itype][jtype].offset : params(itype,jtype).offset; - // U = a * exp(-kappa*r) / kappa + // U = a * exp(-kappa*(r-(radi+radj))) / kappa const F_FLOAT rinv = 1.0 / rr; const F_FLOAT screening = exp(-kappa*(rr-(radi+radj))); From 67bcf75b74f34b4bd0037d47fade1a77fabc073a Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 28 Aug 2023 23:18:05 -0500 Subject: [PATCH 055/107] Updated the corresponding doc pages with ".. index::" and added (k) to the commands in the overview pages --- doc/src/Commands_fix.rst | 4 ++-- doc/src/Commands_pair.rst | 2 +- doc/src/fix_efield.rst | 1 + doc/src/fix_spring_self.rst | 1 + doc/src/pair_yukawa_colloid.rst | 3 ++- 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst index 6fe321e3c9..a15e24e3e5 100644 --- a/doc/src/Commands_fix.rst +++ b/doc/src/Commands_fix.rst @@ -69,7 +69,7 @@ OPT. 
* :doc:`drude/transform/inverse ` * :doc:`dt/reset (k) ` * :doc:`edpd/source ` - * :doc:`efield ` + * :doc:`efield (k) ` * :doc:`efield/tip4p ` * :doc:`ehex ` * :doc:`electrode/conp (i) ` @@ -233,7 +233,7 @@ OPT. * :doc:`spring ` * :doc:`spring/chunk ` * :doc:`spring/rg ` - * :doc:`spring/self ` + * :doc:`spring/self (k) ` * :doc:`srd ` * :doc:`store/force ` * :doc:`store/state ` diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst index c45a1d778c..b3d40717da 100644 --- a/doc/src/Commands_pair.rst +++ b/doc/src/Commands_pair.rst @@ -305,5 +305,5 @@ OPT. * :doc:`wf/cut ` * :doc:`ylz ` * :doc:`yukawa (gko) ` - * :doc:`yukawa/colloid (go) ` + * :doc:`yukawa/colloid (gko) ` * :doc:`zbl (gko) ` diff --git a/doc/src/fix_efield.rst b/doc/src/fix_efield.rst index e38e1e6894..c6ac3a0722 100644 --- a/doc/src/fix_efield.rst +++ b/doc/src/fix_efield.rst @@ -1,4 +1,5 @@ .. index:: fix efield +.. index:: fix efield/kk .. index:: fix efield/tip4p fix efield command diff --git a/doc/src/fix_spring_self.rst b/doc/src/fix_spring_self.rst index 6cf0a9e0e7..0def6d51f0 100644 --- a/doc/src/fix_spring_self.rst +++ b/doc/src/fix_spring_self.rst @@ -1,4 +1,5 @@ .. index:: fix spring/self +.. index:: fix spring/self/kk fix spring/self command ======================= diff --git a/doc/src/pair_yukawa_colloid.rst b/doc/src/pair_yukawa_colloid.rst index 6611ea04e4..96893f8e37 100644 --- a/doc/src/pair_yukawa_colloid.rst +++ b/doc/src/pair_yukawa_colloid.rst @@ -1,11 +1,12 @@ .. index:: pair_style yukawa/colloid .. index:: pair_style yukawa/colloid/gpu +.. index:: pair_style yukawa/colloid/kk .. 
index:: pair_style yukawa/colloid/omp pair_style yukawa/colloid command ================================= -Accelerator Variants: *yukawa/colloid/gpu*, *yukawa/colloid/omp* +Accelerator Variants: *yukawa/colloid/gpu*, *yukawa/colloid/kk*, *yukawa/colloid/omp* Syntax """""" From e840d422efb937ab2bef3db0de971149a0bde4e5 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 3 Oct 2023 10:07:41 -0600 Subject: [PATCH 056/107] Use a_f to be consistent --- src/KOKKOS/pair_kokkos.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 2c2a622791..6f48aa0266 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -138,9 +138,9 @@ struct PairComputeFunctor { F_FLOAT fztmp = 0.0; if (NEIGHFLAG == FULL) { - f(i,0) = 0.0; - f(i,1) = 0.0; - f(i,2) = 0.0; + a_f(i,0) = 0.0; + a_f(i,1) = 0.0; + a_f(i,2) = 0.0; } for (int jj = 0; jj < jnum; jj++) { @@ -212,9 +212,9 @@ struct PairComputeFunctor { F_FLOAT fztmp = 0.0; if (NEIGHFLAG == FULL) { - f(i,0) = 0.0; - f(i,1) = 0.0; - f(i,2) = 0.0; + a_f(i,0) = 0.0; + a_f(i,1) = 0.0; + a_f(i,2) = 0.0; } for (int jj = 0; jj < jnum; jj++) { From 367d0ac90518d751af2d872e9cde355c7cef7f22 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 3 Oct 2023 10:07:52 -0600 Subject: [PATCH 057/107] Add support for FLUX --- src/KOKKOS/kokkos.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 91ea6d37ac..b611502f74 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -175,6 +175,14 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) set_flag = 1; } } + if ((str = getenv("FLUX_TASK_LOCAL_ID"))) { + if (ngpus > 0) { + int local_rank = atoi(str); + device = local_rank % ngpus; + if (device >= skip_gpu) device++; + set_flag = 1; + } + } if (ngpus > 1 && !set_flag) error->all(FLERR,"Could not determine local MPI rank for multiple " From 
ff23da1c9725695cbe8f5429a6f213b96208cead Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 3 Oct 2023 10:28:08 -0600 Subject: [PATCH 058/107] Switch order --- src/KOKKOS/kokkos.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index b611502f74..84a8f59dd0 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -137,13 +137,13 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) int set_flag = 0; char *str; - if ((str = getenv("SLURM_LOCALID"))) { + if (str = getenv("SLURM_LOCALID")) { int local_rank = atoi(str); device = local_rank % ngpus; if (device >= skip_gpu) device++; set_flag = 1; } - if ((str = getenv("MPT_LRANK"))) { + if (str = getenv("FLUX_TASK_LOCAL_ID")) { if (ngpus > 0) { int local_rank = atoi(str); device = local_rank % ngpus; @@ -151,7 +151,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) set_flag = 1; } } - if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) { + if (str = getenv("MPT_LRANK")) { if (ngpus > 0) { int local_rank = atoi(str); device = local_rank % ngpus; @@ -159,7 +159,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) set_flag = 1; } } - if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) { + if (str = getenv("MV2_COMM_WORLD_LOCAL_RANK")) { if (ngpus > 0) { int local_rank = atoi(str); device = local_rank % ngpus; @@ -167,7 +167,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) set_flag = 1; } } - if ((str = getenv("PMI_LOCAL_RANK"))) { + if (str = getenv("OMPI_COMM_WORLD_LOCAL_RANK")) { if (ngpus > 0) { int local_rank = atoi(str); device = local_rank % ngpus; @@ -175,7 +175,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) set_flag = 1; } } - if ((str = getenv("FLUX_TASK_LOCAL_ID"))) { + if (str = getenv("PMI_LOCAL_RANK")) { if (ngpus > 0) { int local_rank = atoi(str); device = local_rank % ngpus; From 
c521d54f85c2aaef9e937cabbc19e60bb8154452 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 3 Oct 2023 10:58:22 -0600 Subject: [PATCH 059/107] Fix compile error --- src/KOKKOS/pair_kokkos.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 6f48aa0266..da6bd11006 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -138,9 +138,9 @@ struct PairComputeFunctor { F_FLOAT fztmp = 0.0; if (NEIGHFLAG == FULL) { - a_f(i,0) = 0.0; - a_f(i,1) = 0.0; - a_f(i,2) = 0.0; + a_f(i,0) -= f(i,0); + a_f(i,1) -= f(i,1); + a_f(i,2) -= f(i,2); } for (int jj = 0; jj < jnum; jj++) { @@ -212,9 +212,9 @@ struct PairComputeFunctor { F_FLOAT fztmp = 0.0; if (NEIGHFLAG == FULL) { - a_f(i,0) = 0.0; - a_f(i,1) = 0.0; - a_f(i,2) = 0.0; + a_f(i,0) -= f(i,0); + a_f(i,1) -= f(i,1); + a_f(i,2) -= f(i,2); } for (int jj = 0; jj < jnum; jj++) { From f57e5d975c8e72e0a2a6e9c01cc2135ca4428ee0 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 3 Oct 2023 12:07:28 -0600 Subject: [PATCH 060/107] Fix harmless compiler warnings --- src/KOKKOS/pair_eam_alloy_kokkos.cpp | 4 ++-- src/KOKKOS/pair_eam_fs_kokkos.cpp | 4 ++-- src/KOKKOS/pair_eam_kokkos.cpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp index 5cc6fa9443..0dfe56c365 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -1477,7 +1477,7 @@ void PairEAMAlloyKokkos::file2array_alloy() template template struct PairEAMAlloyKokkos::policyInstance { - KOKKOS_INLINE_FUNCTION + static auto get(int inum) { auto policy = Kokkos::RangePolicy(0,inum); return policy; @@ -1488,7 +1488,7 @@ struct PairEAMAlloyKokkos::policyInstance { template<> template struct PairEAMAlloyKokkos::policyInstance { - KOKKOS_INLINE_FUNCTION + static auto get(int inum) { static_assert(t_ffloat_2d_n7::static_extent(2) == 7, "Breaking assumption 
of spline dim for KernelAB and KernelC scratch caching"); diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp index 8e895dfeac..58ff615c04 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.cpp +++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp @@ -1487,7 +1487,7 @@ void PairEAMFSKokkos::file2array_fs() template template struct PairEAMFSKokkos::policyInstance { - KOKKOS_INLINE_FUNCTION + static auto get(int inum) { auto policy = Kokkos::RangePolicy(0,inum); return policy; @@ -1498,7 +1498,7 @@ struct PairEAMFSKokkos::policyInstance { template<> template struct PairEAMFSKokkos::policyInstance { - KOKKOS_INLINE_FUNCTION + static auto get(int inum) { static_assert(t_ffloat_2d_n7::static_extent(2) == 7, "Breaking assumption of spline dim for KernelAB and KernelC scratch caching"); diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index a3bc463bbf..864f736066 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -1162,7 +1162,7 @@ void PairEAMKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int & template template struct PairEAMKokkos::policyInstance { - KOKKOS_INLINE_FUNCTION + static auto get(int inum) { auto policy = Kokkos::RangePolicy(0,inum); return policy; @@ -1173,7 +1173,7 @@ struct PairEAMKokkos::policyInstance { template<> template struct PairEAMKokkos::policyInstance { - KOKKOS_INLINE_FUNCTION + static auto get(int inum) { static_assert(t_ffloat_2d_n7::static_extent(2) == 7, "Breaking assumption of spline dim for KernelAB and KernelC scratch caching"); From 4bbaebda16fe9fb1f26952230b19855124a1f368 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 3 Oct 2023 12:09:23 -0600 Subject: [PATCH 061/107] Revert ineffectual change --- src/KOKKOS/pair_kokkos.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index da6bd11006..2c2a622791 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -138,9 
+138,9 @@ struct PairComputeFunctor { F_FLOAT fztmp = 0.0; if (NEIGHFLAG == FULL) { - a_f(i,0) -= f(i,0); - a_f(i,1) -= f(i,1); - a_f(i,2) -= f(i,2); + f(i,0) = 0.0; + f(i,1) = 0.0; + f(i,2) = 0.0; } for (int jj = 0; jj < jnum; jj++) { @@ -212,9 +212,9 @@ struct PairComputeFunctor { F_FLOAT fztmp = 0.0; if (NEIGHFLAG == FULL) { - a_f(i,0) -= f(i,0); - a_f(i,1) -= f(i,1); - a_f(i,2) -= f(i,2); + f(i,0) = 0.0; + f(i,1) = 0.0; + f(i,2) = 0.0; } for (int jj = 0; jj < jnum; jj++) { From 6da8fff08c68f4f21e329751ce184fe3d515aede Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 3 Oct 2023 13:35:50 -0600 Subject: [PATCH 062/107] Fix bug with dynamic groups --- src/KOKKOS/modify_kokkos.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/KOKKOS/modify_kokkos.cpp b/src/KOKKOS/modify_kokkos.cpp index 0b81a1cabb..8d8ffca671 100644 --- a/src/KOKKOS/modify_kokkos.cpp +++ b/src/KOKKOS/modify_kokkos.cpp @@ -362,6 +362,17 @@ void ModifyKokkos::pre_reverse(int eflag, int vflag) void ModifyKokkos::post_force(int vflag) { + for (int i = 0; i < n_post_force_group; i++) { + atomKK->sync(fix[list_post_force_group[i]]->execution_space, + fix[list_post_force_group[i]]->datamask_read); + int prev_auto_sync = lmp->kokkos->auto_sync; + if (!fix[list_post_force_group[i]]->kokkosable) lmp->kokkos->auto_sync = 1; + fix[list_post_force_group[i]]->post_force(vflag); + lmp->kokkos->auto_sync = prev_auto_sync; + atomKK->modified(fix[list_post_force_group[i]]->execution_space, + fix[list_post_force_group[i]]->datamask_modify); + } + for (int i = 0; i < n_post_force; i++) { atomKK->sync(fix[list_post_force[i]]->execution_space, fix[list_post_force[i]]->datamask_read); From f9f33ce1b6a25df18b7c1613727771ee16e8fb97 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Tue, 3 Oct 2023 15:41:35 -0600 Subject: [PATCH 063/107] Fix issues with Kokkos fix property/atom --- src/KOKKOS/fix_property_atom_kokkos.cpp | 40 ++++++++++++++++++++----- src/KOKKOS/fix_property_atom_kokkos.h 
| 1 + 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/fix_property_atom_kokkos.cpp b/src/KOKKOS/fix_property_atom_kokkos.cpp index 1de07b39dc..87325accda 100644 --- a/src/KOKKOS/fix_property_atom_kokkos.cpp +++ b/src/KOKKOS/fix_property_atom_kokkos.cpp @@ -33,6 +33,30 @@ FixPropertyAtomKokkos::FixPropertyAtomKokkos(LAMMPS *lmp, int narg, char **arg) grow_arrays(atom->nmax); } +/* ---------------------------------------------------------------------- */ + +FixPropertyAtomKokkos::~FixPropertyAtomKokkos() +{ + // deallocate per-atom vectors in Atom class + // set ptrs to a null pointer, so they no longer exist for Atom class + + for (int nv = 0; nv < nvalue; nv++) { + if (styles[nv] == MOLECULE) { + atom->molecule_flag = 0; + memoryKK->destroy_kokkos(atomKK->k_molecule,atom->molecule); + atom->molecule = nullptr; + } else if (styles[nv] == CHARGE) { + atom->q_flag = 0; + memoryKK->destroy_kokkos(atomKK->k_q,atom->q); + atom->q = nullptr; + } else if (styles[nv] == RMASS) { + atom->rmass_flag = 0; + memoryKK->destroy_kokkos(atomKK->k_rmass,atom->rmass); + atom->rmass = nullptr; + } + } +} + /* ---------------------------------------------------------------------- allocate atom-based arrays initialize new values to 0, @@ -44,17 +68,19 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax) { for (int nv = 0; nv < nvalue; nv++) { if (styles[nv] == MOLECULE) { - memory->grow(atom->molecule,nmax,"atom:molecule"); + atomKK->sync(Device,MOLECULE_MASK); + memoryKK->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); size_t nbytes = (nmax-nmax_old) * sizeof(tagint); - memset(&atom->molecule[nmax_old],0,nbytes); + atomKK->modified(Device,MOLECULE_MASK); } else if (styles[nv] == CHARGE) { - memory->grow(atom->q,nmax,"atom:q"); + atomKK->sync(Device,Q_MASK); + memoryKK->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q"); size_t nbytes = (nmax-nmax_old) * sizeof(double); - memset(&atom->q[nmax_old],0,nbytes); + 
atomKK->modified(Device,Q_MASK); } else if (styles[nv] == RMASS) { - memory->grow(atom->rmass,nmax,"atom:rmass"); - size_t nbytes = (nmax-nmax_old) * sizeof(double); - memset(&atom->rmass[nmax_old],0,nbytes); + atomKK->sync(Device,MOLECULE_MASK); + memoryKK->grow_kokkos(atomKK->k_rmass,atomKK->rmass,nmax,"atom:rmass"); + atomKK->modified(Device,RMASS_MASK); } else if (styles[nv] == TEMPERATURE) { memory->grow(atom->temperature, nmax, "atom:temperature"); size_t nbytes = (nmax - nmax_old) * sizeof(double); diff --git a/src/KOKKOS/fix_property_atom_kokkos.h b/src/KOKKOS/fix_property_atom_kokkos.h index 90eddc98e0..29b07b1f5b 100644 --- a/src/KOKKOS/fix_property_atom_kokkos.h +++ b/src/KOKKOS/fix_property_atom_kokkos.h @@ -28,6 +28,7 @@ namespace LAMMPS_NS { class FixPropertyAtomKokkos : public FixPropertyAtom { public: FixPropertyAtomKokkos(class LAMMPS *, int, char **); + ~FixPropertyAtomKokkos() override; void grow_arrays(int) override; }; From b38e828c49a755f16afe5b06f28b8101a1050de5 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 5 Oct 2023 22:51:48 -0600 Subject: [PATCH 064/107] Prevent segfault with unsupported radial basis --- src/KOKKOS/pair_pace_kokkos.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/KOKKOS/pair_pace_kokkos.cpp b/src/KOKKOS/pair_pace_kokkos.cpp index 56a6656d78..153a6d0333 100644 --- a/src/KOKKOS/pair_pace_kokkos.cpp +++ b/src/KOKKOS/pair_pace_kokkos.cpp @@ -237,6 +237,9 @@ void PairPACEKokkos::copy_splines() ACERadialFunctions* radial_functions = dynamic_cast(basis_set->radial_functions); + if (radial_functions == nullptr) + error->all(FLERR,"Chosen radial basis style not supported by pair style pace/kk"); + for (int i = 0; i < nelements; i++) { for (int j = 0; j < nelements; j++) { k_splines_gk.h_view(i, j) = radial_functions->splines_gk(i, j); From 4ff226b00f664defa3a761244f41b4d1f00457b6 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 9 Oct 2023 16:12:27 -0600 Subject: [PATCH 065/107] Fix issues in Kokkos fix 
property/atom --- src/KOKKOS/atom_kokkos.cpp | 52 ++++++++++++- src/KOKKOS/atom_kokkos.h | 4 + src/KOKKOS/atom_vec_dpd_kokkos.cpp | 8 -- src/KOKKOS/atom_vec_kokkos.h | 2 + src/KOKKOS/fix_property_atom_kokkos.cpp | 98 +++++++++++++++++++++---- src/KOKKOS/fix_property_atom_kokkos.h | 10 +++ src/KOKKOS/kokkos_base.h | 8 +- src/fix_property_atom.cpp | 14 +++- src/fix_property_atom.h | 1 + 9 files changed, 163 insertions(+), 34 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index 03537e7b88..a009972628 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -44,6 +44,10 @@ AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp) h_tag_min = Kokkos::subview(h_tag_min_max,0); h_tag_max = Kokkos::subview(h_tag_min_max,1); + + nprop_atom = 0; + prop_atom = nullptr; + fix_prop_atom = nullptr; } /* ---------------------------------------------------------------------- */ @@ -112,6 +116,7 @@ AtomKokkos::~AtomKokkos() memoryKK->destroy_kokkos(k_dvector, dvector); dvector = nullptr; + delete [] fix_prop_atom; } /* ---------------------------------------------------------------------- */ @@ -121,15 +126,46 @@ void AtomKokkos::init() Atom::init(); sort_classic = lmp->kokkos->sort_classic; + + nprop_atom = 0; + for (int ifix = 0; ifix < modify->nfix; ifix++) { + if (modify->fix[ifix] && utils::strmatch(modify->fix[ifix]->style, "^property/atom")) { + auto fix_i = modify->fix[ifix]; + if (!fix_i->kokkosable) + error->all(FLERR, "KOKKOS package requires a Kokkos-enabled version of fix property/atom"); + + memory->grow(prop_atom,nprop_atom+1,"atom::prop_atom"); + prop_atom[nprop_atom++] = ifix; + } + } + + delete [] fix_prop_atom; + fix_prop_atom = new FixPropertyAtomKokkos*[nprop_atom]; + + printf("HERE %i\n",nprop_atom); + + for (int n = 0; n < nprop_atom; n++) { + auto fix_n = dynamic_cast(modify->fix[prop_atom[n]]); + fix_n->atom_init_flag = 1; + fix_prop_atom[n] = fix_n; + } + + memory->destroy(prop_atom); } /* 
---------------------------------------------------------------------- */ void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask) { - if (space == Device && lmp->kokkos->auto_sync) avecKK->modified(Host, mask); + if (space == Device && lmp->kokkos->auto_sync) { + avecKK->modified(Host, mask); + for (int n = 0; n < nprop_atom; n++) + fix_prop_atom[n]->modified(Host, mask); + } avecKK->sync(space, mask); + for (int n = 0; n < nprop_atom; n++) + fix_prop_atom[n]->sync(space, mask); } /* ---------------------------------------------------------------------- */ @@ -137,13 +173,23 @@ void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask) void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask) { avecKK->modified(space, mask); + for (int n = 0; n < nprop_atom; n++) + fix_prop_atom[n]->modified(space, mask); - if (space == Device && lmp->kokkos->auto_sync) avecKK->sync(Host, mask); + if (space == Device && lmp->kokkos->auto_sync) { + avecKK->sync(Host, mask); + for (int n = 0; n < nprop_atom; n++) + fix_prop_atom[n]->sync(Host, mask); + } } +/* ---------------------------------------------------------------------- */ + void AtomKokkos::sync_overlapping_device(const ExecutionSpace space, unsigned int mask) { avecKK->sync_overlapping_device(space, mask); + for (int n = 0; n < nprop_atom; n++) + fix_prop_atom[n]->sync_overlapping_device(space, mask); } /* ---------------------------------------------------------------------- */ @@ -375,7 +421,7 @@ AtomVec *AtomKokkos::new_avec(const std::string &style, int trysuffix, int &sfla int hybrid_substyle_flag = (avec != nullptr); AtomVec *avec = Atom::new_avec(style, trysuffix, sflag); - if (!avec->kokkosable) error->all(FLERR, "KOKKOS package requires a kokkos enabled atom_style"); + if (!avec->kokkosable) error->all(FLERR, "KOKKOS package requires a Kokkos-enabled atom_style"); if (!hybrid_substyle_flag) avecKK = dynamic_cast(avec); diff --git a/src/KOKKOS/atom_kokkos.h 
b/src/KOKKOS/atom_kokkos.h index f8b00f21f2..c26a1b291f 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -14,6 +14,7 @@ #include "atom.h" // IWYU pragma: export #include "kokkos_type.h" +#include "fix_property_atom_kokkos.h" #include @@ -25,6 +26,9 @@ namespace LAMMPS_NS { class AtomKokkos : public Atom { public: bool sort_classic; + int nprop_atom; + int* prop_atom; + FixPropertyAtomKokkos** fix_prop_atom; DAT::tdual_tagint_1d k_tag; DAT::tdual_int_1d k_type, k_mask; diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index a8ce29f666..c3430b9f6e 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -963,7 +963,6 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) if (mask & UCG_MASK) atomKK->k_uCG.sync(); if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync(); if (mask & DUCHEM_MASK) atomKK->k_duChem.sync(); - if (mask & DVECTOR_MASK) atomKK->k_dvector.sync(); } else { if (mask & X_MASK) atomKK->k_x.sync(); if (mask & V_MASK) atomKK->k_v.sync(); @@ -980,7 +979,6 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask) if (mask & UCG_MASK) atomKK->k_uCG.sync(); if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync(); if (mask & DUCHEM_MASK) atomKK->k_duChem.sync(); - if (mask & DVECTOR_MASK) atomKK->k_dvector.sync(); } } @@ -1019,8 +1017,6 @@ void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned in perform_async_copy(atomKK->k_uCGnew,space); if ((mask & DUCHEM_MASK) && atomKK->k_duChem.need_sync()) perform_async_copy(atomKK->k_duChem,space); - if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync()) - perform_async_copy(atomKK->k_dvector,space); } else { if ((mask & X_MASK) && atomKK->k_x.need_sync()) perform_async_copy(atomKK->k_x,space); @@ -1052,8 +1048,6 @@ void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned in perform_async_copy(atomKK->k_uCGnew,space); if ((mask & DUCHEM_MASK) && 
atomKK->k_duChem.need_sync()) perform_async_copy(atomKK->k_duChem,space); - if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync()) - perform_async_copy(atomKK->k_dvector,space); } } @@ -1077,7 +1071,6 @@ void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask) if (mask & UCG_MASK) atomKK->k_uCG.modify(); if (mask & UCGNEW_MASK) atomKK->k_uCGnew.modify(); if (mask & DUCHEM_MASK) atomKK->k_duChem.modify(); - if (mask & DVECTOR_MASK) atomKK->k_dvector.modify(); } else { if (mask & X_MASK) atomKK->k_x.modify(); if (mask & V_MASK) atomKK->k_v.modify(); @@ -1094,6 +1087,5 @@ void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask) if (mask & UCG_MASK) atomKK->k_uCG.modify(); if (mask & UCGNEW_MASK) atomKK->k_uCGnew.modify(); if (mask & DUCHEM_MASK) atomKK->k_duChem.modify(); - if (mask & DVECTOR_MASK) atomKK->k_dvector.modify(); } } diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index d3b2578b68..c10ff5b40a 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -139,6 +139,8 @@ class AtomVecKokkos : virtual public AtomVec { DAT::tdual_int_1d k_count; + public: + #ifdef LMP_KOKKOS_GPU template Kokkos::Viewnmax); + kokkosable = 1; + + dvector_flag = 0; + for (int nv = 0; nv < nvalue; nv++) + if (styles[nv] == DVEC) dvector_flag = 1; + + atom_init_flag = 0; } /* ---------------------------------------------------------------------- */ @@ -68,19 +74,20 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax) { for (int nv = 0; nv < nvalue; nv++) { if (styles[nv] == MOLECULE) { - atomKK->sync(Device,MOLECULE_MASK); - memoryKK->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); - size_t nbytes = (nmax-nmax_old) * sizeof(tagint); - atomKK->modified(Device,MOLECULE_MASK); + if (!atom_init_flag) this->modified(Host,MOLECULE_MASK); + else atomKK->sync(Device,MOLECULE_MASK); + memoryKK->grow_kokkos(atomKK->k_molecule,atom->molecule,nmax,"atom:molecule"); + if (atom_init_flag) 
atomKK->modified(Device,MOLECULE_MASK); } else if (styles[nv] == CHARGE) { - atomKK->sync(Device,Q_MASK); - memoryKK->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q"); - size_t nbytes = (nmax-nmax_old) * sizeof(double); - atomKK->modified(Device,Q_MASK); + if (!atom_init_flag) this->modified(Host,Q_MASK); + else atomKK->sync(Device,Q_MASK); + memoryKK->grow_kokkos(atomKK->k_q,atom->q,nmax,"atom:q"); + if (atom_init_flag) atomKK->modified(Device,Q_MASK); } else if (styles[nv] == RMASS) { - atomKK->sync(Device,MOLECULE_MASK); - memoryKK->grow_kokkos(atomKK->k_rmass,atomKK->rmass,nmax,"atom:rmass"); - atomKK->modified(Device,RMASS_MASK); + if (!atom_init_flag) this->modified(Host,RMASS_MASK); + else atomKK->sync(Device,RMASS_MASK); + memoryKK->grow_kokkos(atomKK->k_rmass,atom->rmass,nmax,"atom:rmass"); + if (atom_init_flag) atomKK->modified(Device,RMASS_MASK); } else if (styles[nv] == TEMPERATURE) { memory->grow(atom->temperature, nmax, "atom:temperature"); size_t nbytes = (nmax - nmax_old) * sizeof(double); @@ -94,10 +101,11 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax) size_t nbytes = (nmax-nmax_old) * sizeof(int); memset(&atom->ivector[index[nv]][nmax_old],0,nbytes); } else if (styles[nv] == DVEC) { - atomKK->sync(Device,DVECTOR_MASK); - memoryKK->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.extent(0),nmax, + if (!atom_init_flag) this->modified(Host,DVECTOR_MASK); + else atomKK->sync(Device,DVECTOR_MASK); + memoryKK->grow_kokkos(atomKK->k_dvector,atom->dvector,atomKK->k_dvector.extent(0),nmax, "atom:dvector"); - atomKK->modified(Device,DVECTOR_MASK); + if (atom_init_flag) atomKK->modified(Device,DVECTOR_MASK); } else if (styles[nv] == IARRAY) { memory->grow(atom->iarray[index[nv]], nmax, cols[nv], "atom:iarray"); size_t nbytes = (size_t) (nmax - nmax_old) * cols[nv] * sizeof(int); @@ -110,3 +118,63 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax) } nmax_old = nmax; } + +/* 
---------------------------------------------------------------------- */ + +void FixPropertyAtomKokkos::sync(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + + if (molecule_flag && (mask & MOLECULE_MASK)) atomKK->k_molecule.sync(); + if (q_flag && (mask & Q_MASK)) atomKK->k_q.sync(); + if (rmass_flag && (mask & RMASS_MASK)) {atomKK->k_rmass.sync();} + if (dvector_flag && (mask & DVECTOR_MASK)) atomKK->k_dvector.sync(); + } else { + if (molecule_flag && (mask & MOLECULE_MASK)) atomKK->k_molecule.sync(); + if (q_flag && (mask & Q_MASK)) atomKK->k_q.sync(); + if (rmass_flag && (mask & RMASS_MASK)) atomKK->k_rmass.sync(); + if (dvector_flag && (mask & DVECTOR_MASK)) atomKK->k_dvector.sync(); + } +} + +/* ---------------------------------------------------------------------- */ + +void FixPropertyAtomKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync()) + atomKK->avecKK->perform_async_copy(atomKK->k_molecule,space); + if ((mask & Q_MASK) && atomKK->k_q.need_sync()) + atomKK->avecKK->perform_async_copy(atomKK->k_q,space); + if ((mask & RMASS_MASK) && atomKK->k_rmass.need_sync()) + atomKK->avecKK->perform_async_copy(atomKK->k_rmass,space); + if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync()) + atomKK->avecKK->perform_async_copy(atomKK->k_dvector,space); + } else { + if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync()) + atomKK->avecKK->perform_async_copy(atomKK->k_molecule,space); + if ((mask & Q_MASK) && atomKK->k_q.need_sync()) + atomKK->avecKK->perform_async_copy(atomKK->k_q,space); + if ((mask & RMASS_MASK) && atomKK->k_rmass.need_sync()) + atomKK->avecKK->perform_async_copy(atomKK->k_rmass,space); + if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync()) + atomKK->avecKK->perform_async_copy(atomKK->k_dvector,space); + } +} + +/* ---------------------------------------------------------------------- */ + +void 
FixPropertyAtomKokkos::modified(ExecutionSpace space, unsigned int mask) +{ + if (space == Device) { + if (molecule_flag && (mask & MOLECULE_MASK)) atomKK->k_molecule.modify(); + if (q_flag && (mask & Q_MASK)) atomKK->k_q.modify(); + if (rmass_flag && (mask & RMASS_MASK)) atomKK->k_rmass.modify(); + if (dvector_flag && (mask & DVECTOR_MASK)) atomKK->k_dvector.modify(); + } else { + if (molecule_flag && (mask & MOLECULE_MASK)) atomKK->k_molecule.modify(); + if (q_flag && (mask & Q_MASK)) atomKK->k_q.modify(); + if (rmass_flag && (mask & RMASS_MASK)) atomKK->k_rmass.modify(); + if (dvector_flag && (mask & DVECTOR_MASK)) atomKK->k_dvector.modify(); + } +} diff --git a/src/KOKKOS/fix_property_atom_kokkos.h b/src/KOKKOS/fix_property_atom_kokkos.h index 29b07b1f5b..74a20a28b5 100644 --- a/src/KOKKOS/fix_property_atom_kokkos.h +++ b/src/KOKKOS/fix_property_atom_kokkos.h @@ -22,6 +22,7 @@ FixStyle(property/atom/kk,FixPropertyAtomKokkos); #define LMP_FIX_PROPERTY_ATOM_KOKKOS_H #include "fix_property_atom.h" +#include "atom_vec_kokkos.h" namespace LAMMPS_NS { @@ -31,6 +32,15 @@ class FixPropertyAtomKokkos : public FixPropertyAtom { ~FixPropertyAtomKokkos() override; void grow_arrays(int) override; + + void sync(ExecutionSpace space, unsigned int mask); + void modified(ExecutionSpace space, unsigned int mask); + void sync_overlapping_device(ExecutionSpace space, unsigned int mask); + + int atom_init_flag; + + private: + int dvector_flag; }; } diff --git a/src/KOKKOS/kokkos_base.h b/src/KOKKOS/kokkos_base.h index 7d9ecb5d80..1e22a38657 100644 --- a/src/KOKKOS/kokkos_base.h +++ b/src/KOKKOS/kokkos_base.h @@ -41,11 +41,6 @@ class KokkosBase { int, int *) {return 0;}; virtual void unpack_forward_comm_fix_kokkos(int, int, DAT::tdual_xfloat_1d &) {} - - // Region - virtual void match_all_kokkos(int, DAT::tdual_int_1d) {} - - // Fix virtual int pack_exchange_kokkos(const int & /*nsend*/, DAT::tdual_xfloat_2d & /*k_buf*/, DAT::tdual_int_1d /*k_sendlist*/, DAT::tdual_int_1d 
/*k_copylist*/, @@ -54,6 +49,9 @@ class KokkosBase { DAT::tdual_int_1d & /*indices*/, int /*nrecv*/, ExecutionSpace /*space*/) {} + // Region + virtual void match_all_kokkos(int, DAT::tdual_int_1d) {} + using KeyViewType = DAT::t_x_array; using BinOp = BinOp3DLAMMPS; virtual void diff --git a/src/fix_property_atom.cpp b/src/fix_property_atom.cpp index 994b4f0f19..3a53110839 100644 --- a/src/fix_property_atom.cpp +++ b/src/fix_property_atom.cpp @@ -198,16 +198,24 @@ FixPropertyAtom::FixPropertyAtom(LAMMPS *lmp, int narg, char **arg) : astyle = utils::strdup(atom->atom_style); - // perform initial allocation of atom-based array // register with Atom class - nmax_old = 0; - if (!lmp->kokkos) FixPropertyAtom::grow_arrays(atom->nmax); atom->add_callback(Atom::GROW); atom->add_callback(Atom::RESTART); if (border) atom->add_callback(Atom::BORDER); } + +/* ---------------------------------------------------------------------- */ + +void FixPropertyAtom::post_constructor() +{ + // perform initial allocation of atom-based array + + nmax_old = 0; + grow_arrays(atom->nmax); +} + /* ---------------------------------------------------------------------- */ FixPropertyAtom::~FixPropertyAtom() diff --git a/src/fix_property_atom.h b/src/fix_property_atom.h index 92497d6188..c50b6049dc 100644 --- a/src/fix_property_atom.h +++ b/src/fix_property_atom.h @@ -27,6 +27,7 @@ namespace LAMMPS_NS { class FixPropertyAtom : public Fix { public: FixPropertyAtom(class LAMMPS *, int, char **); + void post_constructor() override; ~FixPropertyAtom() override; int setmask() override; void init() override; From 2b1985f755fb5019f03d4e76b6174808e7f543c1 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 10 Oct 2023 11:36:49 -0400 Subject: [PATCH 066/107] restore GNU make build capability for ML-PACE --- lib/pace/Makefile | 6 +++--- lib/pace/Makefile.lammps | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/pace/Makefile b/lib/pace/Makefile index 5a1588ef93..37f00ee70e 
100644 --- a/lib/pace/Makefile +++ b/lib/pace/Makefile @@ -1,4 +1,5 @@ SHELL = /bin/sh +CMAKE= cmake # ------ FILES ------ @@ -35,7 +36,7 @@ lib: $(OBJ) lib-yaml-cpp lib-yaml-cpp: - cd $(YAML_CPP_PATH) && $(MAKE) lib + cd $(YAML_CPP_PATH) && $(CMAKE) . && $(MAKE) all # ------ COMPILE RULES ------ @@ -45,9 +46,8 @@ lib-yaml-cpp: # ------ CLEAN ------ clean-all: -rm -f *~ $(OBJ) $(LIB) - cd $(YAML_CPP_PATH) && $(MAKE) clean-all + cd $(YAML_CPP_PATH) && $(MAKE) clean clean-build: -rm -f *~ $(OBJ) - cd $(YAML_CPP_PATH) && $(MAKE) clean-build diff --git a/lib/pace/Makefile.lammps b/lib/pace/Makefile.lammps index 6411e49a07..1c549e31d0 100644 --- a/lib/pace/Makefile.lammps +++ b/lib/pace/Makefile.lammps @@ -1,3 +1,3 @@ pace_SYSINC =-I../../lib/pace/src/ML-PACE -I../../lib/pace/src/yaml-cpp/include -I../../lib/pace/src/wigner-cpp/include -DEXTRA_C_PROJECTIONS -pace_SYSLIB = -L../../lib/pace/ -lpace -L../../lib/pace/src/yaml-cpp/ -lyaml-cpp +pace_SYSLIB = -L../../lib/pace/ -lpace -L../../lib/pace/src/yaml-cpp/ -lyaml-cpp-pace pace_SYSPATH = From 07a03336702ff90501b2158b1ab14d6f1f2391fe Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 10 Oct 2023 13:53:56 -0400 Subject: [PATCH 067/107] prevent potential memory leak --- src/compute_msd_chunk.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/compute_msd_chunk.cpp b/src/compute_msd_chunk.cpp index 07234ecfdb..6e7436d6ad 100644 --- a/src/compute_msd_chunk.cpp +++ b/src/compute_msd_chunk.cpp @@ -27,8 +27,8 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ ComputeMSDChunk::ComputeMSDChunk(LAMMPS *lmp, int narg, char **arg) : - ComputeChunk(lmp, narg, arg), id_fix(nullptr), massproc(nullptr), masstotal(nullptr), - com(nullptr), comall(nullptr), msd(nullptr) + ComputeChunk(lmp, narg, arg), id_fix(nullptr), fix(nullptr), massproc(nullptr), + masstotal(nullptr), com(nullptr), comall(nullptr), msd(nullptr) { if (narg != 4) 
error->all(FLERR, "Illegal compute msd/chunk command"); @@ -196,6 +196,12 @@ void ComputeMSDChunk::compute_array() void ComputeMSDChunk::allocate() { ComputeChunk::allocate(); + memory->destroy(massproc); + memory->destroy(masstotal); + memory->destroy(com); + memory->destroy(comall); + memory->destroy(msd); + memory->create(massproc, nchunk, "msd/chunk:massproc"); memory->create(masstotal, nchunk, "msd/chunk:masstotal"); memory->create(com, nchunk, 3, "msd/chunk:com"); From 84eab70fb4252eb691d52000dae1ce2280ffb2ea Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Tue, 10 Oct 2023 22:18:41 -0400 Subject: [PATCH 068/107] must select entire non-blank string under cursor for completion insertions --- tools/lammps-gui/codeeditor.cpp | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/tools/lammps-gui/codeeditor.cpp b/tools/lammps-gui/codeeditor.cpp index 34193bc320..7a1624c192 100644 --- a/tools/lammps-gui/codeeditor.cpp +++ b/tools/lammps-gui/codeeditor.cpp @@ -564,7 +564,7 @@ void CodeEditor::keyPressEvent(QKeyEvent *event) // process key event in parent class QPlainTextEdit::keyPressEvent(event); - // if enabled, try pop up completion automatically after 3 characters + // if enabled, try pop up completion automatically after 2 characters if (automatic_completion) { auto cursor = textCursor(); auto line = cursor.block().text(); @@ -990,8 +990,26 @@ void CodeEditor::insertCompletedCommand(const QString &completion) { auto *completer = qobject_cast(sender()); if (completer->widget() != this) return; + + // select the entire word (non-space text) under the cursor + // we need to do it in this complicated way, since QTextCursor does not recognize + // special characters as part of a word.
auto cursor = textCursor(); - cursor.movePosition(QTextCursor::StartOfWord, QTextCursor::KeepAnchor); + auto line = cursor.block().text(); + int begin = cursor.positionInBlock(); + do { + if (line[begin].isSpace()) break; + --begin; + } while (begin >= 0); + + int end = begin + 1; + while (end < line.length()) { + if (line[end].isSpace()) break; + ++end; + } + + cursor.setPosition(cursor.position() - cursor.positionInBlock() + begin + 1); + cursor.movePosition(QTextCursor::NextCharacter, QTextCursor::KeepAnchor, end - begin); cursor.insertText(completion); setTextCursor(cursor); } From 86a5d6bb151c196bb1843d947d901fe228625669 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 11 Oct 2023 02:05:23 -0400 Subject: [PATCH 069/107] must re-initialized threads also for neigbor lists --- src/OPENMP/npair_omp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/OPENMP/npair_omp.h b/src/OPENMP/npair_omp.h index 318fddfd54..7249c59406 100644 --- a/src/OPENMP/npair_omp.h +++ b/src/OPENMP/npair_omp.h @@ -32,6 +32,7 @@ namespace LAMMPS_NS { // get access to number of threads and per-thread data structures via FixOMP #define NPAIR_OMP_INIT \ const int nthreads = comm->nthreads; \ + omp_set_num_threads(nthreads); \ const int ifix = modify->find_fix("package_omp") // get thread id and then assign each thread a fixed chunk of atoms From cc5c5bbd498553dc529f94516401a24979b65a79 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 11 Oct 2023 02:12:03 -0400 Subject: [PATCH 070/107] block waiting for end of stdout data not more than a second --- tools/lammps-gui/stdcapture.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/lammps-gui/stdcapture.cpp b/tools/lammps-gui/stdcapture.cpp index 428277cc10..b09aebf053 100644 --- a/tools/lammps-gui/stdcapture.cpp +++ b/tools/lammps-gui/stdcapture.cpp @@ -77,6 +77,7 @@ bool StdCapture::EndCapture() int bytesRead; bool fd_blocked; + int maxwait = 100; do { bytesRead = 0; @@ -93,9 +94,10 @@ bool 
StdCapture::EndCapture() buf[bytesRead] = 0; m_captured += buf; } else if (bytesRead < 0) { - fd_blocked = ((errno == EAGAIN) || (errno == EWOULDBLOCK) || (errno == EINTR)); + fd_blocked = ((errno == EAGAIN) || (errno == EWOULDBLOCK) || (errno == EINTR)) && (maxwait > 0); if (fd_blocked) std::this_thread::sleep_for(std::chrono::milliseconds(10)); + --maxwait; } } while (fd_blocked || (bytesRead == (bufSize - 1))); m_capturing = false; From eff74405c1382b4d9b20734e62134333bcf5f88d Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 11 Oct 2023 02:16:23 -0400 Subject: [PATCH 071/107] don't replace end of line character from completion --- tools/lammps-gui/codeeditor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lammps-gui/codeeditor.cpp b/tools/lammps-gui/codeeditor.cpp index 7a1624c192..b7a14871b7 100644 --- a/tools/lammps-gui/codeeditor.cpp +++ b/tools/lammps-gui/codeeditor.cpp @@ -1009,7 +1009,7 @@ void CodeEditor::insertCompletedCommand(const QString &completion) } cursor.setPosition(cursor.position() - cursor.positionInBlock() + begin + 1); - cursor.movePosition(QTextCursor::NextCharacter, QTextCursor::KeepAnchor, end - begin); + cursor.movePosition(QTextCursor::NextCharacter, QTextCursor::KeepAnchor, end - begin - 1); cursor.insertText(completion); setTextCursor(cursor); } From bb7e8c66edfedccb7f192d06a6a1595f2c931b13 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 11 Oct 2023 08:26:50 -0400 Subject: [PATCH 072/107] clarify --- doc/src/pair_reaxff.rst | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/src/pair_reaxff.rst b/doc/src/pair_reaxff.rst index 4dac9baf85..067eb3afc3 100644 --- a/doc/src/pair_reaxff.rst +++ b/doc/src/pair_reaxff.rst @@ -43,22 +43,22 @@ Examples Description """"""""""" -Style *reaxff* computes the ReaxFF potential of van Duin, Goddard and -co-workers. 
ReaxFF uses distance-dependent bond-order functions to +Pair style *reaxff* computes the ReaxFF potential of van Duin, Goddard +and co-workers. ReaxFF uses distance-dependent bond-order functions to represent the contributions of chemical bonding to the potential -energy. There is more than one version of ReaxFF. The version +energy. There is more than one version of ReaxFF. The version implemented in LAMMPS uses the functional forms documented in the supplemental information of the following paper: -:ref:`(Chenoweth et al., 2008) `. The version integrated -into LAMMPS matches the version of ReaxFF From Summer 2010. For more -technical details about the pair reaxff implementation of ReaxFF, see -the :ref:`(Aktulga) ` paper. The *reaxff* style was initially -implemented as a stand-alone C code and is now converted to C++ and -integrated into LAMMPS as a package. +:ref:`(Chenoweth et al., 2008) ` and matches the +version of the reference ReaxFF implementation from Summer 2010. For +more technical details about the implementation of ReaxFF in pair style +*reaxff*, see the :ref:`(Aktulga) ` paper. The *reaxff* style +was initially implemented as a stand-alone C code and is now converted +to C++ and integrated into LAMMPS as a package. The *reaxff/kk* style is a Kokkos version of the ReaxFF potential that -is derived from the *reaxff* style. The Kokkos version can run on GPUs -and can also use OpenMP multithreading. For more information about the +is derived from the *reaxff* style. The Kokkos version can run on GPUs +and can also use OpenMP multithreading. For more information about the Kokkos package, see :doc:`Packages details ` and :doc:`Speed kokkos ` doc pages. 
One important consideration when using the *reaxff/kk* style is the choice of either From 45172d33a9c2296111e341260912b5cb380e5aac Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 11 Oct 2023 09:20:05 -0400 Subject: [PATCH 073/107] use potential file name distributed with LAMMPS in examples --- doc/src/pair_ilp_tmd.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/src/pair_ilp_tmd.rst b/doc/src/pair_ilp_tmd.rst index 482d75a100..70a4768389 100644 --- a/doc/src/pair_ilp_tmd.rst +++ b/doc/src/pair_ilp_tmd.rst @@ -22,12 +22,12 @@ Examples .. code-block:: LAMMPS pair_style hybrid/overlay ilp/tmd 16.0 1 - pair_coeff * * ilp/tmd TMD.ILP Mo S S + pair_coeff * * ilp/tmd MoS2.ILP Mo S S pair_style hybrid/overlay sw/mod sw/mod ilp/tmd 16.0 pair_coeff * * sw/mod 1 tmd.sw.mod Mo S S NULL NULL NULL pair_coeff * * sw/mod 2 tmd.sw.mod NULL NULL NULL Mo S S - pair_coeff * * ilp/tmd TMD.ILP Mo S S Mo S S + pair_coeff * * ilp/tmd MoS2.ILP Mo S S Mo S S Description """"""""""" @@ -69,7 +69,7 @@ calculating the normals. each atom `i`, its six nearest neighboring atoms belonging to the same sub-layer are chosen to define the normal vector `{\bf n}_i`. -The parameter file (e.g. TMD.ILP), is intended for use with *metal* +The parameter file (e.g. MoS2.ILP), is intended for use with *metal* :doc:`units `, with energies in meV. Two additional parameters, *S*, and *rcut* are included in the parameter file. *S* is designed to facilitate scaling of energies. *rcut* is designed to build the neighbor @@ -77,7 +77,7 @@ list for calculating the normals for each atom pair. .. note:: - The parameters presented in the parameter file (e.g. TMD.ILP), + The parameters presented in the parameter file (e.g. MoS2.ILP), are fitted with taper function by setting the cutoff equal to 16.0 Angstrom. Using different cutoff or taper function should be careful. 
These parameters provide a good description in both short- and long-range @@ -133,10 +133,10 @@ if LAMMPS was built with that package. See the :doc:`Build package This pair style requires the newton setting to be *on* for pair interactions. -The TMD.ILP potential file provided with LAMMPS (see the potentials +The MoS2.ILP potential file provided with LAMMPS (see the potentials directory) are parameterized for *metal* units. You can use this potential with any LAMMPS units, but you would need to create your own -custom TMD.ILP potential file with coefficients listed in the appropriate +custom MoS2.ILP potential file with coefficients listed in the appropriate units, if your simulation does not use *metal* units. Related commands From fbdd61e28c814ba21a1c2e774386d1f469fc0981 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 11 Oct 2023 18:15:13 -0400 Subject: [PATCH 074/107] enable spacefilling sphere option also for systems with per-atom radius --- tools/lammps-gui/imageviewer.cpp | 14 +++++++++----- tools/lammps-gui/imageviewer.h | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tools/lammps-gui/imageviewer.cpp b/tools/lammps-gui/imageviewer.cpp index 86be0b66df..ee81f61253 100644 --- a/tools/lammps-gui/imageviewer.cpp +++ b/tools/lammps-gui/imageviewer.cpp @@ -131,7 +131,7 @@ static const QString blank(" "); ImageViewer::ImageViewer(const QString &fileName, LammpsWrapper *_lammps, QWidget *parent) : QDialog(parent), menuBar(new QMenuBar), imageLabel(new QLabel), scrollArea(new QScrollArea), - lammps(_lammps), group("all"), filename(fileName), useelements(false) + lammps(_lammps), group("all"), filename(fileName), useelements(false), usediameter(false) { imageLabel->setBackgroundRole(QPalette::Base); imageLabel->setSizePolicy(QSizePolicy::Ignored, QSizePolicy::Ignored); @@ -268,7 +268,7 @@ ImageViewer::ImageViewer(const QString &fileName, LammpsWrapper *_lammps, QWidge // properties directly since lookup in reset_view() will have failed 
dobox->setChecked(showbox); dovdw->setChecked(vdwfactor > 1.0); - dovdw->setEnabled(useelements); + dovdw->setEnabled(useelements || usediameter); doaxes->setChecked(showaxes); dossao->setChecked(usessao); doanti->setChecked(antialias); @@ -435,7 +435,7 @@ void ImageViewer::createImage() dumpcmd += "'" + dumpfile.fileName() + "'"; settings.beginGroup("snapshot"); - int hhrot = (hrot > 180) ? 360 - hrot : hrot; + int hhrot = (hrot > 180) ? 360 - hrot : hrot; // determine elements from masses and set their covalent radii int ntypes = lammps->extract_setting("ntypes"); @@ -454,9 +454,10 @@ void ImageViewer::createImage() adiams += QString("adiam %1 %2 ").arg(i).arg(vdwfactor * pte_vdw_radius[idx]); } } + usediameter = lammps->extract_setting("radius_flag") != 0; // adjust pushbutton state and clear adiams string to disable VDW display, if needed - if (useelements) { + if (useelements || usediameter) { auto *button = findChild("vdw"); if (button) button->setEnabled(true); } else { @@ -469,7 +470,10 @@ void ImageViewer::createImage() dumpcmd += blank + "element"; else dumpcmd += blank + settings.value("color", "type").toString(); - dumpcmd += blank + settings.value("diameter", "type").toString(); + if (usediameter && (vdwfactor > 1.0)) + dumpcmd += blank + "diameter"; + else + dumpcmd += blank + settings.value("diameter", "type").toString(); dumpcmd += QString(" size %1 %2").arg(xsize).arg(ysize); dumpcmd += QString(" zoom %1").arg(zoom); dumpcmd += " shiny 0.5 "; diff --git a/tools/lammps-gui/imageviewer.h b/tools/lammps-gui/imageviewer.h index 8946c6cc8b..1be7790666 100644 --- a/tools/lammps-gui/imageviewer.h +++ b/tools/lammps-gui/imageviewer.h @@ -88,7 +88,7 @@ private: int xsize, ysize; int hrot, vrot; double zoom, vdwfactor; - bool showbox, showaxes, antialias, usessao, useelements; + bool showbox, showaxes, antialias, usessao, useelements, usediameter; }; #endif From 9b108a631bb4b7361855b755ce6accfd657bbbf8 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 11 
Oct 2023 16:36:15 -0600 Subject: [PATCH 075/107] Do not zero forces in pair if using hybrid/overlay --- src/KOKKOS/pair_buck_coul_cut_kokkos.h | 13 +-- src/KOKKOS/pair_buck_coul_long_kokkos.h | 46 ++++++----- src/KOKKOS/pair_buck_kokkos.h | 15 ++-- src/KOKKOS/pair_coul_cut_kokkos.h | 13 +-- src/KOKKOS/pair_coul_debye_kokkos.h | 13 +-- src/KOKKOS/pair_coul_long_kokkos.h | 46 ++++++----- src/KOKKOS/pair_kokkos.h | 81 +++++++++++-------- ...ir_lj_charmm_coul_charmm_implicit_kokkos.h | 46 ++++++----- .../pair_lj_charmm_coul_charmm_kokkos.h | 46 ++++++----- src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h | 46 ++++++----- src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h | 13 +-- src/KOKKOS/pair_lj_class2_coul_long_kokkos.h | 46 ++++++----- src/KOKKOS/pair_lj_class2_kokkos.h | 15 ++-- src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h | 13 +-- src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h | 13 +-- src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h | 13 +-- src/KOKKOS/pair_lj_cut_coul_long_kokkos.h | 46 ++++++----- src/KOKKOS/pair_lj_cut_kokkos.h | 15 ++-- src/KOKKOS/pair_lj_expand_coul_long_kokkos.h | 46 ++++++----- src/KOKKOS/pair_lj_expand_kokkos.h | 15 ++-- .../pair_lj_gromacs_coul_gromacs_kokkos.h | 46 ++++++----- src/KOKKOS/pair_lj_gromacs_kokkos.h | 46 ++++++----- src/KOKKOS/pair_lj_spica_kokkos.h | 15 ++-- src/KOKKOS/pair_morse_kokkos.h | 15 ++-- src/KOKKOS/pair_table_kokkos.cpp | 12 +-- src/KOKKOS/pair_table_kokkos.h | 51 ++++++------ src/KOKKOS/pair_yukawa_kokkos.h | 19 +++-- src/KOKKOS/pair_zbl_kokkos.h | 15 ++-- 28 files changed, 465 insertions(+), 354 deletions(-) diff --git a/src/KOKKOS/pair_buck_coul_cut_kokkos.h b/src/KOKKOS/pair_buck_coul_cut_kokkos.h index b91348d557..9b6cc31898 100644 --- a/src/KOKKOS/pair_buck_coul_cut_kokkos.h +++ b/src/KOKKOS/pair_buck_coul_cut_kokkos.h @@ -112,15 +112,18 @@ class PairBuckCoulCutKokkos : public PairBuckCoulCut { void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; 
friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairBuckCoulCutKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairBuckCoulCutKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairBuckCoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairBuckCoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairBuckCoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairBuckCoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairBuckCoulCutKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute(PairBuckCoulCutKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairBuckCoulCutKokkos*); diff --git a/src/KOKKOS/pair_buck_coul_long_kokkos.h b/src/KOKKOS/pair_buck_coul_long_kokkos.h index b776a84e3c..bed9b0d0f8 100644 --- a/src/KOKKOS/pair_buck_coul_long_kokkos.h +++ b/src/KOKKOS/pair_buck_coul_long_kokkos.h @@ -115,27 +115,33 @@ class PairBuckCoulLongKokkos : public PairBuckCoulLong { void allocate() override; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairBuckCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairBuckCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairBuckCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairBuckCoulLongKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct 
PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairBuckCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairBuckCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairBuckCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairBuckCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairBuckCoulLongKokkos*, NeighListKokkos*); - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairBuckCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairBuckCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairBuckCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairBuckCoulLongKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairBuckCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairBuckCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairBuckCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairBuckCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairBuckCoulLongKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairBuckCoulLongKokkos*); diff --git a/src/KOKKOS/pair_buck_kokkos.h b/src/KOKKOS/pair_buck_kokkos.h index 364716453b..15325cd56a 100644 --- 
a/src/KOKKOS/pair_buck_kokkos.h +++ b/src/KOKKOS/pair_buck_kokkos.h @@ -91,16 +91,19 @@ class PairBuckKokkos : public PairBuck { int nlocal,nall,eflag,vflag; void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairBuckKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairBuckKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairBuckKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute(PairBuckKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairBuckKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairBuckKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairBuckKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairBuckKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute(PairBuckKokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairBuckKokkos*); }; diff --git a/src/KOKKOS/pair_coul_cut_kokkos.h b/src/KOKKOS/pair_coul_cut_kokkos.h index 6626889660..3e0501edd9 100644 --- a/src/KOKKOS/pair_coul_cut_kokkos.h +++ b/src/KOKKOS/pair_coul_cut_kokkos.h @@ -112,15 +112,18 @@ class PairCoulCutKokkos : public PairCoulCut { double qqrd2e; void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairCoulCutKokkos*,NeighListKokkos*); 
- friend EV_FLOAT pair_compute_neighlist(PairCoulCutKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairCoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairCoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairCoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairCoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairCoulCutKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute(PairCoulCutKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairCoulCutKokkos*); diff --git a/src/KOKKOS/pair_coul_debye_kokkos.h b/src/KOKKOS/pair_coul_debye_kokkos.h index b6bed9d557..d239291a25 100644 --- a/src/KOKKOS/pair_coul_debye_kokkos.h +++ b/src/KOKKOS/pair_coul_debye_kokkos.h @@ -112,15 +112,18 @@ class PairCoulDebyeKokkos : public PairCoulDebye { double qqrd2e; void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairCoulDebyeKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairCoulDebyeKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairCoulDebyeKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairCoulDebyeKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairCoulDebyeKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairCoulDebyeKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairCoulDebyeKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute(PairCoulDebyeKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairCoulDebyeKokkos*); diff --git 
a/src/KOKKOS/pair_coul_long_kokkos.h b/src/KOKKOS/pair_coul_long_kokkos.h index fcb1402028..232cdbb6df 100644 --- a/src/KOKKOS/pair_coul_long_kokkos.h +++ b/src/KOKKOS/pair_coul_long_kokkos.h @@ -114,27 +114,33 @@ class PairCoulLongKokkos : public PairCoulLong { void allocate() override; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairCoulLongKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairCoulLongKokkos*, NeighListKokkos*); - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairCoulLongKokkos*,NeighListKokkos*); - friend 
EV_FLOAT pair_compute >(PairCoulLongKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairCoulLongKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairCoulLongKokkos*); diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index 2c2a622791..d3c766f5ae 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -50,7 +50,7 @@ struct DoCoul<1> { //Specialisation for Neighborlist types Half, HalfThread, Full -template +template struct PairComputeFunctor { typedef typename PairStyle::device_type device_type ; typedef ArrayTypes AT; @@ -137,7 +137,7 @@ struct PairComputeFunctor { F_FLOAT fytmp = 0.0; F_FLOAT fztmp = 0.0; - if (NEIGHFLAG == FULL) { + if (NEIGHFLAG == FULL && ZEROFLAG) { f(i,0) = 0.0; f(i,1) = 0.0; f(i,2) = 0.0; @@ -211,7 +211,7 @@ struct PairComputeFunctor { F_FLOAT fytmp = 0.0; F_FLOAT fztmp = 0.0; - if (NEIGHFLAG == FULL) { + if (NEIGHFLAG == FULL && ZEROFLAG) { f(i,0) = 0.0; f(i,1) = 0.0; f(i,2) = 0.0; @@ -292,11 +292,13 @@ struct PairComputeFunctor { const X_FLOAT ztmp = c.x(i,2); const int itype = c.type(i); - Kokkos::single(Kokkos::PerThread(team), [&] (){ - f(i,0) = 0.0; - f(i,1) = 0.0; - f(i,2) = 0.0; - }); + if (ZEROFLAG) { + Kokkos::single(Kokkos::PerThread(team), [&] (){ + f(i,0) = 0.0; + f(i,1) = 0.0; + f(i,2) = 0.0; + }); + } const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); const int 
jnum = list.d_numneigh[i]; @@ -355,11 +357,13 @@ struct PairComputeFunctor { const int itype = c.type(i); const F_FLOAT qtmp = c.q(i); - Kokkos::single(Kokkos::PerThread(team), [&] (){ - f(i,0) = 0.0; - f(i,1) = 0.0; - f(i,2) = 0.0; - }); + if (ZEROFLAG) { + Kokkos::single(Kokkos::PerThread(team), [&] (){ + f(i,0) = 0.0; + f(i,1) = 0.0; + f(i,2) = 0.0; + }); + } const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); const int jnum = list.d_numneigh[i]; @@ -423,11 +427,13 @@ struct PairComputeFunctor { const X_FLOAT ztmp = c.x(i,2); const int itype = c.type(i); - Kokkos::single(Kokkos::PerThread(team), [&] (){ - f(i,0) = 0.0; - f(i,1) = 0.0; - f(i,2) = 0.0; - }); + if (ZEROFLAG) { + Kokkos::single(Kokkos::PerThread(team), [&] (){ + f(i,0) = 0.0; + f(i,1) = 0.0; + f(i,2) = 0.0; + }); + } const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); const int jnum = list.d_numneigh[i]; @@ -525,11 +531,13 @@ struct PairComputeFunctor { const int itype = c.type(i); const F_FLOAT qtmp = c.q(i); - Kokkos::single(Kokkos::PerThread(team), [&] (){ - f(i,0) = 0.0; - f(i,1) = 0.0; - f(i,2) = 0.0; - }); + if (ZEROFLAG) { + Kokkos::single(Kokkos::PerThread(team), [&] (){ + f(i,0) = 0.0; + f(i,1) = 0.0; + f(i,2) = 0.0; + }); + } const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); const int jnum = list.d_numneigh[i]; @@ -740,7 +748,7 @@ struct PairComputeFunctor { // By having the enable_if with a ! and without it, exactly one of the functions // pair_compute_neighlist will match - either the dummy version // or the real one further below. 
-template +template EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t*> list) { EV_FLOAT ev; (void) fpair; @@ -770,7 +778,7 @@ int GetTeamSize(FunctorStyle& KOKKOS_GPU_ARG(functor), int KOKKOS_GPU_ARG(inum), } // Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL -template +template EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos*> list) { EV_FLOAT ev; @@ -784,13 +792,13 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P int atoms_per_team = 32; if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) { - PairComputeFunctor ff(fpair,list); + PairComputeFunctor ff(fpair,list); atoms_per_team = GetTeamSize(ff, list->inum, (fpair->eflag || fpair->vflag), atoms_per_team, vector_length); Kokkos::TeamPolicy > policy(list->inum,atoms_per_team,vector_length); if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev); else Kokkos::parallel_for(policy,ff); } else { - PairComputeFunctor ff(fpair,list); + PairComputeFunctor ff(fpair,list); atoms_per_team = GetTeamSize(ff, list->inum, (fpair->eflag || fpair->vflag), atoms_per_team, vector_length); Kokkos::TeamPolicy > policy(list->inum,atoms_per_team,vector_length); if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev); @@ -798,12 +806,12 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P } } else { if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) { - PairComputeFunctor ff(fpair,list); + PairComputeFunctor ff(fpair,list); if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); else Kokkos::parallel_for(list->inum,ff); ff.contribute(); } else { - PairComputeFunctor ff(fpair,list); + PairComputeFunctor ff(fpair,list); if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); else Kokkos::parallel_for(list->inum,ff); ff.contribute(); @@ -812,16 +820,21 @@ EV_FLOAT pair_compute_neighlist (PairStyle* 
fpair, std::enable_if_t<(NEIGHFLAG&P return ev; } -template +template EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos* list) { EV_FLOAT ev; if (fpair->neighflag == FULL) { - fpair->fuse_force_clear_flag = 1; - ev = pair_compute_neighlist (fpair,list); + if (utils::strmatch(fpair->lmp->force->pair_style,"^hybrid/overlay")) { + fpair->fuse_force_clear_flag = 0; + ev = pair_compute_neighlist (fpair,list); + } else { + fpair->fuse_force_clear_flag = 1; + ev = pair_compute_neighlist (fpair,list); + } } else if (fpair->neighflag == HALFTHREAD) { - ev = pair_compute_neighlist (fpair,list); + ev = pair_compute_neighlist (fpair,list); } else if (fpair->neighflag == HALF) { - ev = pair_compute_neighlist (fpair,list); + ev = pair_compute_neighlist (fpair,list); } return ev; } diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h index ae27ee68ab..7e21676fd5 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h +++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.h @@ -110,27 +110,33 @@ class PairLJCharmmCoulCharmmImplicitKokkos : public PairLJCharmmCoulCharmmImplic void allocate() override; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJCharmmCoulCharmmImplicitKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend 
struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJCharmmCoulCharmmImplicitKokkos*, NeighListKokkos*); - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJCharmmCoulCharmmImplicitKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJCharmmCoulCharmmImplicitKokkos*, NeighListKokkos*); friend void 
pair_virial_fdotr_compute(PairLJCharmmCoulCharmmImplicitKokkos*); diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h index 912ad573c6..1f26242ded 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h +++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.h @@ -108,27 +108,33 @@ class PairLJCharmmCoulCharmmKokkos : public PairLJCharmmCoulCharmm { void allocate() override; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJCharmmCoulCharmmKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJCharmmCoulCharmmKokkos*, NeighListKokkos*); - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT 
pair_compute_neighlist >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJCharmmCoulCharmmKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJCharmmCoulCharmmKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJCharmmCoulCharmmKokkos*); diff --git a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h index 4ae8a12944..c6c80e76dc 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h +++ b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.h @@ -106,27 +106,33 @@ class PairLJCharmmCoulLongKokkos : public PairLJCharmmCoulLong { void allocate() override; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulLongKokkos*,NeighListKokkos*); - friend 
EV_FLOAT pair_compute >(PairLJCharmmCoulLongKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJCharmmCoulLongKokkos*, NeighListKokkos*); - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCharmmCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJCharmmCoulLongKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCharmmCoulLongKokkos*,NeighListKokkos*); + 
friend EV_FLOAT pair_compute>(PairLJCharmmCoulLongKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJCharmmCoulLongKokkos*); diff --git a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h index 5ca276c28e..9399345458 100644 --- a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h +++ b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.h @@ -104,15 +104,18 @@ class PairLJClass2CoulCutKokkos : public PairLJClass2CoulCut { double qqrd2e; void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairLJClass2CoulCutKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJClass2CoulCutKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJClass2CoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJClass2CoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJClass2CoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJClass2CoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJClass2CoulCutKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute(PairLJClass2CoulCutKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJClass2CoulCutKokkos*); diff --git a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h index 599cc2a83c..1cf6590855 100644 --- a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h +++ b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.h @@ -107,27 +107,33 @@ class PairLJClass2CoulLongKokkos : public PairLJClass2CoulLong { double qqrd2e; void allocate() override; - friend 
struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJClass2CoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJClass2CoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJClass2CoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJClass2CoulLongKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJClass2CoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJClass2CoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJClass2CoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJClass2CoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJClass2CoulLongKokkos*, NeighListKokkos*); - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJClass2CoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJClass2CoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJClass2CoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJClass2CoulLongKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend 
struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJClass2CoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJClass2CoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJClass2CoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJClass2CoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJClass2CoulLongKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJClass2CoulLongKokkos*); diff --git a/src/KOKKOS/pair_lj_class2_kokkos.h b/src/KOKKOS/pair_lj_class2_kokkos.h index 0936399ca8..5594680929 100644 --- a/src/KOKKOS/pair_lj_class2_kokkos.h +++ b/src/KOKKOS/pair_lj_class2_kokkos.h @@ -96,16 +96,19 @@ class PairLJClass2Kokkos : public PairLJClass2 { int nlocal,nall,eflag,vflag; void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairLJClass2Kokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJClass2Kokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJClass2Kokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute(PairLJClass2Kokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJClass2Kokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJClass2Kokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJClass2Kokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJClass2Kokkos*,NeighListKokkos*); + friend EV_FLOAT 
pair_compute(PairLJClass2Kokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJClass2Kokkos*); }; diff --git a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h index 87464b37dc..affc67bf16 100644 --- a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h +++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.h @@ -104,15 +104,18 @@ class PairLJCutCoulCutKokkos : public PairLJCutCoulCut { double qqrd2e; void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulCutKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulCutKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulCutKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute(PairLJCutCoulCutKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJCutCoulCutKokkos*); diff --git a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h index ea0b401959..eeed483b76 100644 --- a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h +++ b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.h @@ -104,15 +104,18 @@ class PairLJCutCoulDebyeKokkos : public PairLJCutCoulDebye { double qqrd2e; void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct 
PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulDebyeKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulDebyeKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulDebyeKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulDebyeKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulDebyeKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulDebyeKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulDebyeKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute(PairLJCutCoulDebyeKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJCutCoulDebyeKokkos*); diff --git a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h index e420bd22a9..d9e5fcfe49 100644 --- a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h +++ b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.h @@ -101,15 +101,18 @@ class PairLJCutCoulDSFKokkos : public PairLJCutCoulDSF { double qqrd2e; void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulDSFKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulDSFKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulDSFKokkos*,NeighListKokkos*); + friend EV_FLOAT 
pair_compute_neighlist(PairLJCutCoulDSFKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulDSFKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulDSFKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutCoulDSFKokkos*,NeighListKokkos*); friend EV_FLOAT pair_compute(PairLJCutCoulDSFKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJCutCoulDSFKokkos*); diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h index bcb97a59cd..ec6e2db176 100644 --- a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h +++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.h @@ -107,27 +107,33 @@ class PairLJCutCoulLongKokkos : public PairLJCutCoulLong { double qqrd2e; void allocate() override; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJCutCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCutCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCutCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJCutCoulLongKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJCutCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCutCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCutCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCutCoulLongKokkos*,NeighListKokkos*); + 
friend EV_FLOAT pair_compute>(PairLJCutCoulLongKokkos*, NeighListKokkos*); - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJCutCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCutCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJCutCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJCutCoulLongKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJCutCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCutCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCutCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJCutCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJCutCoulLongKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJCutCoulLongKokkos*); diff --git a/src/KOKKOS/pair_lj_cut_kokkos.h b/src/KOKKOS/pair_lj_cut_kokkos.h index 106f1a9048..b44c1aa6fe 100644 --- a/src/KOKKOS/pair_lj_cut_kokkos.h +++ b/src/KOKKOS/pair_lj_cut_kokkos.h @@ -92,16 +92,19 @@ class PairLJCutKokkos : public PairLJCut { int nlocal,nall,eflag,vflag; void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; 
friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairLJCutKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJCutKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJCutKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute(PairLJCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJCutKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute(PairLJCutKokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJCutKokkos*); }; diff --git a/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h b/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h index 09a694a122..30e82b7dab 100644 --- a/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h +++ b/src/KOKKOS/pair_lj_expand_coul_long_kokkos.h @@ -116,27 +116,33 @@ class PairLJExpandCoulLongKokkos : public PairLJExpandCoulLong { double qqrd2e; void allocate() override; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJExpandCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJExpandCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJExpandCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJExpandCoulLongKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend 
struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJExpandCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJExpandCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJExpandCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJExpandCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJExpandCoulLongKokkos*, NeighListKokkos*); - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJExpandCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJExpandCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJExpandCoulLongKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJExpandCoulLongKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJExpandCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJExpandCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJExpandCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJExpandCoulLongKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJExpandCoulLongKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJExpandCoulLongKokkos*); }; diff --git a/src/KOKKOS/pair_lj_expand_kokkos.h b/src/KOKKOS/pair_lj_expand_kokkos.h index 0df0a6f8f8..64fe7d8b8e 100644 --- a/src/KOKKOS/pair_lj_expand_kokkos.h +++ 
b/src/KOKKOS/pair_lj_expand_kokkos.h @@ -97,16 +97,19 @@ class PairLJExpandKokkos : public PairLJExpand { int nlocal,nall,eflag,vflag; void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairLJExpandKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJExpandKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJExpandKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute(PairLJExpandKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJExpandKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJExpandKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJExpandKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJExpandKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute(PairLJExpandKokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJExpandKokkos*); }; diff --git a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h index 359c4a1229..020b621e33 100644 --- a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h +++ b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.h @@ -115,27 +115,33 @@ class PairLJGromacsCoulGromacsKokkos : public PairLJGromacsCoulGromacs { void allocate() override; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos*); - friend EV_FLOAT 
pair_compute_neighlist >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJGromacsCoulGromacsKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJGromacsCoulGromacsKokkos*, NeighListKokkos*); - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJGromacsCoulGromacsKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT 
pair_compute_neighlist>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsCoulGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJGromacsCoulGromacsKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJGromacsCoulGromacsKokkos*); diff --git a/src/KOKKOS/pair_lj_gromacs_kokkos.h b/src/KOKKOS/pair_lj_gromacs_kokkos.h index 95c600a415..ad41ca5120 100644 --- a/src/KOKKOS/pair_lj_gromacs_kokkos.h +++ b/src/KOKKOS/pair_lj_gromacs_kokkos.h @@ -115,27 +115,33 @@ class PairLJGromacsKokkos : public PairLJGromacs { void allocate() override; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJGromacsKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJGromacsKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJGromacsKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJGromacsKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJGromacsKokkos*, NeighListKokkos*); - friend struct PairComputeFunctor >; - friend 
struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend EV_FLOAT pair_compute_neighlist >(PairLJGromacsKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJGromacsKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist >(PairLJGromacsKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute >(PairLJGromacsKokkos*, + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend struct PairComputeFunctor>; + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist>(PairLJGromacsKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute>(PairLJGromacsKokkos*, NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJGromacsKokkos*); diff --git a/src/KOKKOS/pair_lj_spica_kokkos.h b/src/KOKKOS/pair_lj_spica_kokkos.h index b330af4bfd..06c70ebd3e 100644 --- a/src/KOKKOS/pair_lj_spica_kokkos.h +++ b/src/KOKKOS/pair_lj_spica_kokkos.h @@ -97,16 +97,19 @@ class PairLJSPICAKokkos : public PairLJSPICA { int nlocal,nall,eflag,vflag; void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairLJSPICAKokkos*,NeighListKokkos*); - 
friend EV_FLOAT pair_compute_neighlist(PairLJSPICAKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairLJSPICAKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute(PairLJSPICAKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJSPICAKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJSPICAKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJSPICAKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairLJSPICAKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute(PairLJSPICAKokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairLJSPICAKokkos*); }; diff --git a/src/KOKKOS/pair_morse_kokkos.h b/src/KOKKOS/pair_morse_kokkos.h index d06cf2deb1..ccf27b018b 100644 --- a/src/KOKKOS/pair_morse_kokkos.h +++ b/src/KOKKOS/pair_morse_kokkos.h @@ -92,16 +92,19 @@ class PairMorseKokkos : public PairMorse { int nlocal,nall,eflag,vflag; void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairMorseKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairMorseKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairMorseKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute(PairMorseKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairMorseKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairMorseKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairMorseKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairMorseKokkos*,NeighListKokkos*); + friend EV_FLOAT 
pair_compute(PairMorseKokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairMorseKokkos*); }; diff --git a/src/KOKKOS/pair_table_kokkos.cpp b/src/KOKKOS/pair_table_kokkos.cpp index 83bd74d4af..99d01be4a5 100644 --- a/src/KOKKOS/pair_table_kokkos.cpp +++ b/src/KOKKOS/pair_table_kokkos.cpp @@ -133,19 +133,19 @@ void PairTableKokkos::compute_style(int eflag_in, int vflag_in) EV_FLOAT ev; if (atom->ntypes > MAX_TYPES_STACKPARAMS) { if (neighflag == FULL) { - PairComputeFunctor,FULL,false,S_TableCompute > + PairComputeFunctor,FULL,false,0,S_TableCompute > ff(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev); else Kokkos::parallel_for(list->inum,ff); ff.contribute(); } else if (neighflag == HALFTHREAD) { - PairComputeFunctor,HALFTHREAD,false,S_TableCompute > + PairComputeFunctor,HALFTHREAD,false,0,S_TableCompute > ff(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,ff,ev); else Kokkos::parallel_for(list->inum,ff); ff.contribute(); } else if (neighflag == HALF) { - PairComputeFunctor,HALF,false,S_TableCompute > + PairComputeFunctor,HALF,false,0,S_TableCompute > f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); else Kokkos::parallel_for(list->inum,f); @@ -153,19 +153,19 @@ void PairTableKokkos::compute_style(int eflag_in, int vflag_in) } } else { if (neighflag == FULL) { - PairComputeFunctor,FULL,true,S_TableCompute > + PairComputeFunctor,FULL,true,0,S_TableCompute > f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); else Kokkos::parallel_for(list->inum,f); f.contribute(); } else if (neighflag == HALFTHREAD) { - PairComputeFunctor,HALFTHREAD,true,S_TableCompute > + PairComputeFunctor,HALFTHREAD,true,0,S_TableCompute > f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); else Kokkos::parallel_for(list->inum,f); f.contribute(); } else if (neighflag == 
HALF) { - PairComputeFunctor,HALF,true,S_TableCompute > + PairComputeFunctor,HALF,true,0,S_TableCompute > f(this,(NeighListKokkos*) list); if (eflag || vflag) Kokkos::parallel_reduce(list->inum,f,ev); else Kokkos::parallel_for(list->inum,f); diff --git a/src/KOKKOS/pair_table_kokkos.h b/src/KOKKOS/pair_table_kokkos.h index 80226d3770..18112e4c18 100644 --- a/src/KOKKOS/pair_table_kokkos.h +++ b/src/KOKKOS/pair_table_kokkos.h @@ -35,9 +35,6 @@ struct S_TableCompute { static constexpr int TabStyle = TABSTYLE; }; -template -struct PairTableComputeFunctor; - template class PairTableKokkos : public PairTable { public: @@ -135,33 +132,33 @@ class PairTableKokkos : public PairTable { F_FLOAT compute_ecoul(const F_FLOAT& /*rsq*/, const int& /*i*/, const int& /*j*/, const int& /*itype*/, const int& /*jtype*/) const { return 0; } - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; + friend struct 
PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; - friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; + friend struct PairComputeFunctor >; friend void pair_virial_fdotr_compute(PairTableKokkos*); }; diff --git a/src/KOKKOS/pair_yukawa_kokkos.h b/src/KOKKOS/pair_yukawa_kokkos.h index e04f65264b..dc93e83aea 100644 --- a/src/KOKKOS/pair_yukawa_kokkos.h +++ b/src/KOKKOS/pair_yukawa_kokkos.h @@ -95,20 +95,19 @@ class PairYukawaKokkos : public PairYukawa { int nlocal,nall,eflag,vflag; void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist( - PairYukawaKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist( - PairYukawaKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist( - PairYukawaKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute( - PairYukawaKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairYukawaKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairYukawaKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairYukawaKokkos*,NeighListKokkos*); + friend EV_FLOAT 
pair_compute_neighlist(PairYukawaKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute(PairYukawaKokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairYukawaKokkos*); }; diff --git a/src/KOKKOS/pair_zbl_kokkos.h b/src/KOKKOS/pair_zbl_kokkos.h index bd33cdb5e0..b7638a25e0 100644 --- a/src/KOKKOS/pair_zbl_kokkos.h +++ b/src/KOKKOS/pair_zbl_kokkos.h @@ -89,16 +89,19 @@ class PairZBLKokkos : public PairZBL { void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist(PairZBLKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairZBLKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist(PairZBLKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute(PairZBLKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairZBLKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairZBLKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairZBLKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairZBLKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute(PairZBLKokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairZBLKokkos*); }; From 8bc396558342e3dea89abcfb4364bf6260706be3 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 12 Oct 2023 12:06:55 -0600 Subject: [PATCH 076/107] More robust refactor of Kokkos fix property/atom --- src/KOKKOS/atom_kokkos.cpp | 8 ++++--- src/KOKKOS/atom_kokkos.h | 1 + src/KOKKOS/fix_property_atom_kokkos.cpp | 32 ++++++++++++++----------- src/KOKKOS/fix_property_atom_kokkos.h | 4 +--- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git 
a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index a009972628..f118b2a204 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -126,7 +126,12 @@ void AtomKokkos::init() Atom::init(); sort_classic = lmp->kokkos->sort_classic; +} +/* ---------------------------------------------------------------------- */ + +void AtomKokkos::update_property_atom() +{ nprop_atom = 0; for (int ifix = 0; ifix < modify->nfix; ifix++) { if (modify->fix[ifix] && utils::strmatch(modify->fix[ifix]->style, "^property/atom")) { @@ -142,11 +147,8 @@ void AtomKokkos::init() delete [] fix_prop_atom; fix_prop_atom = new FixPropertyAtomKokkos*[nprop_atom]; - printf("HERE %i\n",nprop_atom); - for (int n = 0; n < nprop_atom; n++) { auto fix_n = dynamic_cast(modify->fix[prop_atom[n]]); - fix_n->atom_init_flag = 1; fix_prop_atom[n] = fix_n; } diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index c26a1b291f..26ea7da296 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -148,6 +148,7 @@ class AtomKokkos : public Atom { } void init() override; + void update_property_atom(); void allocate_type_arrays() override; void sync(const ExecutionSpace space, unsigned int mask); void modified(const ExecutionSpace space, unsigned int mask); diff --git a/src/KOKKOS/fix_property_atom_kokkos.cpp b/src/KOKKOS/fix_property_atom_kokkos.cpp index 080a8f6fe4..dcd943cac6 100644 --- a/src/KOKKOS/fix_property_atom_kokkos.cpp +++ b/src/KOKKOS/fix_property_atom_kokkos.cpp @@ -35,8 +35,15 @@ FixPropertyAtomKokkos::FixPropertyAtomKokkos(LAMMPS *lmp, int narg, char **arg) dvector_flag = 0; for (int nv = 0; nv < nvalue; nv++) if (styles[nv] == DVEC) dvector_flag = 1; +} - atom_init_flag = 0; +/* ---------------------------------------------------------------------- */ + +void FixPropertyAtomKokkos::post_constructor() +{ + atomKK->update_property_atom(); + + FixPropertyAtom::post_constructor(); } /* 
---------------------------------------------------------------------- */ @@ -61,6 +68,8 @@ FixPropertyAtomKokkos::~FixPropertyAtomKokkos() atom->rmass = nullptr; } } + + atomKK->update_property_atom(); } /* ---------------------------------------------------------------------- @@ -74,20 +83,17 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax) { for (int nv = 0; nv < nvalue; nv++) { if (styles[nv] == MOLECULE) { - if (!atom_init_flag) this->modified(Host,MOLECULE_MASK); - else atomKK->sync(Device,MOLECULE_MASK); + atomKK->sync(Device,MOLECULE_MASK); memoryKK->grow_kokkos(atomKK->k_molecule,atom->molecule,nmax,"atom:molecule"); - if (atom_init_flag) atomKK->modified(Device,MOLECULE_MASK); + atomKK->modified(Device,MOLECULE_MASK); } else if (styles[nv] == CHARGE) { - if (!atom_init_flag) this->modified(Host,Q_MASK); - else atomKK->sync(Device,Q_MASK); + atomKK->sync(Device,Q_MASK); memoryKK->grow_kokkos(atomKK->k_q,atom->q,nmax,"atom:q"); - if (atom_init_flag) atomKK->modified(Device,Q_MASK); + atomKK->modified(Device,Q_MASK); } else if (styles[nv] == RMASS) { - if (!atom_init_flag) this->modified(Host,RMASS_MASK); - else atomKK->sync(Device,RMASS_MASK); + atomKK->sync(Device,RMASS_MASK); memoryKK->grow_kokkos(atomKK->k_rmass,atom->rmass,nmax,"atom:rmass"); - if (atom_init_flag) atomKK->modified(Device,RMASS_MASK); + atomKK->modified(Device,RMASS_MASK); } else if (styles[nv] == TEMPERATURE) { memory->grow(atom->temperature, nmax, "atom:temperature"); size_t nbytes = (nmax - nmax_old) * sizeof(double); @@ -101,11 +107,10 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax) size_t nbytes = (nmax-nmax_old) * sizeof(int); memset(&atom->ivector[index[nv]][nmax_old],0,nbytes); } else if (styles[nv] == DVEC) { - if (!atom_init_flag) this->modified(Host,DVECTOR_MASK); - else atomKK->sync(Device,DVECTOR_MASK); + atomKK->sync(Device,DVECTOR_MASK); memoryKK->grow_kokkos(atomKK->k_dvector,atom->dvector,atomKK->k_dvector.extent(0),nmax, "atom:dvector"); - if (atom_init_flag) 
atomKK->modified(Device,DVECTOR_MASK); + atomKK->modified(Device,DVECTOR_MASK); } else if (styles[nv] == IARRAY) { memory->grow(atom->iarray[index[nv]], nmax, cols[nv], "atom:iarray"); size_t nbytes = (size_t) (nmax - nmax_old) * cols[nv] * sizeof(int); @@ -124,7 +129,6 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax) void FixPropertyAtomKokkos::sync(ExecutionSpace space, unsigned int mask) { if (space == Device) { - if (molecule_flag && (mask & MOLECULE_MASK)) atomKK->k_molecule.sync(); if (q_flag && (mask & Q_MASK)) atomKK->k_q.sync(); if (rmass_flag && (mask & RMASS_MASK)) {atomKK->k_rmass.sync();} diff --git a/src/KOKKOS/fix_property_atom_kokkos.h b/src/KOKKOS/fix_property_atom_kokkos.h index 74a20a28b5..adbe6ab20b 100644 --- a/src/KOKKOS/fix_property_atom_kokkos.h +++ b/src/KOKKOS/fix_property_atom_kokkos.h @@ -29,16 +29,14 @@ namespace LAMMPS_NS { class FixPropertyAtomKokkos : public FixPropertyAtom { public: FixPropertyAtomKokkos(class LAMMPS *, int, char **); + void post_constructor() override; ~FixPropertyAtomKokkos() override; - void grow_arrays(int) override; void sync(ExecutionSpace space, unsigned int mask); void modified(ExecutionSpace space, unsigned int mask); void sync_overlapping_device(ExecutionSpace space, unsigned int mask); - int atom_init_flag; - private: int dvector_flag; }; From 4910401f6c43c54318e1c4509475c6f693129f0b Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 12 Oct 2023 12:52:57 -0600 Subject: [PATCH 077/107] Add accelerator package text --- doc/src/fix_efield.rst | 6 ++++++ doc/src/fix_spring_self.rst | 6 ++++++ doc/src/pair_yukawa_colloid.rst | 6 ++++++ 3 files changed, 18 insertions(+) diff --git a/doc/src/fix_efield.rst b/doc/src/fix_efield.rst index c6ac3a0722..71be030266 100644 --- a/doc/src/fix_efield.rst +++ b/doc/src/fix_efield.rst @@ -211,6 +211,12 @@ the iteration count during the minimization. system (the quantity being minimized), you MUST enable the :doc:`fix_modify ` *energy* option for this fix. 
+---------- + +.. include:: accel_styles.rst + +---------- + Restrictions """""""""""" diff --git a/doc/src/fix_spring_self.rst b/doc/src/fix_spring_self.rst index 0def6d51f0..3a3e246455 100644 --- a/doc/src/fix_spring_self.rst +++ b/doc/src/fix_spring_self.rst @@ -81,6 +81,12 @@ invoked by the :doc:`minimize ` command. you MUST enable the :doc:`fix_modify ` *energy* option for this fix. +---------- + +.. include:: accel_styles.rst + +---------- + Restrictions """""""""""" none diff --git a/doc/src/pair_yukawa_colloid.rst b/doc/src/pair_yukawa_colloid.rst index 96893f8e37..c6f201d249 100644 --- a/doc/src/pair_yukawa_colloid.rst +++ b/doc/src/pair_yukawa_colloid.rst @@ -132,6 +132,12 @@ per-type polydispersity is allowed. This means all particles of the same type must have the same diameter. Each type can have a different diameter. +---------- + +.. include:: accel_styles.rst + +---------- + Related commands """""""""""""""" From 26133bd26ad1251dd43e16c34f988997fa792b09 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 12 Oct 2023 16:29:01 -0400 Subject: [PATCH 078/107] Revert "restore GNU make build capability for ML-PACE" This reverts commit 2b1985f755fb5019f03d4e76b6174808e7f543c1. --- lib/pace/Makefile | 6 +++--- lib/pace/Makefile.lammps | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/pace/Makefile b/lib/pace/Makefile index 37f00ee70e..5a1588ef93 100644 --- a/lib/pace/Makefile +++ b/lib/pace/Makefile @@ -1,5 +1,4 @@ SHELL = /bin/sh -CMAKE= cmake # ------ FILES ------ @@ -36,7 +35,7 @@ lib: $(OBJ) lib-yaml-cpp lib-yaml-cpp: - cd $(YAML_CPP_PATH) && $(CMAKE) . 
&& $(MAKE) all + cd $(YAML_CPP_PATH) && $(MAKE) lib # ------ COMPILE RULES ------ @@ -46,8 +45,9 @@ lib-yaml-cpp: # ------ CLEAN ------ clean-all: -rm -f *~ $(OBJ) $(LIB) - cd $(YAML_CPP_PATH) && $(MAKE) clean + cd $(YAML_CPP_PATH) && $(MAKE) clean-all clean-build: -rm -f *~ $(OBJ) + cd $(YAML_CPP_PATH) && $(MAKE) clean-build diff --git a/lib/pace/Makefile.lammps b/lib/pace/Makefile.lammps index 1c549e31d0..6411e49a07 100644 --- a/lib/pace/Makefile.lammps +++ b/lib/pace/Makefile.lammps @@ -1,3 +1,3 @@ pace_SYSINC =-I../../lib/pace/src/ML-PACE -I../../lib/pace/src/yaml-cpp/include -I../../lib/pace/src/wigner-cpp/include -DEXTRA_C_PROJECTIONS -pace_SYSLIB = -L../../lib/pace/ -lpace -L../../lib/pace/src/yaml-cpp/ -lyaml-cpp-pace +pace_SYSLIB = -L../../lib/pace/ -lpace -L../../lib/pace/src/yaml-cpp/ -lyaml-cpp pace_SYSPATH = From 1a9dac83d7513e9dad2815b2a291f7b810eaa3a6 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 12 Oct 2023 21:52:32 -0400 Subject: [PATCH 079/107] update fix srd docs --- doc/src/fix_srd.rst | 49 +++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/doc/src/fix_srd.rst b/doc/src/fix_srd.rst index 1fc574a7ad..8bfbcf2387 100644 --- a/doc/src/fix_srd.rst +++ b/doc/src/fix_srd.rst @@ -71,14 +71,15 @@ imbue the SRD particles with fluid-like properties, including an effective viscosity. Thus simulations with large solute particles can be run more quickly, to measure solute properties like diffusivity and viscosity in a background fluid. The usual LAMMPS fixes for such -simulations, such as :doc:`fix deform `, :doc:`fix viscosity `, and :doc:`fix nvt/sllod `, +simulations, such as :doc:`fix deform `, +:doc:`fix viscosity `, and :doc:`fix nvt/sllod `, can be used in conjunction with the SRD model. -For more details on how the SRD model is implemented in LAMMPS, :ref:`this paper ` describes the implementation and usage of pure SRD -fluids. 
:ref:`This paper `, which is nearly complete, describes -the implementation and usage of mixture systems (solute particles in -an SRD fluid). See the examples/srd directory for sample input -scripts using SRD particles in both settings. +For more details on how the SRD model is implemented in LAMMPS, +:ref:`(Petersen) ` describes the implementation and usage of +pure SRD fluids. See the ``examples/srd`` directory for sample input +scripts using SRD particles for that and for mixture systems (solute +particles in an SRD fluid). This fix does two things: @@ -357,28 +358,28 @@ These are the 12 quantities. All are values for the current timestep, except for quantity 5 and the last three, each of which are cumulative quantities since the beginning of the run. -* (1) # of SRD/big collision checks performed -* (2) # of SRDs which had a collision -* (3) # of SRD/big collisions (including multiple bounces) -* (4) # of SRD particles inside a big particle -* (5) # of SRD particles whose velocity was rescaled to be < Vmax -* (6) # of bins for collision searching -* (7) # of bins for SRD velocity rotation -* (8) # of bins in which SRD temperature was computed -* (9) SRD temperature -* (10) # of SRD particles which have undergone max # of bounces -* (11) max # of bounces any SRD particle has had in a single step -* (12) # of reneighborings due to SRD particles moving too far +(1) # of SRD/big collision checks performed +(2) # of SRDs which had a collision +(3) # of SRD/big collisions (including multiple bounces) +(4) # of SRD particles inside a big particle +(5) # of SRD particles whose velocity was rescaled to be < Vmax +(6) # of bins for collision searching +(7) # of bins for SRD velocity rotation +(8) # of bins in which SRD temperature was computed +(9) SRD temperature +(10) # of SRD particles which have undergone max # of bounces +(11) max # of bounces any SRD particle has had in a single step +(12) # of reneighborings due to SRD particles moving too far No parameter of this 
fix can be used with the *start/stop* keywords of -the :doc:`run ` command. This fix is not invoked during :doc:`energy minimization `. +the :doc:`run ` command. This fix is not invoked during +:doc:`energy minimization `. Restrictions """""""""""" -This command can only be used if LAMMPS was built with the SRD -package. See the :doc:`Build package ` doc -page for more info. +This command can only be used if LAMMPS was built with the SRD package. +See the :doc:`Build package ` doc page for more info. Related commands """""""""""""""" @@ -403,7 +404,3 @@ no, and rescale = yes. **(Petersen)** Petersen, Lechman, Plimpton, Grest, in' t Veld, Schunk, J Chem Phys, 132, 174106 (2010). - -.. _Lechman: - -**(Lechman)** Lechman, et al, in preparation (2010). From 6ad5c0eced4a80b80a0c9b7aa2c3d8ee23450408 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 12 Oct 2023 23:24:11 -0400 Subject: [PATCH 080/107] port LAMMPS GUI to Qt6 while still supporting Qt5 --- tools/lammps-gui/CMakeLists.txt | 41 +++++++++++++++++++++++++------- tools/lammps-gui/chartviewer.cpp | 12 ++++++++-- tools/lammps-gui/chartviewer.h | 24 ++++++++++++------- tools/lammps-gui/lammpsgui.cpp | 16 ++++++++----- tools/lammps-gui/lammpsgui.h | 2 ++ 5 files changed, 71 insertions(+), 24 deletions(-) diff --git a/tools/lammps-gui/CMakeLists.txt b/tools/lammps-gui/CMakeLists.txt index 44fc45c0e2..83d5dc3216 100644 --- a/tools/lammps-gui/CMakeLists.txt +++ b/tools/lammps-gui/CMakeLists.txt @@ -11,6 +11,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) option(LAMMPS_GUI_USE_PLUGIN "Load LAMMPS library dynamically at runtime" OFF) mark_as_advanced(LAMMPS_GUI_USE_PLUGIN) +option(LAMMPS_GUI_USE_QT5 "Prefer using Qt5 over Qt6" OFF) # checks # when this file is included as subdirectory in the LAMMPS build, many settings are directly imported @@ -73,7 +74,15 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows") endif() # we require Qt 5 and at least version 5.12 at that. 
-find_package(Qt5 5.12 REQUIRED COMPONENTS Widgets Charts) +if(NOT LAMMPS_GUI_USE_QT5) + find_package(Qt6 6.2 COMPONENTS Widgets Charts) +endif() +if(NOT Qt6_FOUND) + find_package(Qt5 5.12 REQUIRED COMPONENTS Widgets Charts) + set(QT_VERSION_MAJOR "5") +else() + set(QT_VERSION_MAJOR "6") +endif() set(PROJECT_SOURCES main.cpp @@ -105,7 +114,11 @@ set(PROJECT_SOURCES ${PLUGIN_LOADER_SRC} ${ICON_RC_FILE} ) -qt5_add_resources(PROJECT_SOURCES lammpsgui.qrc) +if(QT_VERSION_MAJOR EQUAL 6) + qt6_add_resources(PROJECT_SOURCES lammpsgui.qrc) +else() + qt5_add_resources(PROJECT_SOURCES lammpsgui.qrc) +endif() if(APPLE) set(MACOSX_ICON_FILE ${LAMMPS_DIR}/cmake/packaging/lammps.icns) @@ -113,10 +126,22 @@ if(APPLE) set(MACOSX_BACKGROUND_FILE ${LAMMPS_DIR}/cmake/packaging/LAMMPS_DMG_Background.png) endif() -add_executable(lammps-gui - ${MACOSX_ICON_FILE} - ${PROJECT_SOURCES} -) +if(QT_VERSION_MAJOR EQUAL 6) + qt_add_executable(lammps-gui + MANUAL_FINALIZATION + ${MACOSX_ICON_FILE} + ${PROJECT_SOURCES} + ) +else() + add_executable(lammps-gui + ${MACOSX_ICON_FILE} + ${PROJECT_SOURCES} + ) +endif() + +if(QT_VERSION_MAJOR EQUAL 6) + qt_finalize_executable(lammps-gui) +endif() # compilation settings if(LAMMPS_GUI_USE_PLUGIN) @@ -128,7 +153,7 @@ else() endif() target_include_directories(lammps-gui PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) target_compile_definitions(lammps-gui PRIVATE LAMMPS_GUI_VERSION="${PROJECT_VERSION}") -target_link_libraries(lammps-gui PRIVATE Qt5::Widgets Qt5::Charts) +target_link_libraries(lammps-gui PRIVATE Qt${QT_VERSION_MAJOR}::Widgets Qt${VERSION_MAJOR}::Charts) if(BUILD_OMP) find_package(OpenMP COMPONENTS CXX REQUIRED) target_link_libraries(lammps-gui PRIVATE OpenMP::OpenMP_CXX) @@ -209,7 +234,7 @@ elseif((CMAKE_SYSTEM_NAME STREQUAL "Windows") AND CMAKE_CROSSCOMPILING) COMMENT "Create zip file with windows binaries" BYPRODUCT LAMMPS-Win10-amd64.zip WORKING_DIRECTORY ${CMAKE_BINARY_DIR}) -elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") +elseif((CMAKE_SYSTEM_NAME 
STREQUAL "Linux") AND NOT LAMMPS_GUI_USE_PLUGIN) install(TARGETS lammps-gui DESTINATION ${CMAKE_INSTALL_BINDIR}) install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/lammps-gui.desktop DESTINATION ${CMAKE_INSTALL_DATADIR}/applications/) install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/lammps-input.xml DESTINATION ${CMAKE_INSTALL_DATADIR}/mime/packages/) diff --git a/tools/lammps-gui/chartviewer.cpp b/tools/lammps-gui/chartviewer.cpp index f28625d9dc..ab03a110b5 100644 --- a/tools/lammps-gui/chartviewer.cpp +++ b/tools/lammps-gui/chartviewer.cpp @@ -15,11 +15,19 @@ #include "lammpsgui.h" +#include +#include +#include #include +#include +#include #include +#include +#include #include #include #include +#include #include using namespace QtCharts; @@ -288,7 +296,7 @@ void ChartViewer::add_data(int step, double data) if (last_step < step) { last_step = step; series->append(step, data); - auto points = series->pointsVector(); + auto points = series->points(); qreal xmin = 1.0e100; qreal xmax = -1.0e100; @@ -309,7 +317,7 @@ void ChartViewer::add_data(int step, double data) void ChartViewer::reset_zoom() { - auto points = series->pointsVector(); + auto points = series->points(); qreal xmin = 1.0e100; qreal xmax = -1.0e100; diff --git a/tools/lammps-gui/chartviewer.h b/tools/lammps-gui/chartviewer.h index 248fdad7bb..0954f4a9a8 100644 --- a/tools/lammps-gui/chartviewer.h +++ b/tools/lammps-gui/chartviewer.h @@ -14,16 +14,17 @@ #ifndef CHARTVIEWER_H #define CHARTVIEWER_H +#include #include #include #include -#include class QAction; class QMenuBar; class QMenu; -class QComboBox; +namespace QtCharts { class ChartViewer; +} class ChartWindow : public QWidget { Q_OBJECT @@ -64,12 +65,18 @@ private: QAction *closeAct, *stopAct, *quitAct; QString filename; - QList charts; + QList charts; }; /* -------------------------------------------------------------------- */ -class ChartViewer : public QtCharts::QChartView { +#include +#include +#include +#include + +namespace QtCharts { +class ChartViewer : 
public QChartView { Q_OBJECT public: @@ -86,11 +93,12 @@ public: private: int last_step, index; - QtCharts::QChart *chart; - QtCharts::QLineSeries *series; - QtCharts::QValueAxis *xaxis; - QtCharts::QValueAxis *yaxis; + QChart *chart; + QLineSeries *series; + QValueAxis *xaxis; + QValueAxis *yaxis; }; +} // namespace QtCharts #endif // Local Variables: diff --git a/tools/lammps-gui/lammpsgui.cpp b/tools/lammps-gui/lammpsgui.cpp index ba080dbec3..e0bcb38dc6 100644 --- a/tools/lammps-gui/lammpsgui.cpp +++ b/tools/lammps-gui/lammpsgui.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -69,8 +70,10 @@ LammpsGui::LammpsGui(QWidget *parent, const char *filename) : // enforce using the plain ASCII C locale within the GUI. QLocale::setDefault(QLocale("C")); - // register QList +#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0) + // register QList only needed for Qt5 qRegisterMetaTypeStreamOperators>("QList"); +#endif ui->setupUi(this); this->setCentralWidget(ui->textEdit); @@ -588,7 +591,8 @@ void LammpsGui::open_file(const QString &fileName) if (!file.open(QIODevice::ReadOnly | QFile::Text)) { QMessageBox::warning(this, "Warning", "Cannot open file " + path.absoluteFilePath() + ": " + - file.errorString() + ".\nWill create new file on saving editor buffer."); + file.errorString() + + ".\nWill create new file on saving editor buffer."); ui->textEdit->document()->setPlainText(QString()); } else { QTextStream in(&file); @@ -1039,9 +1043,9 @@ void LammpsGui::do_run(bool use_buffer) logwindow->document()->setDefaultFont(text_font); logwindow->setLineWrapMode(LogWindow::NoWrap); logwindow->setMinimumSize(400, 300); - QShortcut *shortcut = new QShortcut(QKeySequence(Qt::CTRL + Qt::Key_W), logwindow); + QShortcut *shortcut = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_W), logwindow); QObject::connect(shortcut, &QShortcut::activated, logwindow, &LogWindow::close); - shortcut = new QShortcut(QKeySequence(Qt::CTRL + Qt::Key_Slash), logwindow); + 
shortcut = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_Slash), logwindow); QObject::connect(shortcut, &QShortcut::activated, this, &LammpsGui::stop_run); if (settings.value("viewlog", true).toBool()) logwindow->show(); @@ -1058,9 +1062,9 @@ void LammpsGui::do_run(bool use_buffer) .arg(run_counter)); chartwindow->setWindowIcon(QIcon(":/icons/lammps-icon-128x128.png")); chartwindow->setMinimumSize(400, 300); - shortcut = new QShortcut(QKeySequence(Qt::CTRL + Qt::Key_W), chartwindow); + shortcut = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_W), chartwindow); QObject::connect(shortcut, &QShortcut::activated, chartwindow, &ChartWindow::close); - shortcut = new QShortcut(QKeySequence(Qt::CTRL + Qt::Key_Slash), chartwindow); + shortcut = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_Slash), chartwindow); QObject::connect(shortcut, &QShortcut::activated, this, &LammpsGui::stop_run); if (settings.value("viewchart", true).toBool()) chartwindow->show(); diff --git a/tools/lammps-gui/lammpsgui.h b/tools/lammps-gui/lammpsgui.h index 0c622f0285..0dd34f2c49 100644 --- a/tools/lammps-gui/lammpsgui.h +++ b/tools/lammps-gui/lammpsgui.h @@ -16,8 +16,10 @@ #include +#include #include #include +#include #include #include From 506de7015df9c6049033f9084a6cf62420ef2b5e Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 13 Oct 2023 05:13:30 -0400 Subject: [PATCH 081/107] document support for building LAMMPS GUI with Qt6 --- doc/src/Tools.rst | 15 +++++++++++---- tools/lammps-gui/TODO.md | 1 - 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/doc/src/Tools.rst b/doc/src/Tools.rst index a42e7c56a5..49022a4ee9 100644 --- a/doc/src/Tools.rst +++ b/doc/src/Tools.rst @@ -702,11 +702,15 @@ Prerequisites and portability LAMMPS GUI is programmed in C++ based on the C++11 standard and using the `Qt GUI framework `_. Currently, Qt version 5.12 or later is required; Qt 5.15LTS is -recommended; Qt 6.x not (yet) supported. Building LAMMPS with CMake is -required. 
The LAMMPS GUI has been successfully compiled and tested on: +recommended; support for Qt version 6.x is under active development and +thus far only tested with Qt 6.5LTS on Linux. Building LAMMPS with +CMake is required. + +The LAMMPS GUI has been successfully compiled and tested on: - Ubuntu Linux 20.04LTS x86_64 using GCC 9, Qt version 5.12 - Fedora Linux 38 x86\_64 using GCC 13 and Clang 16, Qt version 5.15LTS +- Fedora Linux 38 x86\_64 using GCC 13, Qt version 6.5LTS - Apple macOS 12 (Monterey) and macOS 13 (Ventura) with Xcode on arm64 and x86\_64, Qt version 5.15LTS - Windows 10 and 11 x86_64 with Visual Studio 2022 and Visual C++ 14.36, Qt version 5.15LTS - Windows 10 and 11 x86_64 with MinGW / GCC 10.0 cross-compiler on Fedora 38, Qt version 5.15LTS @@ -717,7 +721,7 @@ required. The LAMMPS GUI has been successfully compiled and tested on: Pre-compiled executables ^^^^^^^^^^^^^^^^^^^^^^^^ -Pre-compiled LAMMPS executables including the GUI are currently +Pre-compiled LAMMPS executable packages that include the GUI are currently available from https://download.lammps.org/static or https://github.com/lammps/lammps/releases. You can unpack the archives (or mount the macOS disk image) and run the GUI directly in place. The @@ -742,7 +746,10 @@ stored in a location where CMake can find them without additional help. Otherwise, the location of the Qt library installation must be indicated by setting ``-D Qt5_DIR=/path/to/qt5/lib/cmake/Qt5``, which is a path to a folder inside the Qt installation that contains the file -``Qt5Config.cmake``. +``Qt5Config.cmake``. Similarly, for Qt6 the location of the Qt library +installation can be indicated by setting ``-D Qt6_DIR=/path/to/qt6/lib/cmake/Qt6``, +if necessary. When both, Qt5 and Qt6 are available, Qt6 will be preferred +unless ``-D LAMMPS_GUI_USE_QT5=yes`` is set. It should be possible to build the LAMMPS GUI as a standalone compilation (e.g. 
when LAMMPS has been compiled with traditional make), diff --git a/tools/lammps-gui/TODO.md b/tools/lammps-gui/TODO.md index e4ca44ba3d..ee05e67225 100644 --- a/tools/lammps-gui/TODO.md +++ b/tools/lammps-gui/TODO.md @@ -23,7 +23,6 @@ LAMMPS-GUI TODO list: # Long term ideas (v2.x) - rewrite entire application to build the App and its layout manually -- port to Qt6 (with compatibility to Qt5?) - also a rewrite should establish consistent naming conventions. now we have a mix of LAMMPS style, Qt style, and others. - add option to attach a debugger to the running program (highly non-portable, need customization support in preferences) - write a "wizard" dialog that can be used for beginners to create an input file template for a few typical use scenarios From 6195be5af72baea2a7cd8870b3c0bdb615401da7 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 13 Oct 2023 05:59:20 -0400 Subject: [PATCH 082/107] make keyboard shortcut handling consistent across the entire app --- tools/lammps-gui/chartviewer.cpp | 7 ++++--- tools/lammps-gui/imageviewer.cpp | 5 +++-- tools/lammps-gui/logwindow.cpp | 32 +++++++++++++++++++++++++++----- tools/lammps-gui/logwindow.h | 1 + tools/lammps-gui/slideshow.cpp | 7 ++++--- 5 files changed, 39 insertions(+), 13 deletions(-) diff --git a/tools/lammps-gui/chartviewer.cpp b/tools/lammps-gui/chartviewer.cpp index ab03a110b5..ee29ab9540 100644 --- a/tools/lammps-gui/chartviewer.cpp +++ b/tools/lammps-gui/chartviewer.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -61,13 +62,13 @@ ChartWindow::ChartWindow(const QString &_filename, QWidget *parent) : file->addSeparator(); stopAct = file->addAction("Stop &Run", this, &ChartWindow::stop_run); stopAct->setIcon(QIcon(":/icons/process-stop.png")); - stopAct->setShortcut(QKeySequence::fromString("Ctrl+/")); + stopAct->setShortcut(QKeySequence(Qt::CTRL | Qt::Key_Slash)); closeAct = file->addAction("&Close", this, &QWidget::close); 
closeAct->setIcon(QIcon(":/icons/window-close.png")); - closeAct->setShortcut(QKeySequence::fromString("Ctrl+W")); + closeAct->setShortcut(QKeySequence(Qt::CTRL | Qt::Key_W)); quitAct = file->addAction("&Quit", this, &ChartWindow::quit); quitAct->setIcon(QIcon(":/icons/application-exit.png")); - quitAct->setShortcut(QKeySequence::fromString("Ctrl+Q")); + quitAct->setShortcut(QKeySequence(Qt::CTRL | Qt::Key_Q)); auto *layout = new QVBoxLayout; layout->addLayout(top); setLayout(layout); diff --git a/tools/lammps-gui/imageviewer.cpp b/tools/lammps-gui/imageviewer.cpp index ee81f61253..7406f1860a 100644 --- a/tools/lammps-gui/imageviewer.cpp +++ b/tools/lammps-gui/imageviewer.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -558,10 +559,10 @@ void ImageViewer::createActions() fileMenu->addSeparator(); QAction *exitAct = fileMenu->addAction("&Close", this, &QWidget::close); exitAct->setIcon(QIcon(":/icons/window-close.png")); - exitAct->setShortcut(QKeySequence::fromString("Ctrl+W")); + exitAct->setShortcut(QKeySequence(Qt::CTRL | Qt::Key_W)); QAction *quitAct = fileMenu->addAction("&Quit", this, &ImageViewer::quit); quitAct->setIcon(QIcon(":/icons/application-exit.png")); - quitAct->setShortcut(QKeySequence::fromString("Ctrl+Q")); + quitAct->setShortcut(QKeySequence(Qt::CTRL | Qt::Key_Q)); } void ImageViewer::updateActions() diff --git a/tools/lammps-gui/logwindow.cpp b/tools/lammps-gui/logwindow.cpp index ab1886f1bd..374c4c89b1 100644 --- a/tools/lammps-gui/logwindow.cpp +++ b/tools/lammps-gui/logwindow.cpp @@ -35,12 +35,14 @@ LogWindow::LogWindow(const QString &_filename, QWidget *parent) : QSettings settings; resize(settings.value("logx", 500).toInt(), settings.value("logy", 320).toInt()); - auto action = new QShortcut(QKeySequence::fromString("Ctrl+S"), this); + auto action = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_S), this); connect(action, &QShortcut::activated, this, &LogWindow::save_as); - action = new 
QShortcut(QKeySequence::fromString("Ctrl+Q"), this); + action = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_Q), this); connect(action, &QShortcut::activated, this, &LogWindow::quit); - action = new QShortcut(QKeySequence(Qt::Key_Slash, Qt::CTRL), this); + action = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_Slash), this); connect(action, &QShortcut::activated, this, &LogWindow::stop_run); + + installEventFilter(this); } void LogWindow::closeEvent(QCloseEvent *event) @@ -99,15 +101,35 @@ void LogWindow::contextMenuEvent(QContextMenuEvent *event) menu->addSeparator(); auto action = menu->addAction(QString("Save Log to File ...")); action->setIcon(QIcon(":/icons/document-save-as.png")); - action->setShortcut(QKeySequence::fromString("Ctrl+S")); + action->setShortcut(QKeySequence(Qt::CTRL | Qt::Key_S)); connect(action, &QAction::triggered, this, &LogWindow::save_as); action = menu->addAction("&Close Window", this, &QWidget::close); action->setIcon(QIcon(":/icons/window-close.png")); - action->setShortcut(QKeySequence::fromString("Ctrl+W")); + action->setShortcut(QKeySequence(Qt::CTRL | Qt::Key_W)); menu->exec(event->globalPos()); delete menu; } +// event filter to handle "Ambiguous shortcut override" issues +bool LogWindow::eventFilter(QObject *watched, QEvent *event) +{ + if (event->type() == QEvent::ShortcutOverride) { + QKeyEvent *keyEvent = dynamic_cast(event); + if (!keyEvent) return QWidget::eventFilter(watched, event); + if (keyEvent->modifiers().testFlag(Qt::ControlModifier) && keyEvent->key() == '/') { + stop_run(); + event->accept(); + return true; + } + if (keyEvent->modifiers().testFlag(Qt::ControlModifier) && keyEvent->key() == 'W') { + close(); + event->accept(); + return true; + } + } + return QWidget::eventFilter(watched, event); +} + // Local Variables: // c-basic-offset: 4 // End: diff --git a/tools/lammps-gui/logwindow.h b/tools/lammps-gui/logwindow.h index 8923e35ee5..ad0691d0cc 100644 --- a/tools/lammps-gui/logwindow.h +++ 
b/tools/lammps-gui/logwindow.h @@ -30,6 +30,7 @@ private slots: protected: void closeEvent(QCloseEvent *event) override; void contextMenuEvent(QContextMenuEvent *event) override; + bool eventFilter(QObject *watched, QEvent *event) override; private: QString filename; diff --git a/tools/lammps-gui/slideshow.cpp b/tools/lammps-gui/slideshow.cpp index de7742f22f..92eca9a530 100644 --- a/tools/lammps-gui/slideshow.cpp +++ b/tools/lammps-gui/slideshow.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -50,11 +51,11 @@ SlideShow::SlideShow(const QString &fileName, QWidget *parent) : imageName->setAlignment(Qt::AlignCenter); imageName->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Expanding); - auto *shortcut = new QShortcut(QKeySequence::fromString("Ctrl+W"), this); + auto *shortcut = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_W), this); QObject::connect(shortcut, &QShortcut::activated, this, &QWidget::close); - shortcut = new QShortcut(QKeySequence::fromString("Ctrl+/"), this); + shortcut = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_Slash), this); QObject::connect(shortcut, &QShortcut::activated, this, &SlideShow::stop_run); - shortcut = new QShortcut(QKeySequence::fromString("Ctrl+Q"), this); + shortcut = new QShortcut(QKeySequence(Qt::CTRL | Qt::Key_Q), this); QObject::connect(shortcut, &QShortcut::activated, this, &SlideShow::quit); buttonBox = new QDialogButtonBox(QDialogButtonBox::Close); From 3ebcb0f6f18cf68c38ae89a4c0b65911fda5c90d Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 13 Oct 2023 06:00:29 -0400 Subject: [PATCH 083/107] store settings to different locations for different Qt versions --- tools/lammps-gui/lammpsgui.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/lammps-gui/lammpsgui.cpp b/tools/lammps-gui/lammpsgui.cpp index e0bcb38dc6..11f2554b55 100644 --- a/tools/lammps-gui/lammpsgui.cpp +++ b/tools/lammps-gui/lammpsgui.cpp @@ -84,9 +84,13 @@ 
LammpsGui::LammpsGui(QWidget *parent, const char *filename) : // use $HOME if we get dropped to "/" like on macOS if (current_dir == "/") current_dir = QDir::homePath(); +#define stringify(x) myxstr(x) +#define myxstr(x) #x QCoreApplication::setOrganizationName("The LAMMPS Developers"); QCoreApplication::setOrganizationDomain("lammps.org"); - QCoreApplication::setApplicationName("LAMMPS GUI"); + QCoreApplication::setApplicationName("LAMMPS GUI - QT" stringify(QT_VERSION_MAJOR)); +#undef stringify +#undef myxstr // restore and initialize settings QSettings settings; From d29ae7af92a2d6716cb106eb02b4296000b5886e Mon Sep 17 00:00:00 2001 From: Yury Lysogorskiy Date: Fri, 13 Oct 2023 12:05:28 +0200 Subject: [PATCH 084/107] upd ML-PACE.cmake and Install.py (new version v.2023.10.04 and checksum) that should fix build issues with KOKKOS --- cmake/Modules/Packages/ML-PACE.cmake | 4 ++-- lib/pace/Install.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/Modules/Packages/ML-PACE.cmake b/cmake/Modules/Packages/ML-PACE.cmake index 30aa433a58..ce8f02f5f4 100644 --- a/cmake/Modules/Packages/ML-PACE.cmake +++ b/cmake/Modules/Packages/ML-PACE.cmake @@ -1,6 +1,6 @@ -set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.10.04.pre.tar.gz" CACHE STRING "URL for PACE evaluator library sources") +set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.10.04.tar.gz" CACHE STRING "URL for PACE evaluator library sources") -set(PACELIB_MD5 "61ba11a37ee00de8365b18b521d394a6" CACHE STRING "MD5 checksum of PACE evaluator library tarball") +set(PACELIB_MD5 "70ff79f4e59af175e55d24f3243ad1ff" CACHE STRING "MD5 checksum of PACE evaluator library tarball") mark_as_advanced(PACELIB_URL) mark_as_advanced(PACELIB_MD5) GetFallbackURL(PACELIB_URL PACELIB_FALLBACK) diff --git a/lib/pace/Install.py b/lib/pace/Install.py index 2a8cd2f1f3..8d31852e44 100644 --- a/lib/pace/Install.py +++ b/lib/pace/Install.py @@ -18,11
+18,11 @@ from install_helpers import fullpath, geturl, checkmd5sum, getfallback # settings thisdir = fullpath('.') -version ='v.2023.10.04.pre' +version ='v.2023.10.04' # known checksums for different PACE versions. used to validate the download. checksums = { \ - 'v.2023.10.04.pre': '61ba11a37ee00de8365b18b521d394a6' + 'v.2023.10.04': '70ff79f4e59af175e55d24f3243ad1ff' } parser = ArgumentParser(prog='Install.py', description="LAMMPS library build wrapper script") From 262913695824a407091b16a493cd1165f0fff591 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 13 Oct 2023 06:18:34 -0400 Subject: [PATCH 085/107] avoid uninitialized data access and segfaults when calling main widget functions --- tools/lammps-gui/chartviewer.cpp | 8 ++++---- tools/lammps-gui/imageviewer.cpp | 4 ++-- tools/lammps-gui/logwindow.cpp | 8 ++++---- tools/lammps-gui/preferences.cpp | 4 ++-- tools/lammps-gui/slideshow.cpp | 8 ++++---- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/lammps-gui/chartviewer.cpp b/tools/lammps-gui/chartviewer.cpp index ee29ab9540..c981b2777d 100644 --- a/tools/lammps-gui/chartviewer.cpp +++ b/tools/lammps-gui/chartviewer.cpp @@ -124,10 +124,10 @@ void ChartWindow::add_data(int step, double data, int index) void ChartWindow::quit() { - LammpsGui *main; + LammpsGui *main = nullptr; for (QWidget *widget : QApplication::topLevelWidgets()) if (widget->objectName() == "LammpsGui") main = dynamic_cast(widget); - main->quit(); + if (main) main->quit(); } void ChartWindow::reset_zoom() @@ -138,10 +138,10 @@ void ChartWindow::reset_zoom() void ChartWindow::stop_run() { - LammpsGui *main; + LammpsGui *main = nullptr; for (QWidget *widget : QApplication::topLevelWidgets()) if (widget->objectName() == "LammpsGui") main = dynamic_cast(widget); - main->stop_run(); + if (main) main->stop_run(); } void ChartWindow::saveAs() diff --git a/tools/lammps-gui/imageviewer.cpp b/tools/lammps-gui/imageviewer.cpp index 7406f1860a..00b08f3f47 100644 --- 
a/tools/lammps-gui/imageviewer.cpp +++ b/tools/lammps-gui/imageviewer.cpp @@ -533,10 +533,10 @@ void ImageViewer::copy() {} void ImageViewer::quit() { - LammpsGui *main; + LammpsGui *main = nullptr; for (QWidget *widget : QApplication::topLevelWidgets()) if (widget->objectName() == "LammpsGui") main = dynamic_cast(widget); - main->quit(); + if (main) main->quit(); } void ImageViewer::saveFile(const QString &fileName) diff --git a/tools/lammps-gui/logwindow.cpp b/tools/lammps-gui/logwindow.cpp index 374c4c89b1..73ec81d06c 100644 --- a/tools/lammps-gui/logwindow.cpp +++ b/tools/lammps-gui/logwindow.cpp @@ -57,18 +57,18 @@ void LogWindow::closeEvent(QCloseEvent *event) void LogWindow::quit() { - LammpsGui *main; + LammpsGui *main = nullptr; for (QWidget *widget : QApplication::topLevelWidgets()) if (widget->objectName() == "LammpsGui") main = dynamic_cast(widget); - main->quit(); + if (main) main->quit(); } void LogWindow::stop_run() { - LammpsGui *main; + LammpsGui *main = nullptr; for (QWidget *widget : QApplication::topLevelWidgets()) if (widget->objectName() == "LammpsGui") main = dynamic_cast(widget); - main->stop_run(); + if (main) main->stop_run(); } void LogWindow::save_as() diff --git a/tools/lammps-gui/preferences.cpp b/tools/lammps-gui/preferences.cpp index c760e6610b..f7fe9ec025 100644 --- a/tools/lammps-gui/preferences.cpp +++ b/tools/lammps-gui/preferences.cpp @@ -286,12 +286,12 @@ GeneralTab::GeneralTab(QSettings *_settings, LammpsWrapper *_lammps, QWidget *pa void GeneralTab::updatefonts(const QFont &all, const QFont &text) { - LammpsGui *main; + LammpsGui *main = nullptr; for (QWidget *widget : QApplication::topLevelWidgets()) if (widget->objectName() == "LammpsGui") main = dynamic_cast(widget); QApplication::setFont(all); - main->ui->textEdit->document()->setDefaultFont(text); + if (main) main->ui->textEdit->document()->setDefaultFont(text); } void GeneralTab::newallfont() diff --git a/tools/lammps-gui/slideshow.cpp b/tools/lammps-gui/slideshow.cpp 
index 92eca9a530..140c703ca3 100644 --- a/tools/lammps-gui/slideshow.cpp +++ b/tools/lammps-gui/slideshow.cpp @@ -199,18 +199,18 @@ void SlideShow::loadImage(int idx) void SlideShow::quit() { - LammpsGui *main; + LammpsGui *main = nullptr; for (QWidget *widget : QApplication::topLevelWidgets()) if (widget->objectName() == "LammpsGui") main = dynamic_cast(widget); - main->quit(); + if (main) main->quit(); } void SlideShow::stop_run() { - LammpsGui *main; + LammpsGui *main = nullptr; for (QWidget *widget : QApplication::topLevelWidgets()) if (widget->objectName() == "LammpsGui") main = dynamic_cast(widget); - main->stop_run(); + if (main) main->stop_run(); } void SlideShow::movie() From 926de1575c74d98cec2319c54ef4ddc2b75e4ad0 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 13 Oct 2023 06:18:56 -0400 Subject: [PATCH 086/107] avoid completion on empty lines --- tools/lammps-gui/codeeditor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lammps-gui/codeeditor.cpp b/tools/lammps-gui/codeeditor.cpp index b7a14871b7..f374b098e2 100644 --- a/tools/lammps-gui/codeeditor.cpp +++ b/tools/lammps-gui/codeeditor.cpp @@ -568,6 +568,7 @@ void CodeEditor::keyPressEvent(QKeyEvent *event) if (automatic_completion) { auto cursor = textCursor(); auto line = cursor.block().text(); + if (line.isEmpty()) return; // QTextCursor::WordUnderCursor is unusable here since recognizes '/' as word boundary. // Work around it by manually searching for the location of the beginning of the word. 
From 3db7e1f9cf3b370da126dbe9b4d667d108833735 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 13 Oct 2023 06:50:06 -0400 Subject: [PATCH 087/107] avoid invalid data accesses --- tools/lammps-gui/chartviewer.cpp | 5 ++++- tools/lammps-gui/chartviewer.h | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tools/lammps-gui/chartviewer.cpp b/tools/lammps-gui/chartviewer.cpp index c981b2777d..fbd888f1cd 100644 --- a/tools/lammps-gui/chartviewer.cpp +++ b/tools/lammps-gui/chartviewer.cpp @@ -85,7 +85,10 @@ int ChartWindow::get_step() const { if (charts.size() > 0) { auto *v = charts[0]; - return (int)v->get_step(v->get_count() - 1); + if (v) + return (int)v->get_step(v->get_count() - 1); + else + return -1; } else { return -1; } diff --git a/tools/lammps-gui/chartviewer.h b/tools/lammps-gui/chartviewer.h index 0954f4a9a8..da0468eaf8 100644 --- a/tools/lammps-gui/chartviewer.h +++ b/tools/lammps-gui/chartviewer.h @@ -88,8 +88,8 @@ public: int get_index() const { return index; }; int get_count() const { return series->count(); } const char *get_title() const { return series->name().toLocal8Bit(); } - double get_step(int index) const { return series->at(index).x(); } - double get_data(int index) const { return series->at(index).y(); } + double get_step(int index) const { return (index < 0) ? 0.0 : series->at(index).x(); } + double get_data(int index) const { return (index < 0) ? 
0.0 : series->at(index).y(); } private: int last_step, index; From 50d2ebc27d7fd96cc94153540d953a02d9abd5bf Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 13 Oct 2023 06:50:52 -0400 Subject: [PATCH 088/107] auto-detect OpenMP support also when compiling a standalone binary --- tools/lammps-gui/CMakeLists.txt | 35 +++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tools/lammps-gui/CMakeLists.txt b/tools/lammps-gui/CMakeLists.txt index 83d5dc3216..edfeeb1128 100644 --- a/tools/lammps-gui/CMakeLists.txt +++ b/tools/lammps-gui/CMakeLists.txt @@ -13,6 +13,41 @@ option(LAMMPS_GUI_USE_PLUGIN "Load LAMMPS library dynamically at runtime" OFF) mark_as_advanced(LAMMPS_GUI_USE_PLUGIN) option(LAMMPS_GUI_USE_QT5 "Prefer using Qt5 over Qt6" OFF) +include(CheckIncludeFileCXX) +# helper function to check for usable omp.h header +function(check_omp_h_include) + find_package(OpenMP COMPONENTS CXX QUIET) + if(OpenMP_CXX_FOUND) + set(CMAKE_REQUIRED_FLAGS ${OpenMP_CXX_FLAGS}) + set(CMAKE_REQUIRED_INCLUDES ${OpenMP_CXX_INCLUDE_DIRS}) + set(CMAKE_REQUIRED_LINK_OPTIONS ${OpenMP_CXX_FLAGS}) + set(CMAKE_REQUIRED_LIBRARIES ${OpenMP_CXX_LIBRARIES}) + check_include_file_cxx(omp.h _have_omp_h) + else() + set(_have_omp_h FALSE) + endif() + set(HAVE_OMP_H_INCLUDE ${_have_omp_h} PARENT_SCOPE) +endfunction() + +# detect if we may enable OpenMP support by default +set(BUILD_OMP_DEFAULT OFF) +find_package(OpenMP COMPONENTS CXX QUIET) +if(OpenMP_CXX_FOUND) + check_omp_h_include() + if(HAVE_OMP_H_INCLUDE) + set(BUILD_OMP_DEFAULT ON) + endif() +endif() + +option(BUILD_OMP "Build with OpenMP support" ${BUILD_OMP_DEFAULT}) +if(BUILD_OMP) + find_package(OpenMP COMPONENTS CXX REQUIRED) + check_omp_h_include() + if(NOT HAVE_OMP_H_INCLUDE) + message(FATAL_ERROR "Cannot find the 'omp.h' header file required for full OpenMP support") + endif() +endif() + # checks # when this file is included as subdirectory in the LAMMPS build, many settings are directly imported 
if(LAMMPS_DIR) From 2f6782636382e25eb832292606951ac387d2a3e7 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 13 Oct 2023 06:59:06 -0400 Subject: [PATCH 089/107] indicate whether multi-thread is available in preferences --- tools/lammps-gui/preferences.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/lammps-gui/preferences.cpp b/tools/lammps-gui/preferences.cpp index f7fe9ec025..fd01bb5046 100644 --- a/tools/lammps-gui/preferences.cpp +++ b/tools/lammps-gui/preferences.cpp @@ -410,11 +410,19 @@ AcceleratorTab::AcceleratorTab(QSettings *_settings, LammpsWrapper *_lammps, QWi #endif auto *choices = new QFrame; auto *choiceLayout = new QVBoxLayout; +#if defined(_OPENMP) auto *ntlabel = new QLabel(QString("Number of threads (max %1):").arg(maxthreads)); auto *ntchoice = new QLineEdit(settings->value("nthreads", maxthreads).toString()); +#else + auto *ntlabel = new QLabel(QString("Number of threads (OpenMP not available):")); + auto *ntchoice = new QLineEdit("1"); +#endif auto *intval = new QIntValidator(1, maxthreads, this); ntchoice->setValidator(intval); ntchoice->setObjectName("nthreads"); +#if !defined(_OPENMP) + ntchoice->setEnabled(false); +#endif choiceLayout->addWidget(ntlabel); choiceLayout->addWidget(ntchoice); From ae8791c6820ca858b809237aef7a229ae815ffbb Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 13 Oct 2023 08:17:10 -0400 Subject: [PATCH 090/107] avoid out of bounds access to string under cursor --- tools/lammps-gui/codeeditor.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/lammps-gui/codeeditor.cpp b/tools/lammps-gui/codeeditor.cpp index f374b098e2..e95f576be0 100644 --- a/tools/lammps-gui/codeeditor.cpp +++ b/tools/lammps-gui/codeeditor.cpp @@ -572,7 +572,8 @@ void CodeEditor::keyPressEvent(QKeyEvent *event) // QTextCursor::WordUnderCursor is unusable here since recognizes '/' as word boundary. // Work around it by manually searching for the location of the beginning of the word. 
- int begin = cursor.positionInBlock(); + int begin = qMin(cursor.positionInBlock(), line.length() - 1); + while (begin >= 0) { if (line[begin].isSpace()) break; --begin; @@ -749,7 +750,7 @@ void CodeEditor::runCompletion() // QTextCursor::WordUnderCursor is unusable here since it recognizes '/' as word boundary. // Work around it by manually searching for the beginning and end position of the word // under the cursor and then using that substring. - int begin = cursor.positionInBlock(); + int begin = qMin(cursor.positionInBlock(), line.length() - 1); line = cursor.block().text(); while (begin >= 0) { if (line[begin].isSpace()) break; From d8e6b432fa537efde99ac561cd3e66a3fb8f1482 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 13 Oct 2023 10:48:37 -0600 Subject: [PATCH 091/107] Port recent changes --- src/KOKKOS/pair_yukawa_colloid_kokkos.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/pair_yukawa_colloid_kokkos.h b/src/KOKKOS/pair_yukawa_colloid_kokkos.h index 060b621a8a..83ce58e898 100644 --- a/src/KOKKOS/pair_yukawa_colloid_kokkos.h +++ b/src/KOKKOS/pair_yukawa_colloid_kokkos.h @@ -96,19 +96,21 @@ class PairYukawaColloidKokkos : public PairYukawaColloid { int nlocal,nall,eflag,vflag; void allocate() override; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; friend struct PairComputeFunctor; friend struct PairComputeFunctor; - friend EV_FLOAT pair_compute_neighlist( + friend EV_FLOAT pair_compute_neighlist(PairYukawaColloidKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist(PairYukawaColloidKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist( PairYukawaColloidKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist( + friend 
EV_FLOAT pair_compute_neighlist( PairYukawaColloidKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute_neighlist( - PairYukawaColloidKokkos*,NeighListKokkos*); - friend EV_FLOAT pair_compute( + friend EV_FLOAT pair_compute( PairYukawaColloidKokkos*,NeighListKokkos*); friend void pair_virial_fdotr_compute(PairYukawaColloidKokkos*); From 41a7f648474cb2ef34550323d0f030c53dba7f70 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 13 Oct 2023 10:58:14 -0600 Subject: [PATCH 092/107] Update dependency --- src/Depend.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Depend.sh b/src/Depend.sh index b88c527b55..dbffb2dba0 100755 --- a/src/Depend.sh +++ b/src/Depend.sh @@ -64,6 +64,7 @@ fi if (test $1 = "COLLOID") then depend GPU + depend KOKKOS depend OPENMP fi From 6c6f35dd690a7b5bc5e276035f0889555051035b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 13 Oct 2023 14:39:20 -0400 Subject: [PATCH 093/107] modernize loops over fixes --- src/KOKKOS/atom_kokkos.cpp | 42 +++++++++++------------------- src/KOKKOS/fix_dt_reset_kokkos.cpp | 2 +- src/KOKKOS/neigh_bond_kokkos.cpp | 5 ++-- 3 files changed, 18 insertions(+), 31 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index f118b2a204..e97213cb4b 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -132,27 +132,20 @@ void AtomKokkos::init() void AtomKokkos::update_property_atom() { - nprop_atom = 0; - for (int ifix = 0; ifix < modify->nfix; ifix++) { - if (modify->fix[ifix] && utils::strmatch(modify->fix[ifix]->style, "^property/atom")) { - auto fix_i = modify->fix[ifix]; - if (!fix_i->kokkosable) - error->all(FLERR, "KOKKOS package requires a Kokkos-enabled version of fix property/atom"); + std::vector prop_atom_fixes; + for (auto &ifix : modify->get_fix_by_style("^property/atom")) { + if (!ifix->kokkosable) + error->all(FLERR, "KOKKOS package requires a Kokkos-enabled version of fix property/atom"); - 
memory->grow(prop_atom,nprop_atom+1,"atom::prop_atom"); - prop_atom[nprop_atom++] = ifix; - } + prop_atom_fixes.push_back(ifix); } - delete [] fix_prop_atom; - fix_prop_atom = new FixPropertyAtomKokkos*[nprop_atom]; + delete[] fix_prop_atom; + fix_prop_atom = new FixPropertyAtomKokkos *[prop_atom_fixes.size()]; - for (int n = 0; n < nprop_atom; n++) { - auto fix_n = dynamic_cast(modify->fix[prop_atom[n]]); - fix_prop_atom[n] = fix_n; - } - - memory->destroy(prop_atom); + int n = 0; + for (auto &ifix : prop_atom_fixes) + fix_prop_atom[n++] = dynamic_cast(ifix); } /* ---------------------------------------------------------------------- */ @@ -161,13 +154,11 @@ void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask) { if (space == Device && lmp->kokkos->auto_sync) { avecKK->modified(Host, mask); - for (int n = 0; n < nprop_atom; n++) - fix_prop_atom[n]->modified(Host, mask); + for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->modified(Host, mask); } avecKK->sync(space, mask); - for (int n = 0; n < nprop_atom; n++) - fix_prop_atom[n]->sync(space, mask); + for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->sync(space, mask); } /* ---------------------------------------------------------------------- */ @@ -175,13 +166,11 @@ void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask) void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask) { avecKK->modified(space, mask); - for (int n = 0; n < nprop_atom; n++) - fix_prop_atom[n]->modified(space, mask); + for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->modified(space, mask); if (space == Device && lmp->kokkos->auto_sync) { avecKK->sync(Host, mask); - for (int n = 0; n < nprop_atom; n++) - fix_prop_atom[n]->sync(Host, mask); + for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->sync(Host, mask); } } @@ -190,8 +179,7 @@ void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask) void AtomKokkos::sync_overlapping_device(const ExecutionSpace space, 
unsigned int mask) { avecKK->sync_overlapping_device(space, mask); - for (int n = 0; n < nprop_atom; n++) - fix_prop_atom[n]->sync_overlapping_device(space, mask); + for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->sync_overlapping_device(space, mask); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_dt_reset_kokkos.cpp b/src/KOKKOS/fix_dt_reset_kokkos.cpp index f3435e711e..4c7545cee0 100644 --- a/src/KOKKOS/fix_dt_reset_kokkos.cpp +++ b/src/KOKKOS/fix_dt_reset_kokkos.cpp @@ -113,7 +113,7 @@ void FixDtResetKokkos::end_of_step() update->dt = dt; update->dt_default = 0; if (force->pair) force->pair->reset_dt(); - for (int i = 0; i < modify->nfix; i++) modify->fix[i]->reset_dt(); + for (auto &ifix : modify->get_fix_list()) ifix->reset_dt(); output->reset_dt(); } diff --git a/src/KOKKOS/neigh_bond_kokkos.cpp b/src/KOKKOS/neigh_bond_kokkos.cpp index 4cfe440b1f..b749590779 100644 --- a/src/KOKKOS/neigh_bond_kokkos.cpp +++ b/src/KOKKOS/neigh_bond_kokkos.cpp @@ -112,9 +112,8 @@ void NeighBondKokkos::init_topology_kk() { int i,m; int bond_off = 0; int angle_off = 0; - for (i = 0; i < modify->nfix; i++) - if ((strcmp(modify->fix[i]->style,"shake") == 0) - || (strcmp(modify->fix[i]->style,"rattle") == 0)) + for (const auto &ifix : modify->get_fix_list()) + if (utils::strmatch(ifix->style,"^shake") || utils::strmatch(ifix->style,"^rattle")) bond_off = angle_off = 1; if (force->bond && force->bond_match("quartic")) bond_off = 1; From 69d82b9bc6dbbbfed3a31e9f55891f906d33f22e Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Fri, 13 Oct 2023 12:47:46 -0600 Subject: [PATCH 094/107] Remove unused variable --- src/KOKKOS/atom_kokkos.cpp | 1 - src/KOKKOS/atom_kokkos.h | 1 - 2 files changed, 2 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index e97213cb4b..554afec210 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -46,7 +46,6 @@ 
AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp) h_tag_max = Kokkos::subview(h_tag_min_max,1); nprop_atom = 0; - prop_atom = nullptr; fix_prop_atom = nullptr; } diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 26ea7da296..21a9aeebbd 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -27,7 +27,6 @@ class AtomKokkos : public Atom { public: bool sort_classic; int nprop_atom; - int* prop_atom; FixPropertyAtomKokkos** fix_prop_atom; DAT::tdual_tagint_1d k_tag; From 65bd04260bc73ba2472b0ec59f96c98cbff72296 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Fri, 13 Oct 2023 14:50:03 -0400 Subject: [PATCH 095/107] bugfix for modernization --- src/KOKKOS/atom_kokkos.cpp | 5 +++-- src/KOKKOS/atom_kokkos.h | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index e97213cb4b..bc393b29d8 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -46,7 +46,6 @@ AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp) h_tag_max = Kokkos::subview(h_tag_min_max,1); nprop_atom = 0; - prop_atom = nullptr; fix_prop_atom = nullptr; } @@ -132,16 +131,18 @@ void AtomKokkos::init() void AtomKokkos::update_property_atom() { + nprop_atom = 0; std::vector prop_atom_fixes; for (auto &ifix : modify->get_fix_by_style("^property/atom")) { if (!ifix->kokkosable) error->all(FLERR, "KOKKOS package requires a Kokkos-enabled version of fix property/atom"); + ++nprop_atom; prop_atom_fixes.push_back(ifix); } delete[] fix_prop_atom; - fix_prop_atom = new FixPropertyAtomKokkos *[prop_atom_fixes.size()]; + fix_prop_atom = new FixPropertyAtomKokkos *[nprop_atom]; int n = 0; for (auto &ifix : prop_atom_fixes) diff --git a/src/KOKKOS/atom_kokkos.h b/src/KOKKOS/atom_kokkos.h index 26ea7da296..21a9aeebbd 100644 --- a/src/KOKKOS/atom_kokkos.h +++ b/src/KOKKOS/atom_kokkos.h @@ -27,7 +27,6 @@ class AtomKokkos : public Atom { public: bool sort_classic; int nprop_atom; - int* prop_atom; 
FixPropertyAtomKokkos** fix_prop_atom; DAT::tdual_tagint_1d k_tag; From f93ff98728ae788f1a2254d6f25271c0ce826dcb Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Mon, 16 Oct 2023 11:20:06 -0600 Subject: [PATCH 096/107] Fix issues with trim lists --- src/KOKKOS/neighbor_kokkos.cpp | 3 ++- src/KOKKOS/npair_trim_kokkos.cpp | 12 ++++++------ src/npair_trim.cpp | 8 ++++++-- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp index 0b40bce841..efb1247560 100644 --- a/src/KOKKOS/neighbor_kokkos.cpp +++ b/src/KOKKOS/neighbor_kokkos.cpp @@ -308,7 +308,8 @@ void NeighborKokkos::build_kokkos(int topoflag) for (i = 0; i < npair_perpetual; i++) { m = plist[i]; if (!lists[m]->kokkos) atomKK->sync(Host,ALL_MASK); - if (!lists[m]->copy) lists[m]->grow(nlocal,nall); + if (!lists[m]->copy || lists[m]->trim || lists[m]->kk2cpu) + lists[m]->grow(nlocal,nall); neigh_pair[m]->build_setup(); neigh_pair[m]->build(lists[m]); } diff --git a/src/KOKKOS/npair_trim_kokkos.cpp b/src/KOKKOS/npair_trim_kokkos.cpp index 97931bf250..d04d8676d7 100644 --- a/src/KOKKOS/npair_trim_kokkos.cpp +++ b/src/KOKKOS/npair_trim_kokkos.cpp @@ -62,8 +62,8 @@ void NPairTrimKokkos::trim_to_kokkos(NeighList *list) d_ilist_copy = k_list_copy->d_ilist; d_numneigh_copy = k_list_copy->d_numneigh; d_neighbors_copy = k_list_copy->d_neighbors; - int inum_copy = list->listcopy->inum; - if (list->ghost) inum_copy += list->listcopy->gnum; + int inum_trim = list->listcopy->inum; + if (list->ghost) inum_trim += list->listcopy->gnum; NeighListKokkos* k_list = static_cast*>(list); k_list->maxneighs = k_list_copy->maxneighs; // simple, but could be made more memory efficient @@ -75,7 +75,7 @@ void NPairTrimKokkos::trim_to_kokkos(NeighList *list) // loop over parent list and trim copymode = 1; - Kokkos::parallel_for(Kokkos::RangePolicy(0,inum_copy),*this); + Kokkos::parallel_for(Kokkos::RangePolicy(0,inum_trim),*this); copymode = 0; list->inum = 
k_list_copy->inum; @@ -132,8 +132,8 @@ void NPairTrimKokkos::trim_to_cpu(NeighList *list) int inum = listcopy->inum; int gnum = listcopy->gnum; - int inum_all = inum; - if (list->ghost) inum_all += gnum; + int inum_trim = inum; + if (list->ghost) inum_trim += gnum; auto h_ilist = listcopy_kk->k_ilist.h_view; auto h_numneigh = Kokkos::create_mirror_view_and_copy(LMPHostType(),listcopy_kk->d_numneigh); auto h_neighbors = Kokkos::create_mirror_view_and_copy(LMPHostType(),listcopy_kk->d_neighbors); @@ -151,7 +151,7 @@ void NPairTrimKokkos::trim_to_cpu(NeighList *list) MyPage *ipage = list->ipage; ipage->reset(); - for (int ii = 0; ii < inum_all; ii++) { + for (int ii = 0; ii < inum_trim; ii++) { int n = 0; neighptr = ipage->vget(); diff --git a/src/npair_trim.cpp b/src/npair_trim.cpp index 14974d72ab..a4b6c1c6a1 100644 --- a/src/npair_trim.cpp +++ b/src/npair_trim.cpp @@ -50,11 +50,15 @@ void NPairTrim::build(NeighList *list) int *numneigh_copy = listcopy->numneigh; int **firstneigh_copy = listcopy->firstneigh; int inum = listcopy->inum; + int gnum = listcopy->gnum; list->inum = inum; - list->gnum = listcopy->gnum; + list->gnum = gnum; - for (ii = 0; ii < inum; ii++) { + int inum_trim = inum; + if (list->ghost) inum_trim += gnum; + + for (ii = 0; ii < inum_trim; ii++) { n = 0; neighptr = ipage->vget(); From 902e996f2ab23553312753e813613007b94b414b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 16 Oct 2023 13:51:16 -0400 Subject: [PATCH 097/107] restore neighbor list trimming support to REBO family of pair styles --- src/MANYBODY/pair_airebo.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/MANYBODY/pair_airebo.cpp b/src/MANYBODY/pair_airebo.cpp index e34283f71c..129b9d2218 100644 --- a/src/MANYBODY/pair_airebo.cpp +++ b/src/MANYBODY/pair_airebo.cpp @@ -59,7 +59,6 @@ PairAIREBO::PairAIREBO(LAMMPS *lmp) nextra = 3; pvector = new double[nextra]; - trim_flag = 0; // workaround maxlocal = 0; REBO_numneigh = nullptr; REBO_firstneigh = nullptr; From 
ad1400ac71ba8b6f305275e97817539c04e054c6 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Mon, 16 Oct 2023 12:33:21 -0600 Subject: [PATCH 098/107] Fix broken example --- examples/mliap/in.mliap.quadratic.compute | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/mliap/in.mliap.quadratic.compute b/examples/mliap/in.mliap.quadratic.compute index 929dbf3824..cc9ad331b5 100644 --- a/examples/mliap/in.mliap.quadratic.compute +++ b/examples/mliap/in.mliap.quadratic.compute @@ -65,7 +65,7 @@ compute bsum2 snapgroup2 reduce sum c_b[*] # fix bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector compute vbsum all reduce sum c_vb[*] # fix vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector -variable db_2_100 equal c_db[2][100] +variable db_2_100 equal C_db[2][100] # test output: 1: total potential energy # 2: xy component of stress tensor From 462a3935fea188aea0863f300a65fad0b8e35b1b Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Mon, 16 Oct 2023 14:07:42 -0600 Subject: [PATCH 099/107] Port bugfix to Kokkos --- src/KOKKOS/npair_halffull_kokkos.cpp | 17 +++ src/KOKKOS/npair_halffull_kokkos.h | 4 +- src/KOKKOS/npair_kokkos.cpp | 164 ++++++++++++++++++++------- src/KOKKOS/npair_kokkos.h | 6 +- 4 files changed, 143 insertions(+), 48 deletions(-) diff --git a/src/KOKKOS/npair_halffull_kokkos.cpp b/src/KOKKOS/npair_halffull_kokkos.cpp index ec17cec844..bc2549aa8d 100644 --- a/src/KOKKOS/npair_halffull_kokkos.cpp +++ b/src/KOKKOS/npair_halffull_kokkos.cpp @@ -18,6 +18,7 @@ #include "atom_masks.h" #include "atom_vec.h" #include "domain.h" +#include "force.h" #include "neigh_list_kokkos.h" #include @@ -66,6 +67,9 @@ void NPairHalffullKokkos::build(NeighList *list) d_numneigh = k_list->d_numneigh; d_neighbors = k_list->d_neighbors; + delta = 0.01 * force->angstrom; + triclinic = domain->triclinic; + // loop over parent full list copymode = 1; @@ -92,6 +96,11 @@ void NPairHalffullKokkos::operator()(TagNPairHalffullCom } // loop over 
full neighbor list + // use i < j < nlocal to eliminate half the local/local interactions + // for triclinic, must use delta to eliminate half the local/ghost interactions + // cannot use I/J exact coord comparision as for orthog + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon const int jnum = d_numneigh_full(i); const AtomNeighbors neighbors_i = AtomNeighbors(&d_neighbors(i,0),d_numneigh(i), @@ -103,6 +112,14 @@ void NPairHalffullKokkos::operator()(TagNPairHalffullCom if (NEWTON) { if (j < nlocal) { if (i > j) continue; + } else if (triclinic) { + if (fabs(x(j,2)-ztmp) > delta) { + if (x(j,2) < ztmp) continue; + } else if (fabs(x(j,1)-ytmp) > delta) { + if (x(j,1) < ytmp) continue; + } else { + if (x(j,0) < xtmp) continue; + } } else { if (x(j,2) < ztmp) continue; if (x(j,2) == ztmp) { diff --git a/src/KOKKOS/npair_halffull_kokkos.h b/src/KOKKOS/npair_halffull_kokkos.h index c5a09f0b62..1249a9ce8a 100644 --- a/src/KOKKOS/npair_halffull_kokkos.h +++ b/src/KOKKOS/npair_halffull_kokkos.h @@ -257,8 +257,8 @@ class NPairHalffullKokkos : public NPair { void operator()(TagNPairHalffullCompute, const int&) const; private: - int nlocal; - double cutsq_custom; + int nlocal,triclinic; + double cutsq_custom,delta; typename AT::t_x_array_randomread x; diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 06567cbeb6..8201ae028b 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -155,6 +155,8 @@ void NPairKokkos::build(NeighList *list_) list->grow(nall); + const double delta = 0.01 * force->angstrom; + NeighborKokkosExecute data(*list, k_cutneighsq.view(), @@ -176,7 +178,7 @@ void NPairKokkos::build(NeighList *list_) atomKK->molecular, nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo, bininvx,bininvy,bininvz, - exclude, nex_type, + delta, exclude, nex_type, k_ex1_type.view(), k_ex2_type.view(), k_ex_type.view(), @@ -239,7 +241,7 @@ void 
NPairKokkos::build(NeighList *list_) if (GHOST) { // assumes newton off - NPairKokkosBuildFunctorGhost f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); + NPairKokkosBuildFunctorGhost f(data,atoms_per_bin * 6 * sizeof(X_FLOAT) * factor); // temporarily disable team policy for ghost due to known bug @@ -261,7 +263,7 @@ void NPairKokkos::build(NeighList *list_) //#endif } else { if (SIZE) { - NPairKokkosBuildFunctorSize f(data,atoms_per_bin * 6 * sizeof(X_FLOAT) * factor); + NPairKokkosBuildFunctorSize f(data,atoms_per_bin * 7 * sizeof(X_FLOAT) * factor); #ifdef LMP_KOKKOS_GPU if (ExecutionSpaceFromDevice::space == Device) { int team_size = atoms_per_bin*factor; @@ -279,7 +281,7 @@ void NPairKokkos::build(NeighList *list_) Kokkos::parallel_for(nall, f); #endif } else { - NPairKokkosBuildFunctor f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); + NPairKokkosBuildFunctor f(data,atoms_per_bin * 6 * sizeof(X_FLOAT) * factor); #ifdef LMP_KOKKOS_GPU if (ExecutionSpaceFromDevice::space == Device) { int team_size = atoms_per_bin*factor; @@ -414,6 +416,7 @@ void NeighborKokkosExecute:: const X_FLOAT ytmp = x(i, 1); const X_FLOAT ztmp = x(i, 2); const int itype = type(i); + const tagint itag = tag(i); const int ibin = c_atom2bin(i); @@ -484,13 +487,29 @@ void NeighborKokkosExecute:: if (HalfNeigh && !Newton && j <= i) continue; if (!HalfNeigh && j == i) continue; + + // for triclinic, bin stencil is full in all 3 dims + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J exact coord comparision + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon + if (HalfNeigh && Newton && Tri) { - if (x(j,2) < ztmp) continue; - if (x(j,2) == ztmp) { - if (x(j,1) < ytmp) continue; - if (x(j,1) == ytmp) { - if (x(j,0) < xtmp) continue; - if (x(j,0) == xtmp && j <= i) continue; + if (j <= i) continue; + if (j >= nlocal) { + const tagint jtag = tag(j); + if (itag > jtag) { + if 
((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x(j,2)-ztmp) > delta) { + if (x(j,2) < ztmp) continue; + } else if (fabs(x(j,1)-ytmp) > delta) { + if (x(j,1) < ytmp) continue; + } else { + if (x(j,0) < xtmp) continue; + } } } } @@ -568,8 +587,9 @@ void NeighborKokkosExecute::build_ItemGPU(typename Kokkos::TeamPolic size_t sharedsize) const { auto* sharedmem = static_cast(dev.team_shmem().get_shmem(sharedsize)); - /* loop over atoms in i's bin, - */ + + // loop over atoms in i's bin + const int atoms_per_bin = c_bins.extent(1); const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin <1?1:dev.team_size()/atoms_per_bin; const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size(); @@ -579,15 +599,14 @@ void NeighborKokkosExecute::build_ItemGPU(typename Kokkos::TeamPolic if (ibin >= mbins) return; - X_FLOAT* other_x = sharedmem + 5*atoms_per_bin*MY_BIN; - int* other_id = (int*) &other_x[4 * atoms_per_bin]; + X_FLOAT* other_x = sharedmem + 6*atoms_per_bin*MY_BIN; + int* other_id = (int*) &other_x[5 * atoms_per_bin]; int bincount_current = c_bincount[ibin]; for (int kk = 0; kk < TEAMS_PER_BIN; kk++) { const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size(); const int i = MY_II < bincount_current ? 
c_bins(ibin, MY_II) : -1; - /* if necessary, goto next page and add pages */ int n = 0; @@ -608,6 +627,7 @@ void NeighborKokkosExecute::build_ItemGPU(typename Kokkos::TeamPolic other_x[MY_II + atoms_per_bin] = ytmp; other_x[MY_II + 2 * atoms_per_bin] = ztmp; other_x[MY_II + 3 * atoms_per_bin] = itype; + other_x[MY_II + 4 * atoms_per_bin] = itag; } other_id[MY_II] = i; @@ -695,6 +715,7 @@ void NeighborKokkosExecute::build_ItemGPU(typename Kokkos::TeamPolic other_x[MY_II + atoms_per_bin] = x(j, 1); other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); other_x[MY_II + 3 * atoms_per_bin] = type(j); + other_x[MY_II + 4 * atoms_per_bin] = tag(j); } other_id[MY_II] = j; @@ -708,13 +729,29 @@ void NeighborKokkosExecute::build_ItemGPU(typename Kokkos::TeamPolic if (HalfNeigh && !Newton && j <= i) continue; if (!HalfNeigh && j == i) continue; + + // for triclinic, bin stencil is full in all 3 dims + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J exact coord comparision + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon + if (HalfNeigh && Newton && Tri) { - if (x(j,2) < ztmp) continue; - if (x(j,2) == ztmp) { - if (x(j,1) < ytmp) continue; - if (x(j,1) == ytmp) { - if (x(j,0) < xtmp) continue; - if (x(j,0) == xtmp && j <= i) continue; + if (j <= i) continue; + if (j >= nlocal) { + const tagint jtag = other_x[m + 4 * atoms_per_bin]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x(j,2)-ztmp) > delta) { + if (x(j,2) < ztmp) continue; + } else if (fabs(x(j,1)-ytmp) > delta) { + if (x(j,1) < ytmp) continue; + } else { + if (x(j,0) < xtmp) continue; + } } } } @@ -794,6 +831,7 @@ void NeighborKokkosExecute:: const X_FLOAT ytmp = x(i, 1); const X_FLOAT ztmp = x(i, 2); const int itype = type(i); + const tagint itag = tag(i); const typename ArrayTypes::t_int_1d_const_um stencil = 
d_stencil; @@ -905,6 +943,7 @@ void NeighborKokkosExecute::build_ItemGhostGPU(typename Kokkos::Team size_t sharedsize) const { auto* sharedmem = static_cast(dev.team_shmem().get_shmem(sharedsize)); + // loop over atoms in i's bin const int atoms_per_bin = c_bins.extent(1); @@ -916,8 +955,8 @@ void NeighborKokkosExecute::build_ItemGhostGPU(typename Kokkos::Team if (ibin >= mbins) return; - X_FLOAT* other_x = sharedmem + 5*atoms_per_bin*MY_BIN; - int* other_id = (int*) &other_x[4 * atoms_per_bin]; + X_FLOAT* other_x = sharedmem + 6*atoms_per_bin*MY_BIN; + int* other_id = (int*) &other_x[5 * atoms_per_bin]; int bincount_current = c_bincount[ibin]; @@ -944,6 +983,7 @@ void NeighborKokkosExecute::build_ItemGhostGPU(typename Kokkos::Team other_x[MY_II + atoms_per_bin] = ytmp; other_x[MY_II + 2 * atoms_per_bin] = ztmp; other_x[MY_II + 3 * atoms_per_bin] = itype; + other_x[MY_II + 4 * atoms_per_bin] = itag; } other_id[MY_II] = i; #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) @@ -999,6 +1039,7 @@ void NeighborKokkosExecute::build_ItemGhostGPU(typename Kokkos::Team other_x[MY_II + atoms_per_bin] = x(j, 1); other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); other_x[MY_II + 3 * atoms_per_bin] = type(j); + other_x[MY_II + 4 * atoms_per_bin] = tag(j); } other_id[MY_II] = j; @@ -1084,6 +1125,7 @@ void NeighborKokkosExecute:: const X_FLOAT ztmp = x(i, 2); const X_FLOAT radi = radius(i); const int itype = type(i); + const tagint itag = tag(i); const int ibin = c_atom2bin(i); @@ -1167,13 +1209,29 @@ void NeighborKokkosExecute:: if (HalfNeigh && !Newton && j <= i) continue; if (!HalfNeigh && j == i) continue; + + // for triclinic, bin stencil is full in all 3 dims + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J exact coord comparision + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon + if (HalfNeigh && Newton && Tri) { - if (x(j,2) < ztmp) continue; - if 
(x(j,2) == ztmp) { - if (x(j,1) < ytmp) continue; - if (x(j,1) == ytmp) { - if (x(j,0) < xtmp) continue; - if (x(j,0) == xtmp && j <= i) continue; + if (j <= i) continue; + if (j >= nlocal) { + const tagint jtag = tag(j); + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x(j,2)-ztmp) > delta) { + if (x(j,2) < ztmp) continue; + } else if (fabs(x(j,1)-ytmp) > delta) { + if (x(j,1) < ytmp) continue; + } else { + if (x(j,0) < xtmp) continue; + } } } } @@ -1245,8 +1303,9 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP size_t sharedsize) const { auto* sharedmem = static_cast(dev.team_shmem().get_shmem(sharedsize)); - /* loop over atoms in i's bin, - */ + + // loop over atoms in i's bin + const int atoms_per_bin = c_bins.extent(1); const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin <1?1:dev.team_size()/atoms_per_bin; const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size(); @@ -1256,15 +1315,14 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP if (ibin >= mbins) return; - X_FLOAT* other_x = sharedmem + 6*atoms_per_bin*MY_BIN; - int* other_id = (int*) &other_x[5 * atoms_per_bin]; + X_FLOAT* other_x = sharedmem + 7*atoms_per_bin*MY_BIN; + int* other_id = (int*) &other_x[6 * atoms_per_bin]; int bincount_current = c_bincount[ibin]; for (int kk = 0; kk < TEAMS_PER_BIN; kk++) { const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size(); const int i = MY_II < bincount_current ? 
c_bins(ibin, MY_II) : -1; - /* if necessary, goto next page and add pages */ int n = 0; @@ -1288,7 +1346,8 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP other_x[MY_II + atoms_per_bin] = ytmp; other_x[MY_II + 2 * atoms_per_bin] = ztmp; other_x[MY_II + 3 * atoms_per_bin] = itype; - other_x[MY_II + 4 * atoms_per_bin] = radi; + other_x[MY_II + 4 * atoms_per_bin] = itag; + other_x[MY_II + 5 * atoms_per_bin] = radi; } other_id[MY_II] = i; #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) @@ -1323,7 +1382,7 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; - const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin]; + const X_FLOAT radsum = radi + other_x[m + 5 * atoms_per_bin]; const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); if (rsq <= cutsq) { @@ -1380,7 +1439,8 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP other_x[MY_II + atoms_per_bin] = x(j, 1); other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); other_x[MY_II + 3 * atoms_per_bin] = type(j); - other_x[MY_II + 4 * atoms_per_bin] = radius(j); + other_x[MY_II + 4 * atoms_per_bin] = tag(j); + other_x[MY_II + 5 * atoms_per_bin] = radius(j); } other_id[MY_II] = j; @@ -1394,13 +1454,29 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP if (HalfNeigh && !Newton && j <= i) continue; if (!HalfNeigh && j == i) continue; + + // for triclinic, bin stencil is full in all 3 dims + // must use itag/jtag to eliminate half the I/J interactions + // cannot use I/J exact coord comparision + // b/c transforming orthog -> lambda -> orthog for ghost atoms + // with an added PBC offset can shift all 3 coords by epsilon + if (HalfNeigh && Newton && Tri) { - if (x(j,2) < ztmp) continue; - if (x(j,2) == ztmp) { - if (x(j,1) < ytmp) continue; - if (x(j,1) == ytmp) { 
- if (x(j,0) < xtmp) continue; - if (x(j,0) == xtmp && j <= i) continue; + if (j <= i) continue; + if (j >= nlocal) { + const tagint jtag = other_x[m + 4 * atoms_per_bin]; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (fabs(x(j,2)-ztmp) > delta) { + if (x(j,2) < ztmp) continue; + } else if (fabs(x(j,1)-ytmp) > delta) { + if (x(j,1) < ytmp) continue; + } else { + if (x(j,0) < xtmp) continue; + } } } } @@ -1412,7 +1488,7 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; - const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin]; + const X_FLOAT radsum = radi + other_x[m + 5 * atoms_per_bin]; const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); if (rsq <= cutsq) { diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index 4427012926..fe5484a771 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -189,6 +189,8 @@ class NeighborKokkosExecute public: NeighListKokkos neigh_list; + const double delta; + // data from Neighbor class const typename AT::t_xfloat_2d_randomread cutneighsq; @@ -282,7 +284,7 @@ class NeighborKokkosExecute const int & _mbinx,const int & _mbiny,const int & _mbinz, const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo, const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz, - const int & _exclude,const int & _nex_type, + const double &_delta,const int & _exclude,const int & _nex_type, const typename AT::t_int_1d_const & _ex1_type, const typename AT::t_int_1d_const & _ex2_type, const typename AT::t_int_2d_const & _ex_type, @@ -301,7 +303,7 @@ class NeighborKokkosExecute const typename ArrayTypes::t_int_scalar _h_resize, const typename AT::t_int_scalar _new_maxneighs, const typename 
ArrayTypes::t_int_scalar _h_new_maxneighs): - neigh_list(_neigh_list), cutneighsq(_cutneighsq),exclude(_exclude), + neigh_list(_neigh_list), cutneighsq(_cutneighsq),delta(_delta),exclude(_exclude), nex_type(_nex_type),ex1_type(_ex1_type),ex2_type(_ex2_type), ex_type(_ex_type),nex_group(_nex_group), ex1_bit(_ex1_bit),ex2_bit(_ex2_bit), From 8c7493d02a9ce2f2f9171ac9a26fd155703f7a5f Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Mon, 16 Oct 2023 15:11:37 -0600 Subject: [PATCH 100/107] Fix more broken examples --- examples/mliap/in.mliap.snap.compute | 2 +- examples/snap/in.grid.snap | 2 +- examples/snap/in.grid.tri | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/mliap/in.mliap.snap.compute b/examples/mliap/in.mliap.snap.compute index 4cfccedbdf..c49365f55f 100644 --- a/examples/mliap/in.mliap.snap.compute +++ b/examples/mliap/in.mliap.snap.compute @@ -65,7 +65,7 @@ compute bsum2 snapgroup2 reduce sum c_b[*] # fix bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector compute vbsum all reduce sum c_vb[*] # fix vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector -variable db_2_25 equal c_db[2][25] +variable db_2_25 equal C_db[2][25] thermo 100 diff --git a/examples/snap/in.grid.snap b/examples/snap/in.grid.snap index 08c95a004f..d37b6ffde4 100644 --- a/examples/snap/in.grid.snap +++ b/examples/snap/in.grid.snap @@ -67,7 +67,7 @@ compute mygridlocal all sna/grid/local grid ${ngrid} ${ngrid} ${ngrid} & # define output -variable B5atom equal c_b[2][5] +variable B5atom equal C_b[2][5] variable B5grid equal c_mygrid[8][8] variable rmse_global equal "sqrt( & diff --git a/examples/snap/in.grid.tri b/examples/snap/in.grid.tri index 5283957eb8..b34c9dba30 100644 --- a/examples/snap/in.grid.tri +++ b/examples/snap/in.grid.tri @@ -87,7 +87,7 @@ compute mygridlocal all sna/grid/local grid ${ngridx} ${ngridy} ${ngridz} & # define output -variable B5atom equal c_b[7][5] +variable B5atom equal C_b[7][5] variable B5grid equal 
c_mygrid[13][8] # do not compare x,y,z because assignment of ids From dc67f2527061b036da95c8fd151eb25da57abde3 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Mon, 16 Oct 2023 15:17:46 -0600 Subject: [PATCH 101/107] Another tweak --- examples/snap/in.grid.snap | 10 +++++----- examples/snap/in.grid.tri | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/snap/in.grid.snap b/examples/snap/in.grid.snap index d37b6ffde4..da48957d97 100644 --- a/examples/snap/in.grid.snap +++ b/examples/snap/in.grid.snap @@ -74,11 +74,11 @@ variable rmse_global equal "sqrt( & (c_mygrid[8][1] - x[2])^2 + & (c_mygrid[8][2] - y[2])^2 + & (c_mygrid[8][3] - z[2])^2 + & - (c_mygrid[8][4] - c_b[2][1])^2 + & - (c_mygrid[8][5] - c_b[2][2])^2 + & - (c_mygrid[8][6] - c_b[2][3])^2 + & - (c_mygrid[8][7] - c_b[2][4])^2 + & - (c_mygrid[8][8] - c_b[2][5])^2 & + (c_mygrid[8][4] - C_b[2][1])^2 + & + (c_mygrid[8][5] - C_b[2][2])^2 + & + (c_mygrid[8][6] - C_b[2][3])^2 + & + (c_mygrid[8][7] - C_b[2][4])^2 + & + (c_mygrid[8][8] - C_b[2][5])^2 & )" thermo_style custom step v_B5atom v_B5grid v_rmse_global diff --git a/examples/snap/in.grid.tri b/examples/snap/in.grid.tri index b34c9dba30..95a14f3bb4 100644 --- a/examples/snap/in.grid.tri +++ b/examples/snap/in.grid.tri @@ -94,11 +94,11 @@ variable B5grid equal c_mygrid[13][8] # to atoms is not unnique for different processor grids variable rmse_global equal "sqrt( & - (c_mygrid[13][4] - c_b[7][1])^2 + & - (c_mygrid[13][5] - c_b[7][2])^2 + & - (c_mygrid[13][6] - c_b[7][3])^2 + & - (c_mygrid[13][7] - c_b[7][4])^2 + & - (c_mygrid[13][8] - c_b[7][5])^2 & + (c_mygrid[13][4] - C_b[7][1])^2 + & + (c_mygrid[13][5] - C_b[7][2])^2 + & + (c_mygrid[13][6] - C_b[7][3])^2 + & + (c_mygrid[13][7] - C_b[7][4])^2 + & + (c_mygrid[13][8] - C_b[7][5])^2 & )" thermo_style custom step v_B5atom v_B5grid v_rmse_global From 4ae0fc83124eff9e400ea43166fc21b4d1ac1ca1 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Mon, 16 Oct 2023 15:47:42 -0600 
Subject: [PATCH 102/107] Fix GPU compile --- src/KOKKOS/npair_kokkos.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 8201ae028b..83e60768cd 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -614,6 +614,7 @@ void NeighborKokkosExecute::build_ItemGPU(typename Kokkos::TeamPolic X_FLOAT ytmp; X_FLOAT ztmp; int itype; + tagint itag; const int index = (i >= 0 && i < nlocal) ? i : 0; const AtomNeighbors neighbors_i = neigh_transpose ? neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index); @@ -623,6 +624,7 @@ void NeighborKokkosExecute::build_ItemGPU(typename Kokkos::TeamPolic ytmp = x(i, 1); ztmp = x(i, 2); itype = type(i); + itag = tag(i); other_x[MY_II] = xtmp; other_x[MY_II + atoms_per_bin] = ytmp; other_x[MY_II + 2 * atoms_per_bin] = ztmp; @@ -970,6 +972,7 @@ void NeighborKokkosExecute::build_ItemGhostGPU(typename Kokkos::Team X_FLOAT ytmp; X_FLOAT ztmp; int itype; + tagint itag; const int index = (i >= 0 && i < nall) ? i : 0; const AtomNeighbors neighbors_i = neigh_transpose ? neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index); @@ -979,6 +982,7 @@ void NeighborKokkosExecute::build_ItemGhostGPU(typename Kokkos::Team ytmp = x(i, 1); ztmp = x(i, 2); itype = type(i); + itag = tag(i); other_x[MY_II] = xtmp; other_x[MY_II + atoms_per_bin] = ytmp; other_x[MY_II + 2 * atoms_per_bin] = ztmp; @@ -1331,6 +1335,7 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP X_FLOAT ztmp; X_FLOAT radi; int itype; + tagint itag; const int index = (i >= 0 && i < nlocal) ? i : 0; const AtomNeighbors neighbors_i = neigh_transpose ? 
neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index); @@ -1342,6 +1347,7 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP ztmp = x(i, 2); radi = radius(i); itype = type(i); + itag = tag(i); other_x[MY_II] = xtmp; other_x[MY_II + atoms_per_bin] = ytmp; other_x[MY_II + 2 * atoms_per_bin] = ztmp; From 3b4fff4164a4dc73433b11f9e7fac65c279a2fb2 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 17 Oct 2023 12:01:01 -0600 Subject: [PATCH 103/107] Need to sync tag, add template param --- src/KOKKOS/npair_halffull_kokkos.cpp | 41 +++-- src/KOKKOS/npair_halffull_kokkos.h | 249 +++++++++++++++++++-------- src/KOKKOS/npair_kokkos.cpp | 37 ++-- 3 files changed, 223 insertions(+), 104 deletions(-) diff --git a/src/KOKKOS/npair_halffull_kokkos.cpp b/src/KOKKOS/npair_halffull_kokkos.cpp index bc2549aa8d..ddd7362c4e 100644 --- a/src/KOKKOS/npair_halffull_kokkos.cpp +++ b/src/KOKKOS/npair_halffull_kokkos.cpp @@ -27,8 +27,8 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -template -NPairHalffullKokkos::NPairHalffullKokkos(LAMMPS *lmp) : NPair(lmp) { +template +NPairHalffullKokkos::NPairHalffullKokkos(LAMMPS *lmp) : NPair(lmp) { atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; } @@ -42,13 +42,19 @@ NPairHalffullKokkos::NPairHalffullKokkos(LAMMPS *lmp) : if ghost, also store neighbors of ghost atoms & set inum,gnum correctly ------------------------------------------------------------------------- */ -template -void NPairHalffullKokkos::build(NeighList *list) +template +void NPairHalffullKokkos::build(NeighList *list) { if (NEWTON || TRIM) { x = atomKK->k_x.view(); atomKK->sync(execution_space,X_MASK); } + + if (TRI) { + tag = atomKK->k_tag.view(); + atomKK->sync(execution_space,TAG_MASK); + } + nlocal = atom->nlocal; cutsq_custom = cutoff_custom*cutoff_custom; @@ -68,7 +74,6 @@ void NPairHalffullKokkos::build(NeighList *list) d_neighbors = 
k_list->d_neighbors; delta = 0.01 * force->angstrom; - triclinic = domain->triclinic; // loop over parent full list @@ -82,9 +87,9 @@ void NPairHalffullKokkos::build(NeighList *list) k_list->k_ilist.template modify(); } -template +template KOKKOS_INLINE_FUNCTION -void NPairHalffullKokkos::operator()(TagNPairHalffullCompute, const int &ii) const { +void NPairHalffullKokkos::operator()(TagNPairHalffullCompute, const int &ii) const { int n = 0; const int i = d_ilist_full(ii); @@ -112,7 +117,7 @@ void NPairHalffullKokkos::operator()(TagNPairHalffullCom if (NEWTON) { if (j < nlocal) { if (i > j) continue; - } else if (triclinic) { + } else if (TRI) { if (fabs(x(j,2)-ztmp) > delta) { if (x(j,2) < ztmp) continue; } else if (fabs(x(j,1)-ytmp) > delta) { @@ -158,14 +163,18 @@ void NPairHalffullKokkos::operator()(TagNPairHalffullCom } namespace LAMMPS_NS { -template class NPairHalffullKokkos; -template class NPairHalffullKokkos; -template class NPairHalffullKokkos; -template class NPairHalffullKokkos; +template class NPairHalffullKokkos; +template class NPairHalffullKokkos; +template class NPairHalffullKokkos; +template class NPairHalffullKokkos; +template class NPairHalffullKokkos; +template class NPairHalffullKokkos; #ifdef LMP_KOKKOS_GPU -template class NPairHalffullKokkos; -template class NPairHalffullKokkos; -template class NPairHalffullKokkos; -template class NPairHalffullKokkos; +template class NPairHalffullKokkos; +template class NPairHalffullKokkos; +template class NPairHalffullKokkos; +template class NPairHalffullKokkos; +template class NPairHalffullKokkos; +template class NPairHalffullKokkos; #endif } diff --git a/src/KOKKOS/npair_halffull_kokkos.h b/src/KOKKOS/npair_halffull_kokkos.h index 1249a9ce8a..7e6c28aaba 100644 --- a/src/KOKKOS/npair_halffull_kokkos.h +++ b/src/KOKKOS/npair_halffull_kokkos.h @@ -16,53 +16,79 @@ // Trim off -// Newton +// Newton, no triclinic -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonDevice; +typedef NPairHalffullKokkos 
NPairKokkosHalffullNewtonDevice; NPairStyle(halffull/newton/kk/device, NPairKokkosHalffullNewtonDevice, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | - NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE); + NP_ORTHO | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonHost; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonHost; NPairStyle(halffull/newton/kk/host, NPairKokkosHalffullNewtonHost, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | - NP_ORTHO | NP_TRI | NP_KOKKOS_HOST); + NP_ORTHO | NP_KOKKOS_HOST); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonDevice; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonDevice; NPairStyle(halffull/newton/skip/kk/device, NPairKokkosHalffullNewtonDevice, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | - NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_DEVICE); + NP_ORTHO | NP_SKIP | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonHost; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonHost; NPairStyle(halffull/newton/skip/kk/host, NPairKokkosHalffullNewtonHost, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_SKIP | NP_KOKKOS_HOST); + +// Newton, triclinic + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriDevice; +NPairStyle(halffull/newton/tri/kk/device, + NPairKokkosHalffullNewtonTriDevice, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE); + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriHost; +NPairStyle(halffull/newton/tri/kk/host, + NPairKokkosHalffullNewtonTriHost, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_TRI | NP_KOKKOS_HOST); + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriDevice; +NPairStyle(halffull/newton/tri/skip/kk/device, + NPairKokkosHalffullNewtonTriDevice, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_TRI | 
NP_SKIP | NP_KOKKOS_DEVICE); + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriHost; +NPairStyle(halffull/newton/tri/skip/kk/host, + NPairKokkosHalffullNewtonTriHost, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_HOST); -// Newtoff +// Newtoff (can be triclinic but template param always set to 0) -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffDevice; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffDevice; NPairStyle(halffull/newtoff/kk/device, NPairKokkosHalffullNewtoffDevice, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffHost; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffHost; NPairStyle(halffull/newtoff/kk/host, NPairKokkosHalffullNewtoffHost, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_KOKKOS_HOST); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffDevice; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffDevice; NPairStyle(halffull/newtoff/skip/kk/device, NPairKokkosHalffullNewtoffDevice, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffHost; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffHost; NPairStyle(halffull/newtoff/skip/kk/host, NPairKokkosHalffullNewtoffHost, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | @@ -70,166 +96,244 @@ NPairStyle(halffull/newtoff/skip/kk/host, //************ Ghost ************** -// Newton +// Newton, no triclinic -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonGhostDevice; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonDevice; NPairStyle(halffull/newton/ghost/kk/device, - NPairKokkosHalffullNewtonGhostDevice, + NPairKokkosHalffullNewtonDevice, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI 
| - NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE); + NP_ORTHO | NP_GHOST | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonHost; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonHost; NPairStyle(halffull/newton/ghost/kk/host, NPairKokkosHalffullNewtonHost, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | - NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST); + NP_ORTHO | NP_GHOST | NP_KOKKOS_HOST); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonGhostDevice; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonDevice; NPairStyle(halffull/newton/skip/ghost/kk/device, - NPairKokkosHalffullNewtonGhostDevice, + NPairKokkosHalffullNewtonDevice, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | - NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE); + NP_ORTHO | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonHost; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonHost; NPairStyle(halffull/newton/skip/ghost/kk/host, NPairKokkosHalffullNewtonHost, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_GHOST | NP_SKIP | NP_KOKKOS_HOST); + +// Newton, triclinic + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriDevice; +NPairStyle(halffull/newton/tri/ghost/kk/device, + NPairKokkosHalffullNewtonTriDevice, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE); + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriHost; +NPairStyle(halffull/newton/tri/ghost/kk/host, + NPairKokkosHalffullNewtonTriHost, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST); + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriDevice; +NPairStyle(halffull/newton/tri/skip/ghost/kk/device, + NPairKokkosHalffullNewtonTriDevice, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | 
NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE); + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriHost; +NPairStyle(halffull/newton/tri/skip/ghost/kk/host, + NPairKokkosHalffullNewtonTriHost, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_HOST); -// Newtoff +// Newtoff (can be triclinic but template param always set to 0) -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffGhostDevice; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffDevice; NPairStyle(halffull/newtoff/ghost/kk/device, - NPairKokkosHalffullNewtoffGhostDevice, + NPairKokkosHalffullNewtoffDevice, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffHost; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffHost; NPairStyle(halffull/newtoff/ghost/kk/host, NPairKokkosHalffullNewtoffHost, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffGhostDevice; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffDevice; NPairStyle(halffull/newtoff/skip/ghost/kk/device, - NPairKokkosHalffullNewtoffGhostDevice, + NPairKokkosHalffullNewtoffDevice, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffHost; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffHost; NPairStyle(halffull/newtoff/skip/ghost/kk/host, NPairKokkosHalffullNewtoffHost, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_HOST); - //************ Trim ************** -// Newton +// Newton, no triclinic -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTrimDevice; +typedef NPairHalffullKokkos 
NPairKokkosHalffullNewtonTrimDevice; NPairStyle(halffull/newton/trim/kk/device, NPairKokkosHalffullNewtonTrimDevice, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | - NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_DEVICE); + NP_ORTHO | NP_TRIM | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTrimHost; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTrimHost; NPairStyle(halffull/newton/trim/kk/host, NPairKokkosHalffullNewtonTrimHost, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_TRIM | NP_KOKKOS_HOST); + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTrimDevice; +NPairStyle(halffull/newton/trim/skip/kk/device, + NPairKokkosHalffullNewtonTrimDevice, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE); + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTrimHost; +NPairStyle(halffull/newton/trim/skip/kk/host, + NPairKokkosHalffullNewtonTrimHost, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST); + +// Newton, triclinic + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriTrimDevice; +NPairStyle(halffull/newton/tri/trim/kk/device, + NPairKokkosHalffullNewtonTriTrimDevice, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_DEVICE); + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriTrimHost; +NPairStyle(halffull/newton/tri/trim/kk/host, + NPairKokkosHalffullNewtonTriTrimHost, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_HOST); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTrimDevice; -NPairStyle(halffull/newton/skip/trim/kk/device, +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriTrimDevice; +NPairStyle(halffull/newton/tri/trim/skip/kk/device, NPairKokkosHalffullNewtonTrimDevice, NP_HALF_FULL | 
NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTrimHost; -NPairStyle(halffull/newton/skip/trim/kk/host, - NPairKokkosHalffullNewtonTrimHost, +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriTrimHost; +NPairStyle(halffull/newton/tri/trim/skip/kk/host, + NPairKokkosHalffullNewtonTriTrimHost, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST); -// Newtoff +// Newtoff (can be triclinic but template param always set to 0) -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffTrimDevice; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffTrimDevice; NPairStyle(halffull/newtoff/trim/kk/device, NPairKokkosHalffullNewtoffTrimDevice, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffTrimHost; +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffTrimHost; NPairStyle(halffull/newtoff/trim/kk/host, NPairKokkosHalffullNewtoffTrimHost, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_HOST); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffTrimDevice; -NPairStyle(halffull/newtoff/skip/trim/kk/device, +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffTrimDevice; +NPairStyle(halffull/newtoff/trim/skip/kk/device, NPairKokkosHalffullNewtoffTrimDevice, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffTrimHost; -NPairStyle(halffull/newtoff/skip/trim/kk/host, +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffTrimHost; +NPairStyle(halffull/newtoff/trim/skip/kk/host, NPairKokkosHalffullNewtoffTrimHost, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | 
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST); //************ Ghost ************** -// Newton +// Newton, no triclinic -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonGhostTrimDevice; -NPairStyle(halffull/newton/ghost/trim/kk/device, - NPairKokkosHalffullNewtonGhostTrimDevice, +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTrimDevice; +NPairStyle(halffull/newton/tri/trim/ghost/kk/device, + NPairKokkosHalffullNewtonTrimDevice, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_GHOST | NP_TRIM | NP_KOKKOS_DEVICE); + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTrimHost; +NPairStyle(halffull/newton/trim/ghost/kk/host, + NPairKokkosHalffullNewtonTrimHost, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_GHOST | NP_TRIM | NP_KOKKOS_HOST); + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTrimDevice; +NPairStyle(halffull/newton/trim/skip/ghost/kk/device, + NPairKokkosHalffullNewtonTrimDevice, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE); + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTrimHost; +NPairStyle(halffull/newton/trim/skip/ghost/kk/host, + NPairKokkosHalffullNewtonTrimHost, + NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | + NP_ORTHO | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST); + +// Newton, triclinic + +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriTrimDevice; +NPairStyle(halffull/newton/tri/trim/ghost/kk/device, + NPairKokkosHalffullNewtonTriTrimDevice, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTrimHost; -NPairStyle(halffull/newton/ghost/trim/kk/host, - NPairKokkosHalffullNewtonTrimHost, +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriTrimHost; 
+NPairStyle(halffull/newton/tri/trim/ghost/kk/host, + NPairKokkosHalffullNewtonTriTrimHost, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_HOST); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonGhostTrimDevice; -NPairStyle(halffull/newton/skip/ghost/trim/kk/device, - NPairKokkosHalffullNewtonGhostTrimDevice, +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriTrimDevice; +NPairStyle(halffull/newton/tri/trim/skip/ghost/kk/device, + NPairKokkosHalffullNewtonTriTrimDevice, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTrimHost; -NPairStyle(halffull/newton/skip/ghost/trim/kk/host, - NPairKokkosHalffullNewtonTrimHost, +typedef NPairHalffullKokkos NPairKokkosHalffullNewtonTriTrimHost; +NPairStyle(halffull/newton/tri/trim/skip/ghost/kk/host, + NPairKokkosHalffullNewtonTriTrimHost, NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST); -// Newtoff +// Newtoff (can be triclinic but template param always set to 0) -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffGhostTrimDevice; -NPairStyle(halffull/newtoff/ghost/trim/kk/device, - NPairKokkosHalffullNewtoffGhostTrimDevice, +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffTrimDevice; +NPairStyle(halffull/newtoff/trim/ghost/kk/device, + NPairKokkosHalffullNewtoffTrimDevice, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffTrimHost; -NPairStyle(halffull/newtoff/ghost/trim/kk/host, +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffTrimHost; +NPairStyle(halffull/newtoff/trim/ghost/kk/host, NPairKokkosHalffullNewtoffTrimHost, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | 
NP_MULTI | NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_HOST); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffGhostTrimDevice; -NPairStyle(halffull/newtoff/skip/ghost/trim/kk/device, - NPairKokkosHalffullNewtoffGhostTrimDevice, +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffTrimDevice; +NPairStyle(halffull/newtoff/trim/skip/ghost/kk/device, + NPairKokkosHalffullNewtoffTrimDevice, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE); -typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffTrimHost; -NPairStyle(halffull/newtoff/skip/ghost/trim/kk/host, +typedef NPairHalffullKokkos NPairKokkosHalffullNewtoffTrimHost; +NPairStyle(halffull/newtoff/trim/skip/ghost/kk/host, NPairKokkosHalffullNewtoffTrimHost, NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI | NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST); + // clang-format on #else @@ -244,7 +348,7 @@ namespace LAMMPS_NS { struct TagNPairHalffullCompute{}; -template +template class NPairHalffullKokkos : public NPair { public: typedef DeviceType device_type; @@ -261,6 +365,7 @@ class NPairHalffullKokkos : public NPair { double cutsq_custom,delta; typename AT::t_x_array_randomread x; + typename AT::t_tagint_1d_randomread tag; typename AT::t_neighbors_2d_const d_neighbors_full; typename AT::t_int_1d_const d_ilist_full; diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 83e60768cd..4e992fb2d7 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -219,6 +219,8 @@ void NPairKokkos::build(NeighList *list_) atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK); } + if (HALF && NEWTON && TRI) atomKK->sync(Device,TAG_MASK); + data.special_flag[0] = special_flag[0]; data.special_flag[1] = special_flag[1]; data.special_flag[2] = special_flag[2]; @@ -241,7 +243,7 @@ void NPairKokkos::build(NeighList *list_) if (GHOST) { // assumes newton off - 
NPairKokkosBuildFunctorGhost f(data,atoms_per_bin * 6 * sizeof(X_FLOAT) * factor); + NPairKokkosBuildFunctorGhost f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor); // temporarily disable team policy for ghost due to known bug @@ -416,7 +418,8 @@ void NeighborKokkosExecute:: const X_FLOAT ytmp = x(i, 1); const X_FLOAT ztmp = x(i, 2); const int itype = type(i); - const tagint itag = tag(i); + tagint itag; + if (HalfNeigh && Newton && Tri) itag = tag(i); const int ibin = c_atom2bin(i); @@ -624,12 +627,14 @@ void NeighborKokkosExecute::build_ItemGPU(typename Kokkos::TeamPolic ytmp = x(i, 1); ztmp = x(i, 2); itype = type(i); - itag = tag(i); other_x[MY_II] = xtmp; other_x[MY_II + atoms_per_bin] = ytmp; other_x[MY_II + 2 * atoms_per_bin] = ztmp; other_x[MY_II + 3 * atoms_per_bin] = itype; - other_x[MY_II + 4 * atoms_per_bin] = itag; + if (HalfNeigh && Newton && Tri) { + itag = tag(i); + other_x[MY_II + 4 * atoms_per_bin] = itag; + } } other_id[MY_II] = i; @@ -717,7 +722,8 @@ void NeighborKokkosExecute::build_ItemGPU(typename Kokkos::TeamPolic other_x[MY_II + atoms_per_bin] = x(j, 1); other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); other_x[MY_II + 3 * atoms_per_bin] = type(j); - other_x[MY_II + 4 * atoms_per_bin] = tag(j); + if (HalfNeigh && Newton && Tri) + other_x[MY_II + 4 * atoms_per_bin] = tag(j); } other_id[MY_II] = j; @@ -833,7 +839,6 @@ void NeighborKokkosExecute:: const X_FLOAT ytmp = x(i, 1); const X_FLOAT ztmp = x(i, 2); const int itype = type(i); - const tagint itag = tag(i); const typename ArrayTypes::t_int_1d_const_um stencil = d_stencil; @@ -957,8 +962,8 @@ void NeighborKokkosExecute::build_ItemGhostGPU(typename Kokkos::Team if (ibin >= mbins) return; - X_FLOAT* other_x = sharedmem + 6*atoms_per_bin*MY_BIN; - int* other_id = (int*) &other_x[5 * atoms_per_bin]; + X_FLOAT* other_x = sharedmem + 5*atoms_per_bin*MY_BIN; + int* other_id = (int*) &other_x[4 * atoms_per_bin]; int bincount_current = c_bincount[ibin]; @@ -972,7 +977,6 @@ void 
NeighborKokkosExecute::build_ItemGhostGPU(typename Kokkos::Team X_FLOAT ytmp; X_FLOAT ztmp; int itype; - tagint itag; const int index = (i >= 0 && i < nall) ? i : 0; const AtomNeighbors neighbors_i = neigh_transpose ? neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index); @@ -982,12 +986,10 @@ void NeighborKokkosExecute::build_ItemGhostGPU(typename Kokkos::Team ytmp = x(i, 1); ztmp = x(i, 2); itype = type(i); - itag = tag(i); other_x[MY_II] = xtmp; other_x[MY_II + atoms_per_bin] = ytmp; other_x[MY_II + 2 * atoms_per_bin] = ztmp; other_x[MY_II + 3 * atoms_per_bin] = itype; - other_x[MY_II + 4 * atoms_per_bin] = itag; } other_id[MY_II] = i; #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) @@ -1043,7 +1045,6 @@ void NeighborKokkosExecute::build_ItemGhostGPU(typename Kokkos::Team other_x[MY_II + atoms_per_bin] = x(j, 1); other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); other_x[MY_II + 3 * atoms_per_bin] = type(j); - other_x[MY_II + 4 * atoms_per_bin] = tag(j); } other_id[MY_II] = j; @@ -1129,7 +1130,8 @@ void NeighborKokkosExecute:: const X_FLOAT ztmp = x(i, 2); const X_FLOAT radi = radius(i); const int itype = type(i); - const tagint itag = tag(i); + tagint itag; + if (HalfNeigh && Newton && Tri) itag = tag(i); const int ibin = c_atom2bin(i); @@ -1347,12 +1349,14 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP ztmp = x(i, 2); radi = radius(i); itype = type(i); - itag = tag(i); other_x[MY_II] = xtmp; other_x[MY_II + atoms_per_bin] = ytmp; other_x[MY_II + 2 * atoms_per_bin] = ztmp; other_x[MY_II + 3 * atoms_per_bin] = itype; - other_x[MY_II + 4 * atoms_per_bin] = itag; + if (HalfNeigh && Newton && Tri) { + itag = tag(i); + other_x[MY_II + 4 * atoms_per_bin] = itag; + } other_x[MY_II + 5 * atoms_per_bin] = radi; } other_id[MY_II] = i; @@ -1445,7 +1449,8 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP other_x[MY_II + atoms_per_bin] = x(j, 1); other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); 
other_x[MY_II + 3 * atoms_per_bin] = type(j); - other_x[MY_II + 4 * atoms_per_bin] = tag(j); + if (HalfNeigh && Newton && Tri) + other_x[MY_II + 4 * atoms_per_bin] = tag(j); other_x[MY_II + 5 * atoms_per_bin] = radius(j); } From 750957d585ede258dc025a9091d86cde28ce8c60 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 17 Oct 2023 12:27:35 -0600 Subject: [PATCH 104/107] Remove unused var --- src/KOKKOS/npair_halffull_kokkos.cpp | 5 ----- src/KOKKOS/npair_halffull_kokkos.h | 1 - 2 files changed, 6 deletions(-) diff --git a/src/KOKKOS/npair_halffull_kokkos.cpp b/src/KOKKOS/npair_halffull_kokkos.cpp index ddd7362c4e..c8c4d57fc9 100644 --- a/src/KOKKOS/npair_halffull_kokkos.cpp +++ b/src/KOKKOS/npair_halffull_kokkos.cpp @@ -50,11 +50,6 @@ void NPairHalffullKokkos::build(NeighList *list) atomKK->sync(execution_space,X_MASK); } - if (TRI) { - tag = atomKK->k_tag.view(); - atomKK->sync(execution_space,TAG_MASK); - } - nlocal = atom->nlocal; cutsq_custom = cutoff_custom*cutoff_custom; diff --git a/src/KOKKOS/npair_halffull_kokkos.h b/src/KOKKOS/npair_halffull_kokkos.h index 7e6c28aaba..98526c7fee 100644 --- a/src/KOKKOS/npair_halffull_kokkos.h +++ b/src/KOKKOS/npair_halffull_kokkos.h @@ -365,7 +365,6 @@ class NPairHalffullKokkos : public NPair { double cutsq_custom,delta; typename AT::t_x_array_randomread x; - typename AT::t_tagint_1d_randomread tag; typename AT::t_neighbors_2d_const d_neighbors_full; typename AT::t_int_1d_const d_ilist_full; From c051a4cf2df46de7437e7805e4b5598fded4ca27 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Tue, 17 Oct 2023 14:18:16 -0600 Subject: [PATCH 105/107] Fix perf regression --- src/KOKKOS/npair_kokkos.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 4e992fb2d7..f677b3a1bf 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -1353,11 +1353,11 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP 
other_x[MY_II + atoms_per_bin] = ytmp; other_x[MY_II + 2 * atoms_per_bin] = ztmp; other_x[MY_II + 3 * atoms_per_bin] = itype; + other_x[MY_II + 4 * atoms_per_bin] = radi; if (HalfNeigh && Newton && Tri) { itag = tag(i); - other_x[MY_II + 4 * atoms_per_bin] = itag; + other_x[MY_II + 5 * atoms_per_bin] = itag; } - other_x[MY_II + 5 * atoms_per_bin] = radi; } other_id[MY_II] = i; #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) @@ -1392,7 +1392,7 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; - const X_FLOAT radsum = radi + other_x[m + 5 * atoms_per_bin]; + const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin]; const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); if (rsq <= cutsq) { @@ -1449,9 +1449,9 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP other_x[MY_II + atoms_per_bin] = x(j, 1); other_x[MY_II + 2 * atoms_per_bin] = x(j, 2); other_x[MY_II + 3 * atoms_per_bin] = type(j); + other_x[MY_II + 4 * atoms_per_bin] = radius(j); if (HalfNeigh && Newton && Tri) - other_x[MY_II + 4 * atoms_per_bin] = tag(j); - other_x[MY_II + 5 * atoms_per_bin] = radius(j); + other_x[MY_II + 5 * atoms_per_bin] = tag(j); } other_id[MY_II] = j; @@ -1475,7 +1475,7 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP if (HalfNeigh && Newton && Tri) { if (j <= i) continue; if (j >= nlocal) { - const tagint jtag = other_x[m + 4 * atoms_per_bin]; + const tagint jtag = other_x[m + 5 * atoms_per_bin]; if (itag > jtag) { if ((itag+jtag) % 2 == 0) continue; } else if (itag < jtag) { @@ -1499,7 +1499,7 @@ void NeighborKokkosExecute::build_ItemSizeGPU(typename Kokkos::TeamP const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin]; const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin]; const X_FLOAT rsq = delx*delx + dely*dely + delz*delz; 
- const X_FLOAT radsum = radi + other_x[m + 5 * atoms_per_bin]; + const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin]; const X_FLOAT cutsq = (radsum + skin) * (radsum + skin); if (rsq <= cutsq) { From 45d2a91c6289466f3940571e5845aad527b912a8 Mon Sep 17 00:00:00 2001 From: Maria-Lesniewski Date: Wed, 18 Oct 2023 13:13:37 -0400 Subject: [PATCH 106/107] Barostat fix - see lammps PR 879 and 942 --- src/BOCS/fix_bocs.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/BOCS/fix_bocs.cpp b/src/BOCS/fix_bocs.cpp index d17884855a..52ca948657 100644 --- a/src/BOCS/fix_bocs.cpp +++ b/src/BOCS/fix_bocs.cpp @@ -1024,7 +1024,10 @@ void FixBocs::final_integrate() if (pstat_flag) { if (pstyle == ISO) pressure->compute_scalar(); - else pressure->compute_vector(); + else { + temperature->compute_vector(); + pressure->compute_vector(); + } couple(); pressure->addstep(update->ntimestep+1); } @@ -1961,7 +1964,7 @@ void FixBocs::nhc_press_integrate() int ich,i,pdof; double expfac,factor_etap,kecurrent; double kt = boltz * t_target; - + double lkt_press; // Update masses, to preserve initial freq, if flag set if (omega_mass_flag) { @@ -2006,7 +2009,8 @@ void FixBocs::nhc_press_integrate() } } - double lkt_press = pdof * kt; + if (pstyle == ISO) lkt_press = kt; + else lkt_press = pdof * kt; etap_dotdot[0] = (kecurrent - lkt_press)/etap_mass[0]; double ncfac = 1.0/nc_pchain; From 27e0a7184954c9b5a2d9c5b3b68c700022c7c050 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 19 Oct 2023 07:44:44 -0600 Subject: [PATCH 107/107] whitespace --- src/BOCS/fix_bocs.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/BOCS/fix_bocs.cpp b/src/BOCS/fix_bocs.cpp index 52ca948657..17bb1af002 100644 --- a/src/BOCS/fix_bocs.cpp +++ b/src/BOCS/fix_bocs.cpp @@ -1025,8 +1025,8 @@ void FixBocs::final_integrate() if (pstat_flag) { if (pstyle == ISO) pressure->compute_scalar(); else { - temperature->compute_vector(); - 
pressure->compute_vector(); + temperature->compute_vector(); + pressure->compute_vector(); } couple(); pressure->addstep(update->ntimestep+1); @@ -1965,6 +1965,7 @@ void FixBocs::nhc_press_integrate() double expfac,factor_etap,kecurrent; double kt = boltz * t_target; double lkt_press; + // Update masses, to preserve initial freq, if flag set if (omega_mass_flag) {