From bee5afcd0832cba5468696c8a8a5fa5cc7c4ba1f Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Fri, 21 Apr 2023 00:22:36 -0500 Subject: [PATCH 01/19] Added yukawa/colloid/kk --- src/KOKKOS/Install.sh | 2 + src/KOKKOS/pair_yukawa_colloid_kokkos.cpp | 271 ++++++++++++++++++++++ src/KOKKOS/pair_yukawa_colloid_kokkos.h | 121 ++++++++++ 3 files changed, 394 insertions(+) create mode 100644 src/KOKKOS/pair_yukawa_colloid_kokkos.cpp create mode 100644 src/KOKKOS/pair_yukawa_colloid_kokkos.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index ede766cbf8..766daaff19 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -361,6 +361,8 @@ action pair_vashishta_kokkos.cpp pair_vashishta.cpp action pair_vashishta_kokkos.h pair_vashishta.h action pair_yukawa_kokkos.cpp action pair_yukawa_kokkos.h +action pair_yukawa_colloid_kokkos.cpp pair_yukawa_colloid.cpp +action pair_yukawa_colloid_kokkos.h pair_yukawa_colloid.h action pair_zbl_kokkos.cpp action pair_zbl_kokkos.h action pppm_kokkos.cpp pppm.cpp diff --git a/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp b/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp new file mode 100644 index 0000000000..ca491a3800 --- /dev/null +++ b/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp @@ -0,0 +1,271 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Trung Nguyen (U Chicago) +------------------------------------------------------------------------- */ + +#include "pair_yukawa_colloid_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "memory_kokkos.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "neighbor.h" +#include "respa.h" +#include "update.h" + +#include + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +PairYukawaColloidKokkos::PairYukawaColloidKokkos(LAMMPS *lmp) : PairYukawaColloid(lmp) +{ + respa_enable = 0; + + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK | RADIUS_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +PairYukawaColloidKokkos::~PairYukawaColloidKokkos() +{ + if (copymode) return; + + if (allocated) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_cutsq,cutsq); + } +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template +void PairYukawaColloidKokkos::allocate() +{ + PairYukawaColloid::allocate(); + + int n = atom->ntypes; + memory->destroy(cutsq); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view(); + k_params = Kokkos::DualView( + "PairYukawaColloid::params",n+1,n+1); + + params = k_params.template view(); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairYukawaColloidKokkos::init_style() +{ + PairYukawaColloid::init_style(); + + // error if rRESPA with inner levels + + if (update->whichflag == 1 && utils::strmatch(update->integrate_style,"^respa")) { + int respa = 0; + if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; + if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + if (respa) + error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle"); + } + + // adjust neighbor list request for KOKKOS + + neighflag = lmp->kokkos->neighflag; + auto request = neighbor->find_request(this); + request->set_kokkos_host(std::is_same::value && + !std::is_same::value); + request->set_kokkos_device(std::is_same::value); + if (neighflag == FULL) request->enable_full(); +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ +// Rewrite this. +template +double PairYukawaColloidKokkos::init_one(int i, int j) +{ + double cutone = PairYukawaColloid::init_one(i,j); + + k_params.h_view(i,j).a = a[i][j]; + k_params.h_view(i,j).offset = offset[i][j]; + k_params.h_view(i,j).cutsq = cutone*cutone; + k_params.h_view(j,i) = k_params.h_view(i,j); + + if (i(); + k_params.template modify(); + + return cutone; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairYukawaColloidKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + ev_init(eflag,vflag,0); + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view(); + } + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"pair:vatom"); + d_vatom = k_vatom.view(); + } + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync(); + k_params.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view(); + c_x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + type = atomKK->k_type.view(); + radius = atomKK->k_radius.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + + // loop over neighbors of my atoms + + EV_FLOAT ev = pair_compute,void >( + this,(NeighListKokkos*)list); + + if (eflag_global) eng_vdwl += ev.evdwl; + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } +} + + + +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairYukawaColloidKokkos:: +compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const { + (void) i; + (void) j; + const F_FLOAT radi = radius[i]; + const F_FLOAT radj = radius[j]; + const F_FLOAT rr = sqrt(rsq); + // Fetch the params either off the stack or from some mapped memory? + const F_FLOAT aa = STACKPARAMS ? m_params[itype][jtype].a + : params(itype,jtype).a; + + // U = a * exp(-kappa*r-(radi+radj)) / kappa + // f = a * exp(-kappa*r) + // f/r = a * exp(-kappa*r) / r + const F_FLOAT rinv = 1.0 / rr; + const F_FLOAT rinv2 = rinv*rinv; + const F_FLOAT screening = exp(-kappa*(rr-(radi+radj))); + const F_FLOAT forceyukawa = aa * screening; + const F_FLOAT fpair = forceyukawa * rinv; + + return fpair; +} + +template +template +KOKKOS_INLINE_FUNCTION +F_FLOAT PairYukawaColloidKokkos:: +compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const { + (void) i; + (void) j; + const F_FLOAT radi = radius[i]; + const F_FLOAT radj = radius[j]; + const F_FLOAT rr = sqrt(rsq); + const F_FLOAT aa = STACKPARAMS ? m_params[itype][jtype].a + : params(itype,jtype).a; + const F_FLOAT offset = STACKPARAMS ? m_params[itype][jtype].offset + : params(itype,jtype).offset; + + // U = a * exp(-kappa*r) / kappa + const F_FLOAT rinv = 1.0 / rr; + const F_FLOAT screening = exp(-kappa*(rr-(radi+radj))); + + return aa / kappa * screening - offset; +} + + +namespace LAMMPS_NS { +template class PairYukawaColloidKokkos; +#ifdef LMP_KOKKOS_GPU +template class PairYukawaColloidKokkos; +#endif +} diff --git a/src/KOKKOS/pair_yukawa_colloid_kokkos.h b/src/KOKKOS/pair_yukawa_colloid_kokkos.h new file mode 100644 index 0000000000..060b621a8a --- /dev/null +++ b/src/KOKKOS/pair_yukawa_colloid_kokkos.h @@ -0,0 +1,121 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(yukawa/colloid/kk,PairYukawaColloidKokkos); +PairStyle(yukawa/colloid/kk/device,PairYukawaColloidKokkos); +PairStyle(yukawa/colloid/kk/host,PairYukawaColloidKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_PAIR_YUKAWA_COLLOID_KOKKOS_H +#define LMP_PAIR_YUKAWA_COLLOID_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_yukawa_colloid.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template +class PairYukawaColloidKokkos : public PairYukawaColloid { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {COUL_FLAG=0}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + + PairYukawaColloidKokkos(class LAMMPS *); + ~PairYukawaColloidKokkos() override; + + void compute(int, int) override; + void init_style() override; + double init_one(int,int) override; + + struct params_yukawa { + KOKKOS_INLINE_FUNCTION + params_yukawa() { cutsq=0, a = 0; offset = 0; } + KOKKOS_INLINE_FUNCTION + params_yukawa(int /*i*/) { cutsq=0, a = 0; offset = 0; } + F_FLOAT cutsq, a, offset; + }; + + + protected: + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_ecoul(const F_FLOAT& /*rsq*/, const int& /*i*/, const int& /*j*/, + const int& /*itype*/, const int& /*jtype*/) const { return 0; } + + + Kokkos::DualView k_params; + typename Kokkos::DualView::t_dev_const_um params; + params_yukawa m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename AT::t_x_array_randomread x; + typename AT::t_x_array c_x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + typename AT::t_float_1d_randomread radius; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; + + int newton_pair; + double special_lj[4]; + + typename AT::tdual_ffloat_2d k_cutsq; + typename AT::t_ffloat_2d d_cutsq; + + + int neighflag; + int nlocal,nall,eflag,vflag; + + void allocate() override; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend struct PairComputeFunctor; + friend EV_FLOAT pair_compute_neighlist( + PairYukawaColloidKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist( + PairYukawaColloidKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute_neighlist( + PairYukawaColloidKokkos*,NeighListKokkos*); + friend EV_FLOAT pair_compute( + PairYukawaColloidKokkos*,NeighListKokkos*); + friend void pair_virial_fdotr_compute(PairYukawaColloidKokkos*); + +}; + +} + +#endif +#endif + From f62a4c537258e6eab4c20df0237b607378963d37 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 17 Jul 2023 16:49:26 -0500 Subject: [PATCH 02/19] Working on fix efield/kk --- src/KOKKOS/fix_efield_kokkos.cpp | 87 ++++++++++++++++++++++++++++++++ src/KOKKOS/fix_efield_kokkos.h | 52 +++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 src/KOKKOS/fix_efield_kokkos.cpp create mode 100644 src/KOKKOS/fix_efield_kokkos.h diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp new file mode 100644 index 0000000000..697116402d --- /dev/null +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -0,0 +1,87 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "fix_efield_kokkos.h" + +#include "atom_kokkos.h" +#include "atom_masks.h" +#include "atom_vec.h" +#include "input.h" +#include "modify.h" +#include "update.h" +#include "variable.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +FixEfieldKokkos::FixEfieldKokkos(LAMMPS *lmp, int narg, char **arg) : + FixEfield(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *)atom; + execution_space = ExecutionSpaceFromDevice::space; + + datamask_read = X_MASK | Q_MASK | F_MASK | RMASS_MASK | MASK_MASK | TYPE_MASK; + datamask_modify = F_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixEfieldKokkos::post_force(int /*vflag*/) +{ + // update efield due to variables + + update_efield_variables(); + + atomKK->sync(execution_space,datamask_read); + atomKK->modified(execution_space,datamask_modify); + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + q = atomKK->k_q.view(); + type = atomKK->k_type.view(); + mask = atomKK->k_mask.view(); + int nlocal = atomKK->nlocal; + if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; + + copymode = 1; + + eflag = 0; + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), *this); + + copymode = 0; +} + +template +KOKKOS_INLINE_FUNCTION +void FixEfieldKokkos::operator()(const int i) const +{ + if (mask[i] & groupbit) { + double qi = q[i]; + f(i,0) += qi*ex; + f(i,1) += qi*ey; + f(i,2) += qi*ez; + } +} + +namespace LAMMPS_NS { +template class FixEfieldKokkos; +#ifdef LMP_KOKKOS_GPU +template class FixEfieldKokkos; +#endif +} diff --git a/src/KOKKOS/fix_efield_kokkos.h b/src/KOKKOS/fix_efield_kokkos.h new file mode 100644 index 0000000000..e5171ce6f5 --- /dev/null +++ b/src/KOKKOS/fix_efield_kokkos.h @@ -0,0 +1,52 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(efield/kk,FixFixEfieldKokkos); +FixStyle(efield/kk/device,FixFixEfieldKokkos); +FixStyle(efield/kk/host,FixFixEfieldKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_FIX_EFIELD_KOKKOS_H +#define LMP_FIX_EFIELD_KOKKOS_H + +#include "fix_efield.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class FixEfieldKokkos : public FixEfield { + public: + FixEfieldKokkos(class LAMMPS *, int, char **); + + void post_force(int) override; + + KOKKOS_INLINE_FUNCTION + void operator()(const int, double &) const; + + private: + typename ArrayTypes::t_x_array x; + typename ArrayTypes::t_f_array f; + typename ArrayTypes::t_int_1d type; + typename ArrayTypes::t_int_1d mask; + typename ArrayTypes::t_float_1d_randomread q; +}; + +} // namespace LAMMPS_NS + +#endif // LMP_FIX_EFIELD_KOKKOS_H +#endif // FIX_CLASS From 6a991ff0a094a089a50b5f3723a7b1e8c1bbf990 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 1 Aug 2023 10:53:20 -0500 Subject: [PATCH 03/19] Updated fix efield/kk, needs work on unwrap --- src/KOKKOS/fix_efield_kokkos.cpp | 173 ++++++++++++++++++++++++++----- src/KOKKOS/fix_efield_kokkos.h | 67 +++++++++--- src/fix_efield.cpp | 2 + 3 files changed, 197 insertions(+), 45 deletions(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index 697116402d..fc84967e02 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -15,14 +15,23 @@ #include "fix_efield_kokkos.h" #include "atom_kokkos.h" -#include "atom_masks.h" -#include "atom_vec.h" -#include "input.h" -#include "modify.h" #include "update.h" +#include "modify.h" +#include "domain.h" +#include "region.h" +#include "input.h" #include "variable.h" +#include "memory_kokkos.h" +#include "error.h" +#include "atom_masks.h" +#include "kokkos_base.h" + +#include using namespace LAMMPS_NS; +using namespace FixConst; + +enum{NONE,CONSTANT,EQUAL,ATOM}; /* ---------------------------------------------------------------------- */ @@ -31,11 +40,36 @@ FixEfieldKokkos::FixEfieldKokkos(LAMMPS *lmp, int narg, char **arg) FixEfield(lmp, narg, arg) { kokkosable = 1; - atomKK = (AtomKokkos *)atom; + atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; - datamask_read = X_MASK | Q_MASK | F_MASK | RMASS_MASK | MASK_MASK | TYPE_MASK; - datamask_modify = F_MASK; + memory->destroy(efield); + memoryKK->create_kokkos(k_efield,efield,maxatom,4,"efield:efield"); + d_efield = k_efield.view(); +} + +/* ---------------------------------------------------------------------- */ + +template +FixEfieldKokkos::~FixEfieldKokkos() +{ + if (copymode) return; + + memoryKK->destroy_kokkos(k_efield,efield); + efield = nullptr; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixEfieldKokkos::init() +{ + FixEfield::init(); + + if (utils::strmatch(update->integrate_style,"^respa")) + error->all(FLERR,"Cannot (yet) use respa with Kokkos"); } /* ---------------------------------------------------------------------- */ @@ -43,39 +77,121 @@ FixEfieldKokkos::FixEfieldKokkos(LAMMPS *lmp, int narg, char **arg) template void FixEfieldKokkos::post_force(int /*vflag*/) { - // update efield due to variables + atomKK->sync(execution_space, F_MASK | Q_MASK | MASK_MASK); - update_efield_variables(); - - atomKK->sync(execution_space,datamask_read); - atomKK->modified(execution_space,datamask_modify); - - x = atomKK->k_x.view(); f = atomKK->k_f.view(); q = atomKK->k_q.view(); - type = atomKK->k_type.view(); mask = atomKK->k_mask.view(); - int nlocal = atomKK->nlocal; - if (igroup == atomKK->firstgroup) nlocal = atomKK->nfirst; - copymode = 1; + int nlocal = atom->nlocal; - eflag = 0; + // update region if necessary - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), *this); - - copymode = 0; + if (region) { + if (!utils::strmatch(region->style, "^block")) + error->all(FLERR,"Cannot (yet) use {}-style region with fix efield/kk",region->style); + region->prematch(); + DAT::tdual_int_1d k_match = DAT::tdual_int_1d("efield:k_match",nlocal); + KokkosBase* regionKKBase = dynamic_cast(region); + regionKKBase->match_all_kokkos(groupbit,k_match); + k_match.template sync(); + d_match = k_match.template view(); + } + + // reallocate sforce array if necessary + + if (varflag == ATOM && atom->nmax > maxatom) { + maxatom = atom->nmax; + memoryKK->destroy_kokkos(k_efield,efield); + memoryKK->create_kokkos(k_efield,efield,maxatom,4,"efield:efield"); + d_efield = k_efield.view(); + } + + fsum[0] = fsum[1] = fsum[2] = fsum[3] = 0.0; + double_4 fsum_kk; + force_flag = 0; + + if (varflag == CONSTANT) { + copymode = 1; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); + copymode = 0; + + // variable force, wrap with clear/add + + } else { + + atomKK->sync(Host,ALL_MASK); // this can be removed when variable class is ported to Kokkos + + modify->clearstep_compute(); + + if (xstyle == EQUAL) ex = input->variable->compute_equal(xvar); + else if (xstyle == ATOM) + input->variable->compute_atom(xvar,igroup,&efield[0][0],4,0); + if (ystyle == EQUAL) ey = input->variable->compute_equal(yvar); + else if (ystyle == ATOM) + input->variable->compute_atom(yvar,igroup,&efield[0][1],4,0); + if (zstyle == EQUAL) ez = input->variable->compute_equal(zvar); + else if (zstyle == ATOM) + input->variable->compute_atom(zvar,igroup,&efield[0][2],4,0); + + modify->addstep_compute(update->ntimestep + 1); + + if (varflag == ATOM) { // this can be removed when variable class is ported to Kokkos + k_efield.modify(); + k_efield.sync(); + } + + copymode = 1; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); + copymode = 0; + } + + atomKK->modified(execution_space, F_MASK); + + fsum[0] = fsum_kk.d0; + fsum[1] = fsum_kk.d1; + fsum[2] = fsum_kk.d2; + fsum[3] = fsum_kk.d3; } template KOKKOS_INLINE_FUNCTION -void FixEfieldKokkos::operator()(const int i) const -{ +void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, double_4& fsum_kk) const { if (mask[i] & groupbit) { - double qi = q[i]; - f(i,0) += qi*ex; - f(i,1) += qi*ey; - f(i,2) += qi*ez; + if (region && !d_match[i]) return; + const F_FLOAT qtmp = q[i]; + const F_FLOAT fx = qtmp * ex; + const F_FLOAT fy = qtmp * ey; + const F_FLOAT fz = qtmp * ez; + f(i,0) += fx; + f(i,1) += fy; + f(i,2) += fz; + //fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; + fsum_kk.d1 += fx; + fsum_kk.d2 += fy; + fsum_kk.d3 += fz; + } +} + +template +KOKKOS_INLINE_FUNCTION +void FixEfieldKokkos::operator()(TagFixEfieldNonConstant, const int &i, double_4& fsum_kk) const { + if (mask[i] & groupbit) { + if (region && !d_match[i]) return; + const F_FLOAT qtmp = q[i]; + const F_FLOAT fx = qtmp * ex; + const F_FLOAT fy = qtmp * ey; + const F_FLOAT fz = qtmp * ez; + if (xstyle == ATOM) f(i,0) += d_efield(i,0); + else if (xstyle) f(i,0) += fx; + if (ystyle == ATOM) f(i,1) = d_efield(i,1); + else if (ystyle) f(i,1) += fy; + if (zstyle == ATOM) f(i,2) = d_efield(i,2); + else if (zstyle) f(i,2) += fz; + //fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; + fsum_kk.d1 += fx; + fsum_kk.d2 += fy; + fsum_kk.d3 += fz; } } @@ -85,3 +201,4 @@ template class FixEfieldKokkos; template class FixEfieldKokkos; #endif } + diff --git a/src/KOKKOS/fix_efield_kokkos.h b/src/KOKKOS/fix_efield_kokkos.h index e5171ce6f5..8d8d2ee97a 100644 --- a/src/KOKKOS/fix_efield_kokkos.h +++ b/src/KOKKOS/fix_efield_kokkos.h @@ -13,9 +13,9 @@ #ifdef FIX_CLASS // clang-format off -FixStyle(efield/kk,FixFixEfieldKokkos); -FixStyle(efield/kk/device,FixFixEfieldKokkos); -FixStyle(efield/kk/host,FixFixEfieldKokkos); +FixStyle(efield/kk,FixEfieldKokkos); +FixStyle(efield/kk/device,FixEfieldKokkos); +FixStyle(efield/kk/host,FixEfieldKokkos); // clang-format on #else @@ -28,25 +28,58 @@ FixStyle(efield/kk/host,FixFixEfieldKokkos); namespace LAMMPS_NS { +struct e_double_4 { + double d0, d1, d2, d3; + KOKKOS_INLINE_FUNCTION + e_double_4() { + d0 = d1 = d2 = d3 = 0.0; + } + KOKKOS_INLINE_FUNCTION + e_double_4& operator+=(const e_double_4 &rhs) { + d0 += rhs.d0; + d1 += rhs.d1; + d2 += rhs.d2; + d3 += rhs.d3; + return *this; + } +}; +typedef e_double_4 double_4; + +struct TagFixEfieldConstant{}; + +struct TagFixEfieldNonConstant{}; + template class FixEfieldKokkos : public FixEfield { - public: - FixEfieldKokkos(class LAMMPS *, int, char **); + public: + typedef DeviceType device_type; + typedef double_4 value_type; + typedef ArrayTypes AT; - void post_force(int) override; + FixEfieldKokkos(class LAMMPS *, int, char **); + ~FixEfieldKokkos() override; + void init() override; + void post_force(int) override; - KOKKOS_INLINE_FUNCTION - void operator()(const int, double &) const; + KOKKOS_INLINE_FUNCTION + void operator()(TagFixEfieldConstant, const int&, double_4&) const; - private: - typename ArrayTypes::t_x_array x; - typename ArrayTypes::t_f_array f; - typename ArrayTypes::t_int_1d type; - typename ArrayTypes::t_int_1d mask; - typename ArrayTypes::t_float_1d_randomread q; + KOKKOS_INLINE_FUNCTION + void operator()(TagFixEfieldNonConstant, const int&, double_4&) const; + + private: + DAT::tdual_ffloat_2d k_efield; + typename AT::t_ffloat_2d_randomread d_efield; + typename AT::t_int_1d d_match; + + typename AT::t_x_array_randomread x; + typename AT::t_float_1d_randomread q; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread mask; }; -} // namespace LAMMPS_NS +} + +#endif +#endif -#endif // LMP_FIX_EFIELD_KOKKOS_H -#endif // FIX_CLASS diff --git a/src/fix_efield.cpp b/src/fix_efield.cpp index d01a498d39..23277f8af3 100644 --- a/src/fix_efield.cpp +++ b/src/fix_efield.cpp @@ -129,6 +129,8 @@ FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) : FixEfield::~FixEfield() { + if (copymode) return; + delete[] xstr; delete[] ystr; delete[] zstr; From 34c398dd372a5eae6ecb53a4fa10a2bf2e0cdad0 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Wed, 2 Aug 2023 06:59:24 -0500 Subject: [PATCH 04/19] Tried two ways of doing parallel reduce for fsum --- src/KOKKOS/fix_efield_kokkos.cpp | 75 +++++++++++++++++++++++++++++--- src/KOKKOS/fix_efield_kokkos.h | 1 + 2 files changed, 69 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index fc84967e02..d4ef6dc3f2 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -17,7 +17,7 @@ #include "atom_kokkos.h" #include "update.h" #include "modify.h" -#include "domain.h" +#include "domain_kokkos.h" #include "region.h" #include "input.h" #include "variable.h" @@ -77,10 +77,11 @@ void FixEfieldKokkos::init() template void FixEfieldKokkos::post_force(int /*vflag*/) { - atomKK->sync(execution_space, F_MASK | Q_MASK | MASK_MASK); + atomKK->sync(execution_space, F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK); f = atomKK->k_f.view(); q = atomKK->k_q.view(); + image = atomKK->k_image.view(); mask = atomKK->k_mask.view(); int nlocal = atom->nlocal; @@ -113,7 +114,50 @@ void FixEfieldKokkos::post_force(int /*vflag*/) if (varflag == CONSTANT) { copymode = 1; - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); + //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); + + { + // local variables for lambda capture + auto prd = Few(domain->prd); + auto h = Few(domain->h); + auto triclinic = domain->triclinic; + auto l_ex = ex; + auto l_ey = ey; + auto l_ez = ez; + + auto l_x = x; + auto l_q = q; + auto l_f = f; + auto l_mask = mask; + auto l_image = image; + auto l_groupbit = groupbit; + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), + LAMMPS_LAMBDA(int i, double_4& fsum_kk) { + if (l_mask[i] & l_groupbit) { + + Few x_i; + x_i[0] = l_x(i,0); + x_i[1] = l_x(i,1); + x_i[2] = l_x(i,2); + auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i)); + auto qtmp = l_q(i); + auto fx = qtmp * l_ex; + auto fy = qtmp * l_ey; + auto fz = qtmp * l_ez; + l_f(i,0) += fx; + l_f(i,1) += fy; + l_f(i,2) += fz; + + fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; + fsum_kk.d1 += fx; + fsum_kk.d2 += fy; + fsum_kk.d3 += fz; + } + }, fsum_kk); + + } + copymode = 0; // variable force, wrap with clear/add @@ -159,6 +203,14 @@ KOKKOS_INLINE_FUNCTION void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, double_4& fsum_kk) const { if (mask[i] & groupbit) { if (region && !d_match[i]) return; + auto prd = Few(domain->prd); + auto h = Few(domain->h); + auto triclinic = domain->triclinic; + Few x_i; + x_i[0] = x(i,0); + x_i[1] = x(i,1); + x_i[2] = x(i,2); + auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i)); const F_FLOAT qtmp = q[i]; const F_FLOAT fx = qtmp * ex; const F_FLOAT fy = qtmp * ey; @@ -166,7 +218,8 @@ void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, f(i,0) += fx; f(i,1) += fy; f(i,2) += fz; - //fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; + + fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; fsum_kk.d1 += fx; fsum_kk.d2 += fy; fsum_kk.d3 += fz; @@ -176,19 +229,27 @@ void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, template KOKKOS_INLINE_FUNCTION void FixEfieldKokkos::operator()(TagFixEfieldNonConstant, const int &i, double_4& fsum_kk) const { + auto prd = Few(domain->prd); + auto h = Few(domain->h); + auto triclinic = domain->triclinic; if (mask[i] & groupbit) { if (region && !d_match[i]) return; + Few x_i; + x_i[0] = x(i,0); + x_i[1] = x(i,1); + x_i[2] = x(i,2); + auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i)); const F_FLOAT qtmp = q[i]; const F_FLOAT fx = qtmp * ex; const F_FLOAT fy = qtmp * ey; const F_FLOAT fz = qtmp * ez; if (xstyle == ATOM) f(i,0) += d_efield(i,0); else if (xstyle) f(i,0) += fx; - if (ystyle == ATOM) f(i,1) = d_efield(i,1); + if (ystyle == ATOM) f(i,1) += d_efield(i,1); else if (ystyle) f(i,1) += fy; - if (zstyle == ATOM) f(i,2) = d_efield(i,2); + if (zstyle == ATOM) f(i,2) += d_efield(i,2); else if (zstyle) f(i,2) += fz; - //fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; + fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; fsum_kk.d1 += fx; fsum_kk.d2 += fy; fsum_kk.d3 += fz; diff --git a/src/KOKKOS/fix_efield_kokkos.h b/src/KOKKOS/fix_efield_kokkos.h index 8d8d2ee97a..d159473d1d 100644 --- a/src/KOKKOS/fix_efield_kokkos.h +++ b/src/KOKKOS/fix_efield_kokkos.h @@ -75,6 +75,7 @@ class FixEfieldKokkos : public FixEfield { typename AT::t_x_array_randomread x; typename AT::t_float_1d_randomread q; typename AT::t_f_array f; + typename AT::t_imageint_1d_randomread image; typename AT::t_int_1d_randomread mask; }; From dc8f17e8e6d75e795e93fd2b2d85e93324f6ec12 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Sun, 6 Aug 2023 00:04:46 -0500 Subject: [PATCH 05/19] Fixed bugs with missing x array, removed the () operator overloads because they require access to domain within the kernels --- src/KOKKOS/fix_efield_kokkos.cpp | 63 +++++++++++++++++++++++++++----- src/KOKKOS/fix_efield_kokkos.h | 4 +- 2 files changed, 55 insertions(+), 12 deletions(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index d4ef6dc3f2..ecf4418cf6 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -77,8 +77,9 @@ void FixEfieldKokkos::init() template void FixEfieldKokkos::post_force(int /*vflag*/) { - atomKK->sync(execution_space, F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK); + atomKK->sync(execution_space, X_MASK | F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK); + x = atomKK->k_f.view(); f = atomKK->k_f.view(); q = atomKK->k_q.view(); image = atomKK->k_image.view(); @@ -132,10 +133,8 @@ void FixEfieldKokkos::post_force(int /*vflag*/) auto l_image = image; auto l_groupbit = groupbit; - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), - LAMMPS_LAMBDA(int i, double_4& fsum_kk) { + Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) { if (l_mask[i] & l_groupbit) { - Few x_i; x_i[0] = l_x(i,0); x_i[1] = l_x(i,1); @@ -148,14 +147,12 @@ void FixEfieldKokkos::post_force(int /*vflag*/) l_f(i,0) += fx; l_f(i,1) += fy; l_f(i,2) += fz; - fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; fsum_kk.d1 += fx; fsum_kk.d2 += fy; fsum_kk.d3 += fz; } - }, fsum_kk); - + },fsum_kk); } copymode = 0; @@ -186,7 +183,53 @@ void FixEfieldKokkos::post_force(int /*vflag*/) } copymode = 1; - Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); + //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); + + { + // local variables for lambda capture + auto prd = Few(domain->prd); + auto h = Few(domain->h); + auto triclinic = domain->triclinic; + auto l_ex = ex; + auto l_ey = ey; + auto l_ez = ez; + auto l_d_efield = d_efield; + + auto l_x = x; + auto l_q = q; + auto l_f = f; + auto l_mask = mask; + auto l_image = image; + auto l_groupbit = groupbit; + auto l_xstyle = xstyle; + auto l_ystyle = ystyle; + auto l_zstyle = zstyle; + + Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) { + if (l_mask[i] & l_groupbit) { + Few x_i; + x_i[0] = l_x(i,0); + x_i[1] = l_x(i,1); + x_i[2] = l_x(i,2); + auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i)); + auto qtmp = l_q(i); + auto fx = qtmp * l_ex; + auto fy = qtmp * l_ey; + auto fz = qtmp * l_ez; + if (l_xstyle == ATOM) l_f(i,0) += l_d_efield(i,0); + else if (l_xstyle) l_f(i,0) += fx; + if (l_ystyle == ATOM) l_f(i,1) += l_d_efield(i,1); + else if (l_ystyle) l_f(i,1) += fy; + if (l_zstyle == ATOM) l_f(i,2) += l_d_efield(i,2); + else if (l_zstyle) l_f(i,2) += fz; + fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; + fsum_kk.d1 += fx; + fsum_kk.d2 += fy; + fsum_kk.d3 += fz; + } + },fsum_kk); + } + copymode = 0; } @@ -197,7 +240,7 @@ void FixEfieldKokkos::post_force(int /*vflag*/) fsum[2] = fsum_kk.d2; fsum[3] = fsum_kk.d3; } - +/* template KOKKOS_INLINE_FUNCTION void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, double_4& fsum_kk) const { @@ -255,7 +298,7 @@ void FixEfieldKokkos::operator()(TagFixEfieldNonConstant, const int fsum_kk.d3 += fz; } } - +*/ namespace LAMMPS_NS { template class FixEfieldKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/fix_efield_kokkos.h b/src/KOKKOS/fix_efield_kokkos.h index d159473d1d..2739d03ffc 100644 --- a/src/KOKKOS/fix_efield_kokkos.h +++ b/src/KOKKOS/fix_efield_kokkos.h @@ -60,13 +60,13 @@ class FixEfieldKokkos : public FixEfield { ~FixEfieldKokkos() override; void init() override; void post_force(int) override; - +/* KOKKOS_INLINE_FUNCTION void operator()(TagFixEfieldConstant, const int&, double_4&) const; KOKKOS_INLINE_FUNCTION void operator()(TagFixEfieldNonConstant, const int&, double_4&) const; - +*/ private: DAT::tdual_ffloat_2d k_efield; typename AT::t_ffloat_2d_randomread d_efield; From 4a8275446439aaaa0e859fbbe7fc53de9ac6c4b4 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 7 Aug 2023 00:30:32 -0500 Subject: [PATCH 06/19] Fixed an obvious bug with x --- src/KOKKOS/fix_efield_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index ecf4418cf6..1f29d1f809 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -79,7 +79,7 @@ void FixEfieldKokkos::post_force(int /*vflag*/) { atomKK->sync(execution_space, X_MASK | F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK); - x = atomKK->k_f.view(); + x = atomKK->k_x.view(); f = atomKK->k_f.view(); q = atomKK->k_q.view(); image = atomKK->k_image.view(); From 4ca32f0ceccedc2bee1c657038a72e0802c58605 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 7 Aug 2023 15:24:16 -0500 Subject: [PATCH 07/19] Added comments to the use of operators overloaded when accessing unwrap on the GPU (serial works fine) --- src/KOKKOS/fix_efield_kokkos.cpp | 14 +++++++++----- src/KOKKOS/fix_efield_kokkos.h | 4 ++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index 1f29d1f809..bbf106f515 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -115,6 +115,8 @@ void FixEfieldKokkos::post_force(int /*vflag*/) if (varflag == CONSTANT) { copymode = 1; + + // It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below) //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); { @@ -183,8 +185,8 @@ void FixEfieldKokkos::post_force(int /*vflag*/) } copymode = 1; + // It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below) //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); - { // local variables for lambda capture auto prd = Few(domain->prd); @@ -240,12 +242,13 @@ void FixEfieldKokkos::post_force(int /*vflag*/) fsum[2] = fsum_kk.d2; fsum[3] = fsum_kk.d3; } -/* + template KOKKOS_INLINE_FUNCTION void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, double_4& fsum_kk) const { if (mask[i] & groupbit) { if (region && !d_match[i]) return; + auto prd = Few(domain->prd); auto h = Few(domain->h); auto triclinic = domain->triclinic; @@ -254,14 +257,14 @@ void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, x_i[1] = x(i,1); x_i[2] = x(i,2); auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i)); - const F_FLOAT qtmp = q[i]; + const F_FLOAT qtmp = q(i); const F_FLOAT fx = qtmp * ex; const F_FLOAT fy = qtmp * ey; const F_FLOAT fz = qtmp * ez; f(i,0) += fx; f(i,1) += fy; f(i,2) += fz; - + // TODO: access to unwrap below crashes fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; fsum_kk.d1 += fx; fsum_kk.d2 += fy; @@ -292,13 +295,14 @@ void FixEfieldKokkos::operator()(TagFixEfieldNonConstant, const int else if (ystyle) f(i,1) += fy; if (zstyle == ATOM) f(i,2) += d_efield(i,2); else if (zstyle) f(i,2) += fz; + // TODO: access to unwrap below crashes fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; fsum_kk.d1 += fx; fsum_kk.d2 += fy; fsum_kk.d3 += fz; } } -*/ + namespace LAMMPS_NS { template class FixEfieldKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/fix_efield_kokkos.h b/src/KOKKOS/fix_efield_kokkos.h index 2739d03ffc..d159473d1d 100644 --- a/src/KOKKOS/fix_efield_kokkos.h +++ b/src/KOKKOS/fix_efield_kokkos.h @@ -60,13 +60,13 @@ class FixEfieldKokkos : public FixEfield { ~FixEfieldKokkos() override; void init() override; void post_force(int) override; -/* + KOKKOS_INLINE_FUNCTION void operator()(TagFixEfieldConstant, const int&, double_4&) const; KOKKOS_INLINE_FUNCTION void operator()(TagFixEfieldNonConstant, const int&, double_4&) const; -*/ + private: DAT::tdual_ffloat_2d k_efield; typename AT::t_ffloat_2d_randomread d_efield; From 6ff85cab7f3bc8cec12470fadd675945561a0c39 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Thu, 10 Aug 2023 00:34:52 -0500 Subject: [PATCH 08/19] Adding fix spring/self/kk, needed to add maxatom to fix spring/self, may need resize xoriginal as well --- src/fix_spring_self.cpp | 1 + src/fix_spring_self.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/fix_spring_self.cpp b/src/fix_spring_self.cpp index 550b3afc4d..31f54caef2 100644 --- a/src/fix_spring_self.cpp +++ b/src/fix_spring_self.cpp @@ -73,6 +73,7 @@ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) : // register with Atom class xoriginal = nullptr; + maxatom = atom->nmax; FixSpringSelf::grow_arrays(atom->nmax); atom->add_callback(Atom::GROW); atom->add_callback(Atom::RESTART); diff --git a/src/fix_spring_self.h b/src/fix_spring_self.h index 59dba78e43..24a03aa92e 100644 --- a/src/fix_spring_self.h +++ b/src/fix_spring_self.h @@ -47,11 +47,12 @@ class FixSpringSelf : public Fix { int size_restart(int) override; int maxsize_restart() override; - private: + protected: double k, espring; double **xoriginal; // original coords of atoms int xflag, yflag, zflag; int ilevel_respa; + int maxatom; }; } // namespace LAMMPS_NS From 4bb3ecd09c1a88768e556be76f8707a685c33680 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Thu, 10 Aug 2023 00:36:33 -0500 Subject: [PATCH 09/19] Added the fix spring/self/kk source files --- src/KOKKOS/fix_spring_self_kokkos.cpp | 152 ++++++++++++++++++++++++++ src/KOKKOS/fix_spring_self_kokkos.h | 57 ++++++++++ 2 files changed, 209 insertions(+) create mode 100644 src/KOKKOS/fix_spring_self_kokkos.cpp create mode 100644 src/KOKKOS/fix_spring_self_kokkos.h diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp new file mode 100644 index 0000000000..fe6d3a3d50 --- /dev/null +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -0,0 +1,152 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "fix_spring_self_kokkos.h" + +#include "atom_kokkos.h" +#include "update.h" +#include "modify.h" +#include "domain_kokkos.h" +#include "region.h" +#include "input.h" +#include "variable.h" +#include "memory_kokkos.h" +#include "error.h" +#include "atom_masks.h" +#include "kokkos_base.h" + +#include + +using namespace LAMMPS_NS; +using namespace FixConst; + +/* ---------------------------------------------------------------------- */ + +template +FixSpringSelfKokkos::FixSpringSelfKokkos(LAMMPS *lmp, int narg, char **arg) : + FixSpringSelf(lmp, narg, arg) +{ + kokkosable = 1; + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = EMPTY_MASK; + datamask_modify = EMPTY_MASK; + + maxatom = atom->nmax; + memory->destroy(xoriginal); + memoryKK->create_kokkos(k_xoriginal,xoriginal,maxatom,3,"spring/self:xoriginal"); + d_xoriginal = k_xoriginal.view(); +} + +/* ---------------------------------------------------------------------- */ + +template +FixSpringSelfKokkos::~FixSpringSelfKokkos() +{ + if (copymode) return; + + memoryKK->destroy_kokkos(k_xoriginal,xoriginal); + xoriginal = nullptr; +} + +/* ---------------------------------------------------------------------- */ + +template +void FixSpringSelfKokkos::init() +{ + FixSpringSelf::init(); + + if (utils::strmatch(update->integrate_style,"^respa")) + error->all(FLERR,"Cannot (yet) use respa with Kokkos"); +} + +/* ---------------------------------------------------------------------- */ + +template +void FixSpringSelfKokkos::post_force(int /*vflag*/) +{ + atomKK->sync(execution_space, X_MASK | F_MASK | MASK_MASK); + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + image = atomKK->k_image.view(); + mask = atomKK->k_mask.view(); + + int nlocal = atom->nlocal; + + // reallocate xoriginal array if necessary + + if (atom->nmax > maxatom) { + maxatom = atom->nmax; + memoryKK->destroy_kokkos(k_xoriginal,xoriginal); + memoryKK->create_kokkos(k_xoriginal,xoriginal,maxatom,3,"fix_spring/self:xoriginal"); + d_xoriginal = k_xoriginal.view(); + } + + double espring_kk; + + + copymode = 1; + //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this, espring_kk); + { + // local variables for lambda capture + auto prd = Few(domain->prd); + auto h = Few(domain->h); + auto triclinic = domain->triclinic; + auto l_xflag = xflag; + auto l_yflag = yflag; + auto l_zflag = zflag; + auto l_k = k; + auto l_x = x; + auto l_xoriginal = d_xoriginal; + auto l_f = f; + auto l_mask = mask; + auto l_image = image; + auto l_groupbit = groupbit; + + Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double& espring_kk) { + if (l_mask[i] & l_groupbit) { + Few x_i; + x_i[0] = l_x(i,0); + x_i[1] = l_x(i,1); + x_i[2] = l_x(i,2); + auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i)); + auto dx = unwrap[0] - l_xoriginal(i, 0); + auto dy = unwrap[1] - l_xoriginal(i, 1); + auto dz = unwrap[2] - l_xoriginal(i, 2); + if (!l_xflag) dx = 0.0; + if (!l_yflag) dy = 0.0; + if (!l_zflag) dz = 0.0; + l_f(i,0) -= l_k*dx; + l_f(i,1) -= l_k*dy; + l_f(i,2) -= l_k*dz; + espring_kk += l_k * (dx*dx + dy*dy + dz*dz); + } + },espring_kk); + } + + copymode = 0; + + atomKK->modified(execution_space, F_MASK); + + espring = 0.5*espring_kk; +} + +namespace LAMMPS_NS { +template class FixSpringSelfKokkos; +#ifdef LMP_KOKKOS_GPU +template class FixSpringSelfKokkos; +#endif +} + diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h new file mode 100644 index 0000000000..58dcbc525e --- /dev/null +++ b/src/KOKKOS/fix_spring_self_kokkos.h @@ -0,0 +1,57 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS +// clang-format off +FixStyle(spring/self/kk,FixSpringSelfKokkos); +FixStyle(spring/self/kk/device,FixSpringSelfKokkos); +FixStyle(spring/self/kk/host,FixSpringSelfKokkos); +// clang-format on +#else + +// clang-format off +#ifndef LMP_FIX_SPRING_SELF_KOKKOS_H +#define LMP_FIX_SPRING_SELF_KOKKOS_H + +#include "fix_spring_self.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +template +class FixSpringSelfKokkos : public FixSpringSelf { + public: + typedef DeviceType device_type; + typedef double value_type; + typedef ArrayTypes AT; + + FixSpringSelfKokkos(class LAMMPS *, int, char **); + ~FixSpringSelfKokkos() override; + void init() override; + void post_force(int) override; + + private: + DAT::tdual_ffloat_2d k_xoriginal; + typename AT::t_ffloat_2d_randomread d_xoriginal; + + typename AT::t_x_array_randomread x; + typename AT::t_f_array f; + typename AT::t_imageint_1d_randomread image; + typename AT::t_int_1d_randomread mask; +}; + +} + +#endif +#endif + From a24eccf95d0abd036823ff5bbe45839ca9d8a907 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Fri, 11 Aug 2023 09:43:28 -0500 Subject: [PATCH 10/19] Removed maxatom from fix spring/self, need to work on exchange for xoriginal on spring/self/kk --- src/KOKKOS/fix_spring_self_kokkos.cpp | 12 +----------- src/fix_spring_self.cpp | 3 ++- src/fix_spring_self.h | 1 - 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index fe6d3a3d50..5031c0641b 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -43,9 +43,8 @@ FixSpringSelfKokkos::FixSpringSelfKokkos(LAMMPS *lmp, int narg, char datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; - maxatom = atom->nmax; memory->destroy(xoriginal); - memoryKK->create_kokkos(k_xoriginal,xoriginal,maxatom,3,"spring/self:xoriginal"); + memoryKK->create_kokkos(k_xoriginal,xoriginal,atom->nmax,3,"spring/self:xoriginal"); d_xoriginal = k_xoriginal.view(); } @@ -85,15 +84,6 @@ void FixSpringSelfKokkos::post_force(int /*vflag*/) int nlocal = atom->nlocal; - // reallocate xoriginal array if necessary - - if (atom->nmax > maxatom) { - maxatom = atom->nmax; - memoryKK->destroy_kokkos(k_xoriginal,xoriginal); - memoryKK->create_kokkos(k_xoriginal,xoriginal,maxatom,3,"fix_spring/self:xoriginal"); - d_xoriginal = k_xoriginal.view(); - } - double espring_kk; diff --git a/src/fix_spring_self.cpp b/src/fix_spring_self.cpp index 31f54caef2..df00a2ba8c 100644 --- a/src/fix_spring_self.cpp +++ b/src/fix_spring_self.cpp @@ -73,7 +73,6 @@ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) : // register with Atom class xoriginal = nullptr; - maxatom = atom->nmax; FixSpringSelf::grow_arrays(atom->nmax); atom->add_callback(Atom::GROW); atom->add_callback(Atom::RESTART); @@ -97,6 +96,8 @@ FixSpringSelf::FixSpringSelf(LAMMPS *lmp, int narg, char **arg) : FixSpringSelf::~FixSpringSelf() { + if (copymode) return; + // unregister callbacks to this fix from Atom class atom->delete_callback(id,Atom::GROW); diff --git a/src/fix_spring_self.h b/src/fix_spring_self.h index 24a03aa92e..f13f2be918 100644 --- a/src/fix_spring_self.h +++ b/src/fix_spring_self.h @@ -52,7 +52,6 @@ class FixSpringSelf : public Fix { double **xoriginal; // original coords of atoms int xflag, yflag, zflag; int ilevel_respa; - int maxatom; }; } // namespace LAMMPS_NS From ca9924035bbb90c5d6c9cf41db40a6a10f293ec5 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Sat, 12 Aug 2023 01:19:43 -0500 Subject: [PATCH 11/19] Working on pack/unpack exchange for xoriginal --- src/KOKKOS/fix_spring_self_kokkos.cpp | 143 ++++++++++++++++++++++++++ src/KOKKOS/fix_spring_self_kokkos.h | 53 +++++++++- 2 files changed, 193 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index 5031c0641b..4e89ede2ba 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -133,6 +133,149 @@ void FixSpringSelfKokkos::post_force(int /*vflag*/) espring = 0.5*espring_kk; } +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixSpringSelfKokkos::pack_exchange_item(const int &mysend, int &offset, const bool &final) const +{ + const int i = d_exchange_sendlist(mysend); + + d_buf[mysend] = nsend + offset; + int m = nsend + offset; + d_buf[m++] = d_xoriginal(i,0); + d_buf[m++] = d_xoriginal(i,1); + d_buf[m++] = d_xoriginal(i,2); + if (mysend == nsend-1) d_count() = m; + offset = m - nsend; + + const int j = d_copylist(mysend); + if (j > -1) { + d_xoriginal(i,0) = d_xoriginal(j,0); + d_xoriginal(i,1) = d_xoriginal(j,1); + d_xoriginal(i,2) = d_xoriginal(j,2); + } +} + +/* ---------------------------------------------------------------------- */ + +template +int FixSpringSelfKokkos::pack_exchange_kokkos( + const int &nsend, DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist, + ExecutionSpace space) +{ + + k_buf.sync(); + k_copylist.sync(); + k_exchange_sendlist.sync(); + + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + d_copylist = k_copylist.view(); + d_exchange_sendlist = k_exchange_sendlist.view(); + this->nsend = nsend; + + + k_xoriginal.template sync(); + + Kokkos::deep_copy(d_count,0); + + copymode = 1; + + FixSpringSelfKokkosPackExchangeFunctor pack_exchange_functor(this); + Kokkos::parallel_scan(nsend,pack_exchange_functor); + + copymode = 0; + + k_buf.modify(); + + if (space == Host) k_buf.sync(); + else k_buf.sync(); + + k_xoriginal.template modify(); + + Kokkos::deep_copy(h_count,d_count); + + return h_count(); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixSpringSelfKokkos::operator()(TagFixSpringSelfUnpackExchange, const int &i) const +{ + int index = d_indices(i); + + if (index > -1) { + int m = d_buf[i]; + + d_xoriginal(index,0) = static_cast (d_buf[m++]); + d_xoriginal(index,1) = static_cast (d_buf[m++]); + d_xoriginal(index,2) = static_cast (d_buf[m++]); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void FixSpringSelfKokkos::unpack_exchange_kokkos( + DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, + ExecutionSpace /*space*/) +{ + k_buf.sync(); + k_indices.sync(); + + d_buf = typename ArrayTypes::t_xfloat_1d_um( + k_buf.template view().data(), + k_buf.extent(0)*k_buf.extent(1)); + d_indices = k_indices.view(); + + k_xoriginal.template sync(); + + copymode = 1; + + Kokkos::parallel_for(Kokkos::RangePolicy(0,nrecv),*this); + + copymode = 0; + + k_xoriginal.template modify(); +} + +/* ---------------------------------------------------------------------- + pack values in local atom-based arrays for exchange with another proc +------------------------------------------------------------------------- */ + +template +int FixSpringSelfKokkos::pack_exchange(int i, double *buf) +{ + k_xoriginal.sync_host(); + + int m = FixSpringSelf::pack_exchange(i,buf); + + k_xoriginal.modify_host(); + + return m; +} + +/* ---------------------------------------------------------------------- + unpack values in local atom-based arrays from exchange with another proc +------------------------------------------------------------------------- */ + +template +int FixSpringSelfKokkos::unpack_exchange(int nlocal, double *buf) +{ + k_xoriginal.sync_host(); + + int m = FixSpringSelf::unpack_exchange(nlocal,buf); + + k_xoriginal.modify_host(); + + return m; +} + namespace LAMMPS_NS { template class FixSpringSelfKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h index 58dcbc525e..b69d4edb4a 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.h +++ b/src/KOKKOS/fix_spring_self_kokkos.h @@ -25,11 +25,14 @@ FixStyle(spring/self/kk/host,FixSpringSelfKokkos); #include "fix_spring_self.h" #include "kokkos_type.h" +#include "kokkos_base.h" namespace LAMMPS_NS { +struct TagFixSpringSelfUnpackExchange{}; + template -class FixSpringSelfKokkos : public FixSpringSelf { +class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase { public: typedef DeviceType device_type; typedef double value_type; @@ -40,14 +43,58 @@ class FixSpringSelfKokkos : public FixSpringSelf { void init() override; void post_force(int) override; - private: + KOKKOS_INLINE_FUNCTION + void pack_exchange_item(const int&, int &, const bool &) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagFixSpringSelfUnpackExchange, const int&) const; + + int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf, + DAT::tdual_int_1d k_sendlist, + DAT::tdual_int_1d k_copylist, + ExecutionSpace space) override; + + void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, + DAT::tdual_int_1d &indices,int nrecv, + ExecutionSpace space) override; + + + int pack_exchange(int, double *) override; + int unpack_exchange(int, double *) override; + + protected: DAT::tdual_ffloat_2d k_xoriginal; - typename AT::t_ffloat_2d_randomread d_xoriginal; + typename AT::t_ffloat_2d d_xoriginal; typename AT::t_x_array_randomread x; typename AT::t_f_array f; typename AT::t_imageint_1d_randomread image; typename AT::t_int_1d_randomread mask; + + int nsend; + + typename AT::t_int_2d d_sendlist; + typename AT::t_xfloat_1d_um d_buf; + + typename AT::t_int_1d d_exchange_sendlist; + typename AT::t_int_1d d_copylist; + typename AT::t_int_1d d_indices; + + typename AT::t_int_scalar d_count; + HAT::t_int_scalar h_count; + +}; + +template +struct FixSpringSelfKokkosPackExchangeFunctor { + typedef DeviceType device_type; + typedef int value_type; + FixSpringSelfKokkos c; + FixSpringSelfKokkosPackExchangeFunctor(FixSpringSelfKokkos* c_ptr):c(*c_ptr) {}; + KOKKOS_INLINE_FUNCTION + void operator()(const int &i, int &offset, const bool &final) const { + c.pack_exchange_item(i, offset, final); + } }; } From ea965d3b2681d6d922edb27e7a0b98859d6c6fb3 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Sun, 13 Aug 2023 23:40:53 -0500 Subject: [PATCH 12/19] Working on exchange comm on device, and grow arrays for xoriginal --- src/KOKKOS/fix_spring_self_kokkos.cpp | 21 +++++++++++++++++++-- src/KOKKOS/fix_spring_self_kokkos.h | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index 4e89ede2ba..8a576e2dea 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -38,14 +38,20 @@ FixSpringSelfKokkos::FixSpringSelfKokkos(LAMMPS *lmp, int narg, char FixSpringSelf(lmp, narg, arg) { kokkosable = 1; + exchange_comm_device = 1; + maxexchange = 6; atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; memory->destroy(xoriginal); - memoryKK->create_kokkos(k_xoriginal,xoriginal,atom->nmax,3,"spring/self:xoriginal"); - d_xoriginal = k_xoriginal.view(); + + int nmax = atom->nmax; + grow_arrays(nmax); + + d_count = typename AT::t_int_scalar("fix_shake:count"); + h_count = Kokkos::create_mirror_view(d_count); } /* ---------------------------------------------------------------------- */ @@ -133,6 +139,17 @@ void FixSpringSelfKokkos::post_force(int /*vflag*/) espring = 0.5*espring_kk; } +/* ---------------------------------------------------------------------- + allocate local atom-based arrays +------------------------------------------------------------------------- */ + +template +void FixSpringSelfKokkos::grow_arrays(int nmax) +{ + memoryKK->grow_kokkos(k_xoriginal,xoriginal,nmax,3,"spring/self:xoriginal"); + d_xoriginal = k_xoriginal.view(); +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h index b69d4edb4a..30b9eaf40a 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.h +++ b/src/KOKKOS/fix_spring_self_kokkos.h @@ -41,6 +41,7 @@ class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase { FixSpringSelfKokkos(class LAMMPS *, int, char **); ~FixSpringSelfKokkos() override; void init() override; + void grow_arrays(int) override; void post_force(int) override; KOKKOS_INLINE_FUNCTION From ffe291b7934188dc3901534b5ce9d75bf04042c6 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Tue, 22 Aug 2023 15:28:28 -0500 Subject: [PATCH 13/19] Fixed bugs with the memory allocation for xoriginal --- src/KOKKOS/fix_spring_self_kokkos.cpp | 20 ++++++++++++++------ src/KOKKOS/fix_spring_self_kokkos.h | 2 ++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index 8a576e2dea..e8aa07240f 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -39,19 +39,29 @@ FixSpringSelfKokkos::FixSpringSelfKokkos(LAMMPS *lmp, int narg, char { kokkosable = 1; exchange_comm_device = 1; - maxexchange = 6; atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; - memory->destroy(xoriginal); + xoriginal_tmp = xoriginal; + xoriginal = nullptr; int nmax = atom->nmax; grow_arrays(nmax); - d_count = typename AT::t_int_scalar("fix_shake:count"); + for (int i = 0; i < atom->nlocal; i++) { + k_xoriginal.h_view(i,0) = xoriginal_tmp[i][0]; + k_xoriginal.h_view(i,1) = xoriginal_tmp[i][1]; + k_xoriginal.h_view(i,2) = xoriginal_tmp[i][2]; + } + + k_xoriginal.modify_host(); + + d_count = typename AT::t_int_scalar("spring/self:count"); h_count = Kokkos::create_mirror_view(d_count); + + memory->destroy(xoriginal_tmp); } /* ---------------------------------------------------------------------- */ @@ -81,18 +91,16 @@ void FixSpringSelfKokkos::init() template void FixSpringSelfKokkos::post_force(int /*vflag*/) { - atomKK->sync(execution_space, X_MASK | F_MASK | MASK_MASK); + atomKK->sync(execution_space, X_MASK | F_MASK | IMAGE_MASK | MASK_MASK); x = atomKK->k_x.view(); f = atomKK->k_f.view(); image = atomKK->k_image.view(); mask = atomKK->k_mask.view(); - int nlocal = atom->nlocal; double espring_kk; - copymode = 1; //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this, espring_kk); { diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h index 30b9eaf40a..99fe435d88 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.h +++ b/src/KOKKOS/fix_spring_self_kokkos.h @@ -84,6 +84,8 @@ class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase { typename AT::t_int_scalar d_count; HAT::t_int_scalar h_count; + double **xoriginal_tmp; // original coords of atoms + }; template From 187bebb515081bf94e3e1b6ab431a8a4902ab3db Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Thu, 24 Aug 2023 12:17:37 -0500 Subject: [PATCH 14/19] Working on fix spring/self/kk, something with missing host-device sync that causes force blowup --- src/KOKKOS/fix_efield_kokkos.cpp | 6 +++--- src/KOKKOS/fix_spring_self_kokkos.cpp | 28 +++++++++++++++++++++------ src/KOKKOS/fix_spring_self_kokkos.h | 7 ++++--- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index bbf106f515..8c4469095f 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -218,11 +218,11 @@ void FixEfieldKokkos::post_force(int /*vflag*/) auto fx = qtmp * l_ex; auto fy = qtmp * l_ey; auto fz = qtmp * l_ez; - if (l_xstyle == ATOM) l_f(i,0) += l_d_efield(i,0); + if (l_xstyle == ATOM) l_f(i,0) += qtmp * l_d_efield(i,0); else if (l_xstyle) l_f(i,0) += fx; - if (l_ystyle == ATOM) l_f(i,1) += l_d_efield(i,1); + if (l_ystyle == ATOM) l_f(i,1) += qtmp * l_d_efield(i,1); else if (l_ystyle) l_f(i,1) += fy; - if (l_zstyle == ATOM) l_f(i,2) += l_d_efield(i,2); + if (l_zstyle == ATOM) l_f(i,2) += qtmp * l_d_efield(i,2); else if (l_zstyle) l_f(i,2) += fz; fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; fsum_kk.d1 += fx; diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index e8aa07240f..da1576f3ef 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -108,16 +108,17 @@ void FixSpringSelfKokkos::post_force(int /*vflag*/) auto prd = Few(domain->prd); auto h = Few(domain->h); auto triclinic = domain->triclinic; - auto l_xflag = xflag; - auto l_yflag = yflag; - auto l_zflag = zflag; auto l_k = k; - auto l_x = x; auto l_xoriginal = d_xoriginal; + + auto l_x = x; auto l_f = f; auto l_mask = mask; auto l_image = image; auto l_groupbit = groupbit; + auto l_xflag = xflag; + auto l_yflag = yflag; + auto l_zflag = zflag; Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double& espring_kk) { if (l_mask[i] & l_groupbit) { @@ -154,10 +155,25 @@ void FixSpringSelfKokkos::post_force(int /*vflag*/) template void FixSpringSelfKokkos::grow_arrays(int nmax) { - memoryKK->grow_kokkos(k_xoriginal,xoriginal,nmax,3,"spring/self:xoriginal"); + memoryKK->grow_kokkos(k_xoriginal,xoriginal,nmax,"spring/self:xoriginal"); d_xoriginal = k_xoriginal.view(); } +/* ---------------------------------------------------------------------- + copy values within local atom-based arrays +------------------------------------------------------------------------- */ + +template +void FixSpringSelfKokkos::copy_arrays(int i, int j, int delflag) +{ + k_xoriginal.sync_host(); + + FixSpringSelf::copy_arrays(i,j,delflag); + + k_xoriginal.modify_host(); +} + + /* ---------------------------------------------------------------------- */ template @@ -202,7 +218,7 @@ int FixSpringSelfKokkos::pack_exchange_kokkos( d_exchange_sendlist = k_exchange_sendlist.view(); this->nsend = nsend; - + k_xoriginal.template sync(); Kokkos::deep_copy(d_count,0); diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h index 99fe435d88..49233c4dcc 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.h +++ b/src/KOKKOS/fix_spring_self_kokkos.h @@ -42,6 +42,7 @@ class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase { ~FixSpringSelfKokkos() override; void init() override; void grow_arrays(int) override; + void copy_arrays(int, int, int) override; void post_force(int) override; KOKKOS_INLINE_FUNCTION @@ -64,14 +65,14 @@ class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase { int unpack_exchange(int, double *) override; protected: - DAT::tdual_ffloat_2d k_xoriginal; - typename AT::t_ffloat_2d d_xoriginal; + DAT::tdual_x_array k_xoriginal; + typename AT::t_x_array d_xoriginal; typename AT::t_x_array_randomread x; typename AT::t_f_array f; typename AT::t_imageint_1d_randomread image; typename AT::t_int_1d_randomread mask; - + int nsend; typename AT::t_int_2d d_sendlist; From 58d60dfea04b7d3d2012d0c8639bc07a3ca819e0 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Sat, 26 Aug 2023 16:08:59 -0500 Subject: [PATCH 15/19] Fixed bugs with device sync for xoriginal --- src/KOKKOS/fix_spring_self_kokkos.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index da1576f3ef..13ebb7de6f 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -101,8 +101,11 @@ void FixSpringSelfKokkos::post_force(int /*vflag*/) double espring_kk; + k_xoriginal.modify(); + k_xoriginal.sync(); + copymode = 1; - //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this, espring_kk); + { // local variables for lambda capture auto prd = Few(domain->prd); @@ -173,7 +176,6 @@ void FixSpringSelfKokkos::copy_arrays(int i, int j, int delflag) k_xoriginal.modify_host(); } - /* ---------------------------------------------------------------------- */ template From 4ef9f70bfe0ef7a408eff0584c793e2fb3ba64dc Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 28 Aug 2023 09:43:00 -0500 Subject: [PATCH 16/19] Fixed whitespaces, added author info --- src/KOKKOS/fix_efield_kokkos.cpp | 4 ++++ src/KOKKOS/fix_spring_self_kokkos.cpp | 4 ++++ src/KOKKOS/fix_spring_self_kokkos.h | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index 8c4469095f..ffe1c34e97 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -12,6 +12,10 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Contributing author: Trung Nguyen (U Chicago) +------------------------------------------------------------------------- */ + #include "fix_efield_kokkos.h" #include "atom_kokkos.h" diff --git a/src/KOKKOS/fix_spring_self_kokkos.cpp b/src/KOKKOS/fix_spring_self_kokkos.cpp index 13ebb7de6f..efd8a652ff 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.cpp +++ b/src/KOKKOS/fix_spring_self_kokkos.cpp @@ -12,6 +12,10 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Contributing author: Trung Nguyen (U Chicago) +------------------------------------------------------------------------- */ + #include "fix_spring_self_kokkos.h" #include "atom_kokkos.h" diff --git a/src/KOKKOS/fix_spring_self_kokkos.h b/src/KOKKOS/fix_spring_self_kokkos.h index 49233c4dcc..b23e92249b 100644 --- a/src/KOKKOS/fix_spring_self_kokkos.h +++ b/src/KOKKOS/fix_spring_self_kokkos.h @@ -72,7 +72,7 @@ class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase { typename AT::t_f_array f; typename AT::t_imageint_1d_randomread image; typename AT::t_int_1d_randomread mask; - + int nsend; typename AT::t_int_2d d_sendlist; From b08abd4a809df5b12bf9e9abca796873742e5d75 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 28 Aug 2023 11:06:07 -0500 Subject: [PATCH 17/19] Updated Install.sh and cleaned up --- src/KOKKOS/Install.sh | 4 ++++ src/KOKKOS/pair_yukawa_colloid_kokkos.cpp | 9 ++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 766daaff19..0a5bb398aa 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -127,6 +127,8 @@ action fix_dt_reset_kokkos.cpp action fix_dt_reset_kokkos.h action fix_enforce2d_kokkos.cpp action fix_enforce2d_kokkos.h +action fix_efield_kokkos.cpp +action fix_efield_kokkos.h action fix_eos_table_rx_kokkos.cpp fix_eos_table_rx.cpp action fix_eos_table_rx_kokkos.h fix_eos_table_rx.h action fix_freeze_kokkos.cpp fix_freeze.cpp @@ -171,6 +173,8 @@ action fix_shake_kokkos.cpp fix_shake.cpp action fix_shake_kokkos.h fix_shake.h action fix_shardlow_kokkos.cpp fix_shardlow.cpp action fix_shardlow_kokkos.h fix_shardlow.h +action fix_spring_self_kokkos.cpp +action fix_spring_self_kokkos.h action fix_viscous_kokkos.cpp action fix_viscous_kokkos.h action fix_wall_gran_kokkos.cpp fix_wall_gran.cpp diff --git a/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp b/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp index ca491a3800..04eb5ab657 100644 --- a/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp +++ b/src/KOKKOS/pair_yukawa_colloid_kokkos.cpp @@ -210,7 +210,7 @@ void PairYukawaColloidKokkos::compute(int eflag_in, int vflag_in) } } - +/* ---------------------------------------------------------------------- */ template template @@ -227,11 +227,10 @@ compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, const F_FLOAT aa = STACKPARAMS ? m_params[itype][jtype].a : params(itype,jtype).a; - // U = a * exp(-kappa*r-(radi+radj)) / kappa - // f = a * exp(-kappa*r) + // U = a * exp(-kappa*(r-(radi+radj))) / kappa + // f = -dU/dr = a * exp(-kappa*r) // f/r = a * exp(-kappa*r) / r const F_FLOAT rinv = 1.0 / rr; - const F_FLOAT rinv2 = rinv*rinv; const F_FLOAT screening = exp(-kappa*(rr-(radi+radj))); const F_FLOAT forceyukawa = aa * screening; const F_FLOAT fpair = forceyukawa * rinv; @@ -255,7 +254,7 @@ compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, const F_FLOAT offset = STACKPARAMS ? m_params[itype][jtype].offset : params(itype,jtype).offset; - // U = a * exp(-kappa*r) / kappa + // U = a * exp(-kappa*(r-(radi+radj))) / kappa const F_FLOAT rinv = 1.0 / rr; const F_FLOAT screening = exp(-kappa*(rr-(radi+radj))); From 67bcf75b74f34b4bd0037d47fade1a77fabc073a Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 28 Aug 2023 23:18:05 -0500 Subject: [PATCH 18/19] Updated the corresponding doc pages with ".. index::" and added (k) to the commands in the overview pages --- doc/src/Commands_fix.rst | 4 ++-- doc/src/Commands_pair.rst | 2 +- doc/src/fix_efield.rst | 1 + doc/src/fix_spring_self.rst | 1 + doc/src/pair_yukawa_colloid.rst | 3 ++- 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/src/Commands_fix.rst b/doc/src/Commands_fix.rst index 6fe321e3c9..a15e24e3e5 100644 --- a/doc/src/Commands_fix.rst +++ b/doc/src/Commands_fix.rst @@ -69,7 +69,7 @@ OPT. * :doc:`drude/transform/inverse ` * :doc:`dt/reset (k) ` * :doc:`edpd/source ` - * :doc:`efield ` + * :doc:`efield (k) ` * :doc:`efield/tip4p ` * :doc:`ehex ` * :doc:`electrode/conp (i) ` @@ -233,7 +233,7 @@ OPT. * :doc:`spring ` * :doc:`spring/chunk ` * :doc:`spring/rg ` - * :doc:`spring/self ` + * :doc:`spring/self (k) ` * :doc:`srd ` * :doc:`store/force ` * :doc:`store/state ` diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst index c45a1d778c..b3d40717da 100644 --- a/doc/src/Commands_pair.rst +++ b/doc/src/Commands_pair.rst @@ -305,5 +305,5 @@ OPT. * :doc:`wf/cut ` * :doc:`ylz ` * :doc:`yukawa (gko) ` - * :doc:`yukawa/colloid (go) ` + * :doc:`yukawa/colloid (gko) ` * :doc:`zbl (gko) ` diff --git a/doc/src/fix_efield.rst b/doc/src/fix_efield.rst index e38e1e6894..c6ac3a0722 100644 --- a/doc/src/fix_efield.rst +++ b/doc/src/fix_efield.rst @@ -1,4 +1,5 @@ .. index:: fix efield +.. index:: fix efield/kk .. index:: fix efield/tip4p fix efield command diff --git a/doc/src/fix_spring_self.rst b/doc/src/fix_spring_self.rst index 6cf0a9e0e7..0def6d51f0 100644 --- a/doc/src/fix_spring_self.rst +++ b/doc/src/fix_spring_self.rst @@ -1,4 +1,5 @@ .. index:: fix spring/self +.. index:: fix spring/self/kk fix spring/self command ======================= diff --git a/doc/src/pair_yukawa_colloid.rst b/doc/src/pair_yukawa_colloid.rst index 6611ea04e4..96893f8e37 100644 --- a/doc/src/pair_yukawa_colloid.rst +++ b/doc/src/pair_yukawa_colloid.rst @@ -1,11 +1,12 @@ .. index:: pair_style yukawa/colloid .. index:: pair_style yukawa/colloid/gpu +.. index:: pair_style yukawa/colloid/kk .. index:: pair_style yukawa/colloid/omp pair_style yukawa/colloid command ================================= -Accelerator Variants: *yukawa/colloid/gpu*, *yukawa/colloid/omp* +Accelerator Variants: *yukawa/colloid/gpu*, *yukawa/colloid/kk*, *yukawa/colloid/omp* Syntax """""" From 4910401f6c43c54318e1c4509475c6f693129f0b Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Thu, 12 Oct 2023 12:52:57 -0600 Subject: [PATCH 19/19] Add accelerator package text --- doc/src/fix_efield.rst | 6 ++++++ doc/src/fix_spring_self.rst | 6 ++++++ doc/src/pair_yukawa_colloid.rst | 6 ++++++ 3 files changed, 18 insertions(+) diff --git a/doc/src/fix_efield.rst b/doc/src/fix_efield.rst index c6ac3a0722..71be030266 100644 --- a/doc/src/fix_efield.rst +++ b/doc/src/fix_efield.rst @@ -211,6 +211,12 @@ the iteration count during the minimization. system (the quantity being minimized), you MUST enable the :doc:`fix_modify ` *energy* option for this fix. +---------- + +.. include:: accel_styles.rst + +---------- + Restrictions """""""""""" diff --git a/doc/src/fix_spring_self.rst b/doc/src/fix_spring_self.rst index 0def6d51f0..3a3e246455 100644 --- a/doc/src/fix_spring_self.rst +++ b/doc/src/fix_spring_self.rst @@ -81,6 +81,12 @@ invoked by the :doc:`minimize ` command. you MUST enable the :doc:`fix_modify ` *energy* option for this fix. +---------- + +.. include:: accel_styles.rst + +---------- + Restrictions """""""""""" none diff --git a/doc/src/pair_yukawa_colloid.rst b/doc/src/pair_yukawa_colloid.rst index 96893f8e37..c6f201d249 100644 --- a/doc/src/pair_yukawa_colloid.rst +++ b/doc/src/pair_yukawa_colloid.rst @@ -132,6 +132,12 @@ per-type polydispersity is allowed. This means all particles of the same type must have the same diameter. Each type can have a different diameter. +---------- + +.. include:: accel_styles.rst + +---------- + Related commands """"""""""""""""