diff --git a/src/KOKKOS/fix_efield_kokkos.cpp b/src/KOKKOS/fix_efield_kokkos.cpp index 4009773982..c009b129ae 100644 --- a/src/KOKKOS/fix_efield_kokkos.cpp +++ b/src/KOKKOS/fix_efield_kokkos.cpp @@ -13,22 +13,23 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Trung Nguyen (U Chicago) + Contributing authors: Trung Nguyen (U Chicago) + Mitch Murphy (alphataubio@gmail.com) ------------------------------------------------------------------------- */ #include "fix_efield_kokkos.h" #include "atom_kokkos.h" -#include "update.h" -#include "modify.h" -#include "domain_kokkos.h" -#include "region.h" -#include "input.h" -#include "variable.h" -#include "memory_kokkos.h" -#include "error.h" #include "atom_masks.h" +#include "domain_kokkos.h" +#include "error.h" +#include "input.h" #include "kokkos_base.h" +#include "memory_kokkos.h" +#include "modify_kokkos.h" +#include "region.h" +#include "update.h" +#include "variable.h" using namespace LAMMPS_NS; using namespace FixConst; @@ -43,13 +44,17 @@ FixEfieldKokkos::FixEfieldKokkos(LAMMPS *lmp, int narg, char **arg) { kokkosable = 1; atomKK = (AtomKokkos *) atom; + domainKK = (DomainKokkos *) domain; execution_space = ExecutionSpaceFromDevice::space; datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; memory->destroy(efield); memoryKK->create_kokkos(k_efield,efield,maxatom,4,"efield:efield"); - d_efield = k_efield.view(); + d_efield = k_efield.template view(); + + memoryKK->create_kokkos(k_fsum,fsum,4,"efield:fsum"); + d_fsum = k_fsum.template view(); } /* ---------------------------------------------------------------------- */ @@ -60,7 +65,8 @@ FixEfieldKokkos::~FixEfieldKokkos() if (copymode) return; memoryKK->destroy_kokkos(k_efield,efield); - efield = nullptr; + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_fsum,fsum); } /* ---------------------------------------------------------------------- */ @@ -68,6 +74,7 @@ FixEfieldKokkos::~FixEfieldKokkos() template void FixEfieldKokkos::init() { + FixEfield::init(); if (utils::strmatch(update->integrate_style,"^respa")) @@ -77,17 +84,29 @@ void FixEfieldKokkos::init() /* ---------------------------------------------------------------------- */ template -void FixEfieldKokkos::post_force(int /*vflag*/) +void FixEfieldKokkos::post_force(int vflag) { + atomKK->sync(execution_space, X_MASK | F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK); - x = atomKK->k_x.view(); - f = atomKK->k_f.view(); - q = atomKK->k_q.view(); - image = atomKK->k_image.view(); - mask = atomKK->k_mask.view(); + d_x = atomKK->k_x.template view(); + d_f = atomKK->k_f.template view(); + d_q = atomKK->k_q.template view(); + d_image = atomKK->k_image.template view(); + d_mask = atomKK->k_mask.template view(); + int nlocal = atomKK->nlocal; - int nlocal = atom->nlocal; + // virial setup + + v_init(vflag); + + // reallocate per-atom arrays if necessary + + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"efield:vatom"); + d_vatom = k_vatom.template view(); + } // update region if necessary @@ -111,54 +130,22 @@ void FixEfieldKokkos::post_force(int /*vflag*/) d_efield = k_efield.view(); } - fsum[0] = fsum[1] = fsum[2] = fsum[3] = 0.0; - double_4 fsum_kk; + force_flag = 0; + double result[10] = {0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0}; + if (varflag == CONSTANT) { + + // It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below) [ndtrung81 (2023/08)] + + // i tested it on kokkos-omp and it works, might have been + // a bug in DomainKokkos that's been fixed since. + // FIXME: test on kokkos-gpu + // [alphataubio (2024/08)] + copymode = 1; - - // It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below) - //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); - - { - // local variables for lambda capture - auto prd = Few(domain->prd); - auto h = Few(domain->h); - auto triclinic = domain->triclinic; - auto l_ex = ex; - auto l_ey = ey; - auto l_ez = ez; - - auto l_x = x; - auto l_q = q; - auto l_f = f; - auto l_mask = mask; - auto l_image = image; - auto l_groupbit = groupbit; - - Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) { - if (l_mask[i] & l_groupbit) { - Few x_i; - x_i[0] = l_x(i,0); - x_i[1] = l_x(i,1); - x_i[2] = l_x(i,2); - auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i)); - auto qtmp = l_q(i); - auto fx = qtmp * l_ex; - auto fy = qtmp * l_ey; - auto fz = qtmp * l_ez; - l_f(i,0) += fx; - l_f(i,1) += fy; - l_f(i,2) += fz; - fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; - fsum_kk.d1 += fx; - fsum_kk.d2 += fy; - fsum_kk.d3 += fz; - } - },fsum_kk); - } - + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,result); copymode = 0; // variable force, wrap with clear/add @@ -167,144 +154,159 @@ void FixEfieldKokkos::post_force(int /*vflag*/) atomKK->sync(Host,ALL_MASK); // this can be removed when variable class is ported to Kokkos - modify->clearstep_compute(); - - if (xstyle == EQUAL) ex = input->variable->compute_equal(xvar); - else if (xstyle == ATOM) - input->variable->compute_atom(xvar,igroup,&efield[0][0],4,0); - if (ystyle == EQUAL) ey = input->variable->compute_equal(yvar); - else if (ystyle == ATOM) - input->variable->compute_atom(yvar,igroup,&efield[0][1],4,0); - if (zstyle == EQUAL) ez = input->variable->compute_equal(zvar); - else if (zstyle == ATOM) - input->variable->compute_atom(zvar,igroup,&efield[0][2],4,0); - - modify->addstep_compute(update->ntimestep + 1); + FixEfield::update_efield_variables(); if (varflag == ATOM) { // this can be removed when variable class is ported to Kokkos k_efield.modify(); k_efield.sync(); } + // It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below) [ndtrung81 (2023/08)] + + // i tested it on kokkos-omp and it works, might have been + // a bug in DomainKokkos that's been fixed since. + // FIXME: test on kokkos-gpu + // [alphataubio (2024/08)] + copymode = 1; - // It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below) - //Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,fsum_kk); - { - // local variables for lambda capture - auto prd = Few(domain->prd); - auto h = Few(domain->h); - auto triclinic = domain->triclinic; - auto l_ex = ex; - auto l_ey = ey; - auto l_ez = ez; - auto l_d_efield = d_efield; - - auto l_x = x; - auto l_q = q; - auto l_f = f; - auto l_mask = mask; - auto l_image = image; - auto l_groupbit = groupbit; - auto l_xstyle = xstyle; - auto l_ystyle = ystyle; - auto l_zstyle = zstyle; - - Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) { - if (l_mask[i] & l_groupbit) { - Few x_i; - x_i[0] = l_x(i,0); - x_i[1] = l_x(i,1); - x_i[2] = l_x(i,2); - auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i)); - auto qtmp = l_q(i); - auto fx = qtmp * l_ex; - auto fy = qtmp * l_ey; - auto fz = qtmp * l_ez; - if (l_xstyle == ATOM) l_f(i,0) += qtmp * l_d_efield(i,0); - else if (l_xstyle) l_f(i,0) += fx; - if (l_ystyle == ATOM) l_f(i,1) += qtmp * l_d_efield(i,1); - else if (l_ystyle) l_f(i,1) += fy; - if (l_zstyle == ATOM) l_f(i,2) += qtmp * l_d_efield(i,2); - else if (l_zstyle) l_f(i,2) += fz; - fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; - fsum_kk.d1 += fx; - fsum_kk.d2 += fy; - fsum_kk.d3 += fz; - } - },fsum_kk); - } - + Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal),*this,result); copymode = 0; + } + atomKK->modified(execution_space, F_MASK); - fsum[0] = fsum_kk.d0; - fsum[1] = fsum_kk.d1; - fsum[2] = fsum_kk.d2; - fsum[3] = fsum_kk.d3; + Kokkos::atomic_store(&(d_fsum[0]),result[0]); + Kokkos::atomic_store(&(d_fsum[1]),result[1]); + Kokkos::atomic_store(&(d_fsum[2]),result[2]); + Kokkos::atomic_store(&(d_fsum[3]),result[3]); + k_fsum.template modify(); + k_fsum.template sync(); + + if (vflag_global) { + virial[0] += result[4]; + virial[1] += result[5]; + virial[2] += result[6]; + virial[3] += result[7]; + virial[4] += result[8]; + virial[5] += result[9]; + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + } template KOKKOS_INLINE_FUNCTION -void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, double_4& fsum_kk) const { - if (mask[i] & groupbit) { +void FixEfieldKokkos::operator()(TagFixEfieldConstant, const int &i, value_type result) const { + if (d_mask(i) & groupbit) { if (region && !d_match[i]) return; - auto prd = Few(domain->prd); - auto h = Few(domain->h); - auto triclinic = domain->triclinic; Few x_i; - x_i[0] = x(i,0); - x_i[1] = x(i,1); - x_i[2] = x(i,2); - auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i)); - const F_FLOAT qtmp = q(i); - const F_FLOAT fx = qtmp * ex; - const F_FLOAT fy = qtmp * ey; - const F_FLOAT fz = qtmp * ez; - f(i,0) += fx; - f(i,1) += fy; - f(i,2) += fz; - // TODO: access to unwrap below crashes - fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; - fsum_kk.d1 += fx; - fsum_kk.d2 += fy; - fsum_kk.d3 += fz; + x_i[0] = d_x(i,0); + x_i[1] = d_x(i,1); + x_i[2] = d_x(i,2); + auto unwrapKK = DomainKokkos::unmap(domainKK->prd,domainKK->h, + domainKK->triclinic,x_i,d_image(i)); + const F_FLOAT fx = d_q(i) * ex; + const F_FLOAT fy = d_q(i) * ey; + const F_FLOAT fz = d_q(i) * ez; + d_f(i,0) += fx; + d_f(i,1) += fy; + d_f(i,2) += fz; + // TODO: access to unwrap below crashes [ndtrung81 (2023/08)] + // tested, works on kokkos-omp [alphataubio (2024/08)] + // changed to unwrapKK to avoid possible clash with base class unwrap + // FIXME: test on kokkos-gpu + result[0] -= fx * unwrapKK[0] + fy * unwrapKK[1] + fz * unwrapKK[2]; + result[1] += fx; + result[2] += fy; + result[3] += fz; + + if (evflag) { + double v[6]; + v[0] = fx * unwrapKK[0]; + v[1] = fy * unwrapKK[1]; + v[2] = fz * unwrapKK[2]; + v[3] = fx * unwrapKK[1]; + v[4] = fx * unwrapKK[2]; + v[5] = fy * unwrapKK[2]; + v_tally(result, i, v); + } + } } template KOKKOS_INLINE_FUNCTION -void FixEfieldKokkos::operator()(TagFixEfieldNonConstant, const int &i, double_4& fsum_kk) const { - auto prd = Few(domain->prd); - auto h = Few(domain->h); - auto triclinic = domain->triclinic; - if (mask[i] & groupbit) { +void FixEfieldKokkos::operator()(TagFixEfieldNonConstant, const int &i, value_type result) const { + if (d_mask(i) & groupbit) { if (region && !d_match[i]) return; - Few x_i; - x_i[0] = x(i,0); - x_i[1] = x(i,1); - x_i[2] = x(i,2); - auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i)); - const F_FLOAT qtmp = q[i]; - const F_FLOAT fx = qtmp * ex; - const F_FLOAT fy = qtmp * ey; - const F_FLOAT fz = qtmp * ez; - if (xstyle == ATOM) f(i,0) += d_efield(i,0); - else if (xstyle) f(i,0) += fx; - if (ystyle == ATOM) f(i,1) += d_efield(i,1); - else if (ystyle) f(i,1) += fy; - if (zstyle == ATOM) f(i,2) += d_efield(i,2); - else if (zstyle) f(i,2) += fz; - // TODO: access to unwrap below crashes - fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; - fsum_kk.d1 += fx; - fsum_kk.d2 += fy; - fsum_kk.d3 += fz; + + F_FLOAT fx, fy, fz; + + if (xstyle == ATOM) fx = qe2f * d_q(i) * d_efield(i,0); + else fx = d_q(i) * ex; + if (ystyle == ATOM) fy = qe2f * d_q(i) * d_efield(i,1); + else fy = d_q(i) * ey; + if (zstyle == ATOM) fz = qe2f * d_q(i) * d_efield(i,2); + else fz = d_q(i) * ez; + + d_f(i,0) += fx; + d_f(i,1) += fy; + d_f(i,2) += fz; + + result[1] += fx; + result[2] += fy; + result[3] += fz; + + if (pstyle == ATOM) result[0] += qe2f * d_q(i) * d_efield(i,3); + else if (estyle == ATOM) result[0] += d_efield(i,3); } } +/* ---------------------------------------------------------------------- + tally virial into global and per-atom accumulators + i = local index of atom + v = total virial for the interaction + increment global virial by v + increment per-atom virial by v + this method can be used when fix computes forces in post_force() + and the force depends on a distance to some external object + e.g. fix wall/lj93: compute virial only on owned atoms +------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void FixEfieldKokkos::v_tally(value_type result, int i, double *v) const +{ + + if (vflag_global) { + result[4] += v[0]; + result[5] += v[1]; + result[6] += v[2]; + result[7] += v[3]; + result[8] += v[4]; + result[9] += v[5]; + } + + if (vflag_atom) { + Kokkos::atomic_add(&(d_vatom(i,0)),v[0]); + Kokkos::atomic_add(&(d_vatom(i,1)),v[1]); + Kokkos::atomic_add(&(d_vatom(i,2)),v[2]); + Kokkos::atomic_add(&(d_vatom(i,3)),v[3]); + Kokkos::atomic_add(&(d_vatom(i,4)),v[4]); + Kokkos::atomic_add(&(d_vatom(i,5)),v[5]); + } + +} + + namespace LAMMPS_NS { template class FixEfieldKokkos; #ifdef LMP_KOKKOS_GPU diff --git a/src/KOKKOS/fix_efield_kokkos.h b/src/KOKKOS/fix_efield_kokkos.h index d159473d1d..fd940d8028 100644 --- a/src/KOKKOS/fix_efield_kokkos.h +++ b/src/KOKKOS/fix_efield_kokkos.h @@ -28,32 +28,14 @@ FixStyle(efield/kk/host,FixEfieldKokkos); namespace LAMMPS_NS { -struct e_double_4 { - double d0, d1, d2, d3; - KOKKOS_INLINE_FUNCTION - e_double_4() { - d0 = d1 = d2 = d3 = 0.0; - } - KOKKOS_INLINE_FUNCTION - e_double_4& operator+=(const e_double_4 &rhs) { - d0 += rhs.d0; - d1 += rhs.d1; - d2 += rhs.d2; - d3 += rhs.d3; - return *this; - } -}; -typedef e_double_4 double_4; - struct TagFixEfieldConstant{}; - struct TagFixEfieldNonConstant{}; template class FixEfieldKokkos : public FixEfield { public: typedef DeviceType device_type; - typedef double_4 value_type; + typedef double value_type[]; typedef ArrayTypes AT; FixEfieldKokkos(class LAMMPS *, int, char **); @@ -62,21 +44,34 @@ class FixEfieldKokkos : public FixEfield { void post_force(int) override; KOKKOS_INLINE_FUNCTION - void operator()(TagFixEfieldConstant, const int&, double_4&) const; + void operator()(TagFixEfieldConstant, const int&, value_type) const; KOKKOS_INLINE_FUNCTION - void operator()(TagFixEfieldNonConstant, const int&, double_4&) const; + void operator()(TagFixEfieldNonConstant, const int&, value_type) const; + + const int value_count = 10; private: + class DomainKokkos *domainKK; + DAT::tdual_ffloat_2d k_efield; typename AT::t_ffloat_2d_randomread d_efield; typename AT::t_int_1d d_match; - typename AT::t_x_array_randomread x; - typename AT::t_float_1d_randomread q; - typename AT::t_f_array f; - typename AT::t_imageint_1d_randomread image; - typename AT::t_int_1d_randomread mask; + typename AT::t_x_array_randomread d_x; + typename AT::t_float_1d_randomread d_q; + typename AT::t_f_array d_f; + typename AT::t_imageint_1d_randomread d_image; + typename AT::t_int_1d_randomread d_mask; + + DAT::tdual_virial_array k_vatom; + typename AT::t_virial_array d_vatom; + + typename AT::tdual_ffloat_1d k_fsum; + typename AT::t_ffloat_1d d_fsum; + + KOKKOS_INLINE_FUNCTION + void v_tally(value_type, int, double*) const; }; } diff --git a/src/fix_efield.cpp b/src/fix_efield.cpp index 81be66b3e3..b0abee6d0f 100644 --- a/src/fix_efield.cpp +++ b/src/fix_efield.cpp @@ -56,6 +56,7 @@ FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) : ilevel_respa = 0; energy_global_flag = 1; virial_global_flag = virial_peratom_flag = 1; + fsum = new double[4]; qe2f = force->qe2f; xstyle = ystyle = zstyle = estyle = pstyle = NONE; @@ -311,7 +312,6 @@ void FixEfield::post_force(int vflag) double **x = atom->x; double fx, fy, fz; double v[6], unwrap[3]; - ; // constant efield @@ -508,4 +508,6 @@ void FixEfield::update_efield_variables() else if (estyle == ATOM) input->variable->compute_atom(evar, igroup, &efield[0][3], 4, 0); modify->addstep_compute(update->ntimestep + 1); + } + diff --git a/src/fix_efield.h b/src/fix_efield.h index 72fd204898..df6b336e4c 100644 --- a/src/fix_efield.h +++ b/src/fix_efield.h @@ -59,7 +59,7 @@ class FixEfield : public Fix { double **efield; int force_flag; - double fsum[4], fsum_all[4]; + double *fsum, fsum_all[4]; // need fsum double*, not double[] for kokkos dual view void update_efield_variables(); }; } // namespace LAMMPS_NS