significant rewrite, all 3 tests pass

This commit is contained in:
alphataubio
2024-08-04 01:51:26 -04:00
parent 755fdc5986
commit c8c9e34e2d
4 changed files with 199 additions and 200 deletions

View File

@ -13,22 +13,23 @@
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
Contributing author: Trung Nguyen (U Chicago) Contributing authors: Trung Nguyen (U Chicago)
Mitch Murphy (alphataubio@gmail.com)
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
#include "fix_efield_kokkos.h" #include "fix_efield_kokkos.h"
#include "atom_kokkos.h" #include "atom_kokkos.h"
#include "update.h"
#include "modify.h"
#include "domain_kokkos.h"
#include "region.h"
#include "input.h"
#include "variable.h"
#include "memory_kokkos.h"
#include "error.h"
#include "atom_masks.h" #include "atom_masks.h"
#include "domain_kokkos.h"
#include "error.h"
#include "input.h"
#include "kokkos_base.h" #include "kokkos_base.h"
#include "memory_kokkos.h"
#include "modify_kokkos.h"
#include "region.h"
#include "update.h"
#include "variable.h"
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
using namespace FixConst; using namespace FixConst;
@ -43,13 +44,17 @@ FixEfieldKokkos<DeviceType>::FixEfieldKokkos(LAMMPS *lmp, int narg, char **arg)
{ {
kokkosable = 1; kokkosable = 1;
atomKK = (AtomKokkos *) atom; atomKK = (AtomKokkos *) atom;
domainKK = (DomainKokkos *) domain;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space; execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = EMPTY_MASK; datamask_read = EMPTY_MASK;
datamask_modify = EMPTY_MASK; datamask_modify = EMPTY_MASK;
memory->destroy(efield); memory->destroy(efield);
memoryKK->create_kokkos(k_efield,efield,maxatom,4,"efield:efield"); memoryKK->create_kokkos(k_efield,efield,maxatom,4,"efield:efield");
d_efield = k_efield.view<DeviceType>(); d_efield = k_efield.template view<DeviceType>();
memoryKK->create_kokkos(k_fsum,fsum,4,"efield:fsum");
d_fsum = k_fsum.template view<DeviceType>();
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -60,7 +65,8 @@ FixEfieldKokkos<DeviceType>::~FixEfieldKokkos()
if (copymode) return; if (copymode) return;
memoryKK->destroy_kokkos(k_efield,efield); memoryKK->destroy_kokkos(k_efield,efield);
efield = nullptr; memoryKK->destroy_kokkos(k_vatom,vatom);
memoryKK->destroy_kokkos(k_fsum,fsum);
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -68,6 +74,7 @@ FixEfieldKokkos<DeviceType>::~FixEfieldKokkos()
template<class DeviceType> template<class DeviceType>
void FixEfieldKokkos<DeviceType>::init() void FixEfieldKokkos<DeviceType>::init()
{ {
FixEfield::init(); FixEfield::init();
if (utils::strmatch(update->integrate_style,"^respa")) if (utils::strmatch(update->integrate_style,"^respa"))
@ -77,17 +84,29 @@ void FixEfieldKokkos<DeviceType>::init()
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
template<class DeviceType> template<class DeviceType>
void FixEfieldKokkos<DeviceType>::post_force(int /*vflag*/) void FixEfieldKokkos<DeviceType>::post_force(int vflag)
{ {
atomKK->sync(execution_space, X_MASK | F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK); atomKK->sync(execution_space, X_MASK | F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK);
x = atomKK->k_x.view<DeviceType>(); d_x = atomKK->k_x.template view<DeviceType>();
f = atomKK->k_f.view<DeviceType>(); d_f = atomKK->k_f.template view<DeviceType>();
q = atomKK->k_q.view<DeviceType>(); d_q = atomKK->k_q.template view<DeviceType>();
image = atomKK->k_image.view<DeviceType>(); d_image = atomKK->k_image.template view<DeviceType>();
mask = atomKK->k_mask.view<DeviceType>(); d_mask = atomKK->k_mask.template view<DeviceType>();
int nlocal = atomKK->nlocal;
int nlocal = atom->nlocal; // virial setup
v_init(vflag);
// reallocate per-atom arrays if necessary
if (vflag_atom) {
memoryKK->destroy_kokkos(k_vatom,vatom);
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"efield:vatom");
d_vatom = k_vatom.template view<DeviceType>();
}
// update region if necessary // update region if necessary
@ -111,54 +130,22 @@ void FixEfieldKokkos<DeviceType>::post_force(int /*vflag*/)
d_efield = k_efield.view<DeviceType>(); d_efield = k_efield.view<DeviceType>();
} }
fsum[0] = fsum[1] = fsum[2] = fsum[3] = 0.0;
double_4 fsum_kk;
force_flag = 0; force_flag = 0;
double result[10] = {0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0};
if (varflag == CONSTANT) { if (varflag == CONSTANT) {
// It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below) [ndtrung81 (2023/08)]
// I tested it on kokkos-omp and it works; the unwrap issue might have been
// a bug in DomainKokkos that has since been fixed.
// FIXME: test on kokkos-gpu
// [alphataubio (2024/08)]
copymode = 1; copymode = 1;
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEfieldConstant>(0,nlocal),*this,result);
// It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below)
//Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEfieldConstant>(0,nlocal),*this,fsum_kk);
{
// local variables for lambda capture
auto prd = Few<double,3>(domain->prd);
auto h = Few<double,6>(domain->h);
auto triclinic = domain->triclinic;
auto l_ex = ex;
auto l_ey = ey;
auto l_ez = ez;
auto l_x = x;
auto l_q = q;
auto l_f = f;
auto l_mask = mask;
auto l_image = image;
auto l_groupbit = groupbit;
Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) {
if (l_mask[i] & l_groupbit) {
Few<double,3> x_i;
x_i[0] = l_x(i,0);
x_i[1] = l_x(i,1);
x_i[2] = l_x(i,2);
auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i));
auto qtmp = l_q(i);
auto fx = qtmp * l_ex;
auto fy = qtmp * l_ey;
auto fz = qtmp * l_ez;
l_f(i,0) += fx;
l_f(i,1) += fy;
l_f(i,2) += fz;
fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
fsum_kk.d1 += fx;
fsum_kk.d2 += fy;
fsum_kk.d3 += fz;
}
},fsum_kk);
}
copymode = 0; copymode = 0;
// variable force, wrap with clear/add // variable force, wrap with clear/add
@ -167,144 +154,159 @@ void FixEfieldKokkos<DeviceType>::post_force(int /*vflag*/)
atomKK->sync(Host,ALL_MASK); // this can be removed when variable class is ported to Kokkos atomKK->sync(Host,ALL_MASK); // this can be removed when variable class is ported to Kokkos
modify->clearstep_compute(); FixEfield::update_efield_variables();
if (xstyle == EQUAL) ex = input->variable->compute_equal(xvar);
else if (xstyle == ATOM)
input->variable->compute_atom(xvar,igroup,&efield[0][0],4,0);
if (ystyle == EQUAL) ey = input->variable->compute_equal(yvar);
else if (ystyle == ATOM)
input->variable->compute_atom(yvar,igroup,&efield[0][1],4,0);
if (zstyle == EQUAL) ez = input->variable->compute_equal(zvar);
else if (zstyle == ATOM)
input->variable->compute_atom(zvar,igroup,&efield[0][2],4,0);
modify->addstep_compute(update->ntimestep + 1);
if (varflag == ATOM) { // this can be removed when variable class is ported to Kokkos if (varflag == ATOM) { // this can be removed when variable class is ported to Kokkos
k_efield.modify<LMPHostType>(); k_efield.modify<LMPHostType>();
k_efield.sync<DeviceType>(); k_efield.sync<DeviceType>();
} }
// It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below) [ndtrung81 (2023/08)]
// I tested it on kokkos-omp and it works; the unwrap issue might have been
// a bug in DomainKokkos that has since been fixed.
// FIXME: test on kokkos-gpu
// [alphataubio (2024/08)]
copymode = 1; copymode = 1;
// It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEfieldNonConstant>(0,nlocal),*this,result);
//Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEfieldNonConstant>(0,nlocal),*this,fsum_kk);
{
// local variables for lambda capture
auto prd = Few<double,3>(domain->prd);
auto h = Few<double,6>(domain->h);
auto triclinic = domain->triclinic;
auto l_ex = ex;
auto l_ey = ey;
auto l_ez = ez;
auto l_d_efield = d_efield;
auto l_x = x;
auto l_q = q;
auto l_f = f;
auto l_mask = mask;
auto l_image = image;
auto l_groupbit = groupbit;
auto l_xstyle = xstyle;
auto l_ystyle = ystyle;
auto l_zstyle = zstyle;
Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) {
if (l_mask[i] & l_groupbit) {
Few<double,3> x_i;
x_i[0] = l_x(i,0);
x_i[1] = l_x(i,1);
x_i[2] = l_x(i,2);
auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i));
auto qtmp = l_q(i);
auto fx = qtmp * l_ex;
auto fy = qtmp * l_ey;
auto fz = qtmp * l_ez;
if (l_xstyle == ATOM) l_f(i,0) += qtmp * l_d_efield(i,0);
else if (l_xstyle) l_f(i,0) += fx;
if (l_ystyle == ATOM) l_f(i,1) += qtmp * l_d_efield(i,1);
else if (l_ystyle) l_f(i,1) += fy;
if (l_zstyle == ATOM) l_f(i,2) += qtmp * l_d_efield(i,2);
else if (l_zstyle) l_f(i,2) += fz;
fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
fsum_kk.d1 += fx;
fsum_kk.d2 += fy;
fsum_kk.d3 += fz;
}
},fsum_kk);
}
copymode = 0; copymode = 0;
} }
atomKK->modified(execution_space, F_MASK); atomKK->modified(execution_space, F_MASK);
fsum[0] = fsum_kk.d0; Kokkos::atomic_store(&(d_fsum[0]),result[0]);
fsum[1] = fsum_kk.d1; Kokkos::atomic_store(&(d_fsum[1]),result[1]);
fsum[2] = fsum_kk.d2; Kokkos::atomic_store(&(d_fsum[2]),result[2]);
fsum[3] = fsum_kk.d3; Kokkos::atomic_store(&(d_fsum[3]),result[3]);
k_fsum.template modify<DeviceType>();
k_fsum.template sync<LMPHostType>();
if (vflag_global) {
virial[0] += result[4];
virial[1] += result[5];
virial[2] += result[6];
virial[3] += result[7];
virial[4] += result[8];
virial[5] += result[9];
}
if (vflag_atom) {
k_vatom.template modify<DeviceType>();
k_vatom.template sync<LMPHostType>();
}
} }
template<class DeviceType> template<class DeviceType>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void FixEfieldKokkos<DeviceType>::operator()(TagFixEfieldConstant, const int &i, double_4& fsum_kk) const { void FixEfieldKokkos<DeviceType>::operator()(TagFixEfieldConstant, const int &i, value_type result) const {
if (mask[i] & groupbit) { if (d_mask(i) & groupbit) {
if (region && !d_match[i]) return; if (region && !d_match[i]) return;
auto prd = Few<double,3>(domain->prd);
auto h = Few<double,6>(domain->h);
auto triclinic = domain->triclinic;
Few<double,3> x_i; Few<double,3> x_i;
x_i[0] = x(i,0); x_i[0] = d_x(i,0);
x_i[1] = x(i,1); x_i[1] = d_x(i,1);
x_i[2] = x(i,2); x_i[2] = d_x(i,2);
auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i)); auto unwrapKK = DomainKokkos::unmap(domainKK->prd,domainKK->h,
const F_FLOAT qtmp = q(i); domainKK->triclinic,x_i,d_image(i));
const F_FLOAT fx = qtmp * ex; const F_FLOAT fx = d_q(i) * ex;
const F_FLOAT fy = qtmp * ey; const F_FLOAT fy = d_q(i) * ey;
const F_FLOAT fz = qtmp * ez; const F_FLOAT fz = d_q(i) * ez;
f(i,0) += fx; d_f(i,0) += fx;
f(i,1) += fy; d_f(i,1) += fy;
f(i,2) += fz; d_f(i,2) += fz;
// TODO: access to unwrap below crashes // TODO: access to unwrap below crashes [ndtrung81 (2023/08)]
fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; // tested, works on kokkos-omp [alphataubio (2024/08)]
fsum_kk.d1 += fx; // changed to unwrapKK to avoid possible clash with base class unwrap
fsum_kk.d2 += fy; // FIXME: test on kokkos-gpu
fsum_kk.d3 += fz; result[0] -= fx * unwrapKK[0] + fy * unwrapKK[1] + fz * unwrapKK[2];
result[1] += fx;
result[2] += fy;
result[3] += fz;
if (evflag) {
double v[6];
v[0] = fx * unwrapKK[0];
v[1] = fy * unwrapKK[1];
v[2] = fz * unwrapKK[2];
v[3] = fx * unwrapKK[1];
v[4] = fx * unwrapKK[2];
v[5] = fy * unwrapKK[2];
v_tally(result, i, v);
}
} }
} }
template<class DeviceType> template<class DeviceType>
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void FixEfieldKokkos<DeviceType>::operator()(TagFixEfieldNonConstant, const int &i, double_4& fsum_kk) const { void FixEfieldKokkos<DeviceType>::operator()(TagFixEfieldNonConstant, const int &i, value_type result) const {
auto prd = Few<double,3>(domain->prd); if (d_mask(i) & groupbit) {
auto h = Few<double,6>(domain->h);
auto triclinic = domain->triclinic;
if (mask[i] & groupbit) {
if (region && !d_match[i]) return; if (region && !d_match[i]) return;
Few<double,3> x_i;
x_i[0] = x(i,0); F_FLOAT fx, fy, fz;
x_i[1] = x(i,1);
x_i[2] = x(i,2); if (xstyle == ATOM) fx = qe2f * d_q(i) * d_efield(i,0);
auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i)); else fx = d_q(i) * ex;
const F_FLOAT qtmp = q[i]; if (ystyle == ATOM) fy = qe2f * d_q(i) * d_efield(i,1);
const F_FLOAT fx = qtmp * ex; else fy = d_q(i) * ey;
const F_FLOAT fy = qtmp * ey; if (zstyle == ATOM) fz = qe2f * d_q(i) * d_efield(i,2);
const F_FLOAT fz = qtmp * ez; else fz = d_q(i) * ez;
if (xstyle == ATOM) f(i,0) += d_efield(i,0);
else if (xstyle) f(i,0) += fx; d_f(i,0) += fx;
if (ystyle == ATOM) f(i,1) += d_efield(i,1); d_f(i,1) += fy;
else if (ystyle) f(i,1) += fy; d_f(i,2) += fz;
if (zstyle == ATOM) f(i,2) += d_efield(i,2);
else if (zstyle) f(i,2) += fz; result[1] += fx;
// TODO: access to unwrap below crashes result[2] += fy;
fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2]; result[3] += fz;
fsum_kk.d1 += fx;
fsum_kk.d2 += fy; if (pstyle == ATOM) result[0] += qe2f * d_q(i) * d_efield(i,3);
fsum_kk.d3 += fz; else if (estyle == ATOM) result[0] += d_efield(i,3);
} }
} }
/* ----------------------------------------------------------------------
tally virial into global and per-atom accumulators
i = local index of atom
v = total virial for the interaction
increment global virial by v
increment per-atom virial by v
this method can be used when fix computes forces in post_force()
and the force depends on a distance to some external object
e.g. fix wall/lj93: compute virial only on owned atoms
------------------------------------------------------------------------- */
template <class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixEfieldKokkos<DeviceType>::v_tally(value_type result, int i, double *v) const
{
if (vflag_global) {
result[4] += v[0];
result[5] += v[1];
result[6] += v[2];
result[7] += v[3];
result[8] += v[4];
result[9] += v[5];
}
if (vflag_atom) {
Kokkos::atomic_add(&(d_vatom(i,0)),v[0]);
Kokkos::atomic_add(&(d_vatom(i,1)),v[1]);
Kokkos::atomic_add(&(d_vatom(i,2)),v[2]);
Kokkos::atomic_add(&(d_vatom(i,3)),v[3]);
Kokkos::atomic_add(&(d_vatom(i,4)),v[4]);
Kokkos::atomic_add(&(d_vatom(i,5)),v[5]);
}
}
namespace LAMMPS_NS { namespace LAMMPS_NS {
template class FixEfieldKokkos<LMPDeviceType>; template class FixEfieldKokkos<LMPDeviceType>;
#ifdef LMP_KOKKOS_GPU #ifdef LMP_KOKKOS_GPU

View File

@ -28,32 +28,14 @@ FixStyle(efield/kk/host,FixEfieldKokkos<LMPHostType>);
namespace LAMMPS_NS { namespace LAMMPS_NS {
struct e_double_4 {
double d0, d1, d2, d3;
KOKKOS_INLINE_FUNCTION
e_double_4() {
d0 = d1 = d2 = d3 = 0.0;
}
KOKKOS_INLINE_FUNCTION
e_double_4& operator+=(const e_double_4 &rhs) {
d0 += rhs.d0;
d1 += rhs.d1;
d2 += rhs.d2;
d3 += rhs.d3;
return *this;
}
};
typedef e_double_4 double_4;
struct TagFixEfieldConstant{}; struct TagFixEfieldConstant{};
struct TagFixEfieldNonConstant{}; struct TagFixEfieldNonConstant{};
template<class DeviceType> template<class DeviceType>
class FixEfieldKokkos : public FixEfield { class FixEfieldKokkos : public FixEfield {
public: public:
typedef DeviceType device_type; typedef DeviceType device_type;
typedef double_4 value_type; typedef double value_type[];
typedef ArrayTypes<DeviceType> AT; typedef ArrayTypes<DeviceType> AT;
FixEfieldKokkos(class LAMMPS *, int, char **); FixEfieldKokkos(class LAMMPS *, int, char **);
@ -62,21 +44,34 @@ class FixEfieldKokkos : public FixEfield {
void post_force(int) override; void post_force(int) override;
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void operator()(TagFixEfieldConstant, const int&, double_4&) const; void operator()(TagFixEfieldConstant, const int&, value_type) const;
KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION
void operator()(TagFixEfieldNonConstant, const int&, double_4&) const; void operator()(TagFixEfieldNonConstant, const int&, value_type) const;
const int value_count = 10;
private: private:
class DomainKokkos *domainKK;
DAT::tdual_ffloat_2d k_efield; DAT::tdual_ffloat_2d k_efield;
typename AT::t_ffloat_2d_randomread d_efield; typename AT::t_ffloat_2d_randomread d_efield;
typename AT::t_int_1d d_match; typename AT::t_int_1d d_match;
typename AT::t_x_array_randomread x; typename AT::t_x_array_randomread d_x;
typename AT::t_float_1d_randomread q; typename AT::t_float_1d_randomread d_q;
typename AT::t_f_array f; typename AT::t_f_array d_f;
typename AT::t_imageint_1d_randomread image; typename AT::t_imageint_1d_randomread d_image;
typename AT::t_int_1d_randomread mask; typename AT::t_int_1d_randomread d_mask;
DAT::tdual_virial_array k_vatom;
typename AT::t_virial_array d_vatom;
typename AT::tdual_ffloat_1d k_fsum;
typename AT::t_ffloat_1d d_fsum;
KOKKOS_INLINE_FUNCTION
void v_tally(value_type, int, double*) const;
}; };
} }

View File

@ -56,6 +56,7 @@ FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) :
ilevel_respa = 0; ilevel_respa = 0;
energy_global_flag = 1; energy_global_flag = 1;
virial_global_flag = virial_peratom_flag = 1; virial_global_flag = virial_peratom_flag = 1;
fsum = new double[4];
qe2f = force->qe2f; qe2f = force->qe2f;
xstyle = ystyle = zstyle = estyle = pstyle = NONE; xstyle = ystyle = zstyle = estyle = pstyle = NONE;
@ -311,7 +312,6 @@ void FixEfield::post_force(int vflag)
double **x = atom->x; double **x = atom->x;
double fx, fy, fz; double fx, fy, fz;
double v[6], unwrap[3]; double v[6], unwrap[3];
;
// constant efield // constant efield
@ -508,4 +508,6 @@ void FixEfield::update_efield_variables()
else if (estyle == ATOM) input->variable->compute_atom(evar, igroup, &efield[0][3], 4, 0); else if (estyle == ATOM) input->variable->compute_atom(evar, igroup, &efield[0][3], 4, 0);
modify->addstep_compute(update->ntimestep + 1); modify->addstep_compute(update->ntimestep + 1);
} }

View File

@ -59,7 +59,7 @@ class FixEfield : public Fix {
double **efield; double **efield;
int force_flag; int force_flag;
double fsum[4], fsum_all[4]; double *fsum, fsum_all[4]; // need fsum double*, not double[] for kokkos dual view
void update_efield_variables(); void update_efield_variables();
}; };
} // namespace LAMMPS_NS } // namespace LAMMPS_NS