significant rewrite, all 3 tests pass

This commit is contained in:
alphataubio
2024-08-04 01:51:26 -04:00
parent 755fdc5986
commit c8c9e34e2d
4 changed files with 199 additions and 200 deletions

View File

@ -13,22 +13,23 @@
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Trung Nguyen (U Chicago)
Contributing authors: Trung Nguyen (U Chicago)
Mitch Murphy (alphataubio@gmail.com)
------------------------------------------------------------------------- */
#include "fix_efield_kokkos.h"
#include "atom_kokkos.h"
#include "update.h"
#include "modify.h"
#include "domain_kokkos.h"
#include "region.h"
#include "input.h"
#include "variable.h"
#include "memory_kokkos.h"
#include "error.h"
#include "atom_masks.h"
#include "domain_kokkos.h"
#include "error.h"
#include "input.h"
#include "kokkos_base.h"
#include "memory_kokkos.h"
#include "modify_kokkos.h"
#include "region.h"
#include "update.h"
#include "variable.h"
using namespace LAMMPS_NS;
using namespace FixConst;
@ -43,13 +44,17 @@ FixEfieldKokkos<DeviceType>::FixEfieldKokkos(LAMMPS *lmp, int narg, char **arg)
{
kokkosable = 1;
atomKK = (AtomKokkos *) atom;
domainKK = (DomainKokkos *) domain;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = EMPTY_MASK;
datamask_modify = EMPTY_MASK;
memory->destroy(efield);
memoryKK->create_kokkos(k_efield,efield,maxatom,4,"efield:efield");
d_efield = k_efield.view<DeviceType>();
d_efield = k_efield.template view<DeviceType>();
memoryKK->create_kokkos(k_fsum,fsum,4,"efield:fsum");
d_fsum = k_fsum.template view<DeviceType>();
}
/* ---------------------------------------------------------------------- */
@ -60,7 +65,8 @@ FixEfieldKokkos<DeviceType>::~FixEfieldKokkos()
if (copymode) return;
memoryKK->destroy_kokkos(k_efield,efield);
efield = nullptr;
memoryKK->destroy_kokkos(k_vatom,vatom);
memoryKK->destroy_kokkos(k_fsum,fsum);
}
/* ---------------------------------------------------------------------- */
@ -68,6 +74,7 @@ FixEfieldKokkos<DeviceType>::~FixEfieldKokkos()
template<class DeviceType>
void FixEfieldKokkos<DeviceType>::init()
{
FixEfield::init();
if (utils::strmatch(update->integrate_style,"^respa"))
@ -77,17 +84,29 @@ void FixEfieldKokkos<DeviceType>::init()
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixEfieldKokkos<DeviceType>::post_force(int /*vflag*/)
void FixEfieldKokkos<DeviceType>::post_force(int vflag)
{
atomKK->sync(execution_space, X_MASK | F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK);
x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
q = atomKK->k_q.view<DeviceType>();
image = atomKK->k_image.view<DeviceType>();
mask = atomKK->k_mask.view<DeviceType>();
d_x = atomKK->k_x.template view<DeviceType>();
d_f = atomKK->k_f.template view<DeviceType>();
d_q = atomKK->k_q.template view<DeviceType>();
d_image = atomKK->k_image.template view<DeviceType>();
d_mask = atomKK->k_mask.template view<DeviceType>();
int nlocal = atomKK->nlocal;
int nlocal = atom->nlocal;
// virial setup
v_init(vflag);
// reallocate per-atom arrays if necessary
if (vflag_atom) {
memoryKK->destroy_kokkos(k_vatom,vatom);
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"efield:vatom");
d_vatom = k_vatom.template view<DeviceType>();
}
// update region if necessary
@ -111,54 +130,22 @@ void FixEfieldKokkos<DeviceType>::post_force(int /*vflag*/)
d_efield = k_efield.view<DeviceType>();
}
fsum[0] = fsum[1] = fsum[2] = fsum[3] = 0.0;
double_4 fsum_kk;
force_flag = 0;
double result[10] = {0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0};
if (varflag == CONSTANT) {
// It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below) [ndtrung81 (2023/08)]
// i tested it on kokkos-omp and it works, might have been
// a bug in DomainKokkos that's been fixed since.
// FIXME: test on kokkos-gpu
// [alphataubio (2024/08)]
copymode = 1;
// It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below)
//Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEfieldConstant>(0,nlocal),*this,fsum_kk);
{
// local variables for lambda capture
auto prd = Few<double,3>(domain->prd);
auto h = Few<double,6>(domain->h);
auto triclinic = domain->triclinic;
auto l_ex = ex;
auto l_ey = ey;
auto l_ez = ez;
auto l_x = x;
auto l_q = q;
auto l_f = f;
auto l_mask = mask;
auto l_image = image;
auto l_groupbit = groupbit;
Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) {
if (l_mask[i] & l_groupbit) {
Few<double,3> x_i;
x_i[0] = l_x(i,0);
x_i[1] = l_x(i,1);
x_i[2] = l_x(i,2);
auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i));
auto qtmp = l_q(i);
auto fx = qtmp * l_ex;
auto fy = qtmp * l_ey;
auto fz = qtmp * l_ez;
l_f(i,0) += fx;
l_f(i,1) += fy;
l_f(i,2) += fz;
fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
fsum_kk.d1 += fx;
fsum_kk.d2 += fy;
fsum_kk.d3 += fz;
}
},fsum_kk);
}
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEfieldConstant>(0,nlocal),*this,result);
copymode = 0;
// variable force, wrap with clear/add
@ -167,144 +154,159 @@ void FixEfieldKokkos<DeviceType>::post_force(int /*vflag*/)
atomKK->sync(Host,ALL_MASK); // this can be removed when variable class is ported to Kokkos
modify->clearstep_compute();
if (xstyle == EQUAL) ex = input->variable->compute_equal(xvar);
else if (xstyle == ATOM)
input->variable->compute_atom(xvar,igroup,&efield[0][0],4,0);
if (ystyle == EQUAL) ey = input->variable->compute_equal(yvar);
else if (ystyle == ATOM)
input->variable->compute_atom(yvar,igroup,&efield[0][1],4,0);
if (zstyle == EQUAL) ez = input->variable->compute_equal(zvar);
else if (zstyle == ATOM)
input->variable->compute_atom(zvar,igroup,&efield[0][2],4,0);
modify->addstep_compute(update->ntimestep + 1);
FixEfield::update_efield_variables();
if (varflag == ATOM) { // this can be removed when variable class is ported to Kokkos
k_efield.modify<LMPHostType>();
k_efield.sync<DeviceType>();
}
// It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below) [ndtrung81 (2023/08)]
// i tested it on kokkos-omp and it works, might have been
// a bug in DomainKokkos that's been fixed since.
// FIXME: test on kokkos-gpu
// [alphataubio (2024/08)]
copymode = 1;
// It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below)
//Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEfieldNonConstant>(0,nlocal),*this,fsum_kk);
{
// local variables for lambda capture
auto prd = Few<double,3>(domain->prd);
auto h = Few<double,6>(domain->h);
auto triclinic = domain->triclinic;
auto l_ex = ex;
auto l_ey = ey;
auto l_ez = ez;
auto l_d_efield = d_efield;
auto l_x = x;
auto l_q = q;
auto l_f = f;
auto l_mask = mask;
auto l_image = image;
auto l_groupbit = groupbit;
auto l_xstyle = xstyle;
auto l_ystyle = ystyle;
auto l_zstyle = zstyle;
Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) {
if (l_mask[i] & l_groupbit) {
Few<double,3> x_i;
x_i[0] = l_x(i,0);
x_i[1] = l_x(i,1);
x_i[2] = l_x(i,2);
auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i));
auto qtmp = l_q(i);
auto fx = qtmp * l_ex;
auto fy = qtmp * l_ey;
auto fz = qtmp * l_ez;
if (l_xstyle == ATOM) l_f(i,0) += qtmp * l_d_efield(i,0);
else if (l_xstyle) l_f(i,0) += fx;
if (l_ystyle == ATOM) l_f(i,1) += qtmp * l_d_efield(i,1);
else if (l_ystyle) l_f(i,1) += fy;
if (l_zstyle == ATOM) l_f(i,2) += qtmp * l_d_efield(i,2);
else if (l_zstyle) l_f(i,2) += fz;
fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
fsum_kk.d1 += fx;
fsum_kk.d2 += fy;
fsum_kk.d3 += fz;
}
},fsum_kk);
}
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEfieldNonConstant>(0,nlocal),*this,result);
copymode = 0;
}
atomKK->modified(execution_space, F_MASK);
fsum[0] = fsum_kk.d0;
fsum[1] = fsum_kk.d1;
fsum[2] = fsum_kk.d2;
fsum[3] = fsum_kk.d3;
Kokkos::atomic_store(&(d_fsum[0]),result[0]);
Kokkos::atomic_store(&(d_fsum[1]),result[1]);
Kokkos::atomic_store(&(d_fsum[2]),result[2]);
Kokkos::atomic_store(&(d_fsum[3]),result[3]);
k_fsum.template modify<DeviceType>();
k_fsum.template sync<LMPHostType>();
if (vflag_global) {
virial[0] += result[4];
virial[1] += result[5];
virial[2] += result[6];
virial[3] += result[7];
virial[4] += result[8];
virial[5] += result[9];
}
if (vflag_atom) {
k_vatom.template modify<DeviceType>();
k_vatom.template sync<LMPHostType>();
}
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixEfieldKokkos<DeviceType>::operator()(TagFixEfieldConstant, const int &i, double_4& fsum_kk) const {
if (mask[i] & groupbit) {
void FixEfieldKokkos<DeviceType>::operator()(TagFixEfieldConstant, const int &i, value_type result) const {
if (d_mask(i) & groupbit) {
if (region && !d_match[i]) return;
auto prd = Few<double,3>(domain->prd);
auto h = Few<double,6>(domain->h);
auto triclinic = domain->triclinic;
Few<double,3> x_i;
x_i[0] = x(i,0);
x_i[1] = x(i,1);
x_i[2] = x(i,2);
auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i));
const F_FLOAT qtmp = q(i);
const F_FLOAT fx = qtmp * ex;
const F_FLOAT fy = qtmp * ey;
const F_FLOAT fz = qtmp * ez;
f(i,0) += fx;
f(i,1) += fy;
f(i,2) += fz;
// TODO: access to unwrap below crashes
fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
fsum_kk.d1 += fx;
fsum_kk.d2 += fy;
fsum_kk.d3 += fz;
x_i[0] = d_x(i,0);
x_i[1] = d_x(i,1);
x_i[2] = d_x(i,2);
auto unwrapKK = DomainKokkos::unmap(domainKK->prd,domainKK->h,
domainKK->triclinic,x_i,d_image(i));
const F_FLOAT fx = d_q(i) * ex;
const F_FLOAT fy = d_q(i) * ey;
const F_FLOAT fz = d_q(i) * ez;
d_f(i,0) += fx;
d_f(i,1) += fy;
d_f(i,2) += fz;
// TODO: access to unwrap below crashes [ndtrung81 (2023/08)]
// tested, works on kokkos-omp [alphataubio (2024/08)]
// changed to unwrapKK to avoid possible clash with base class unwrap
// FIXME: test on kokkos-gpu
result[0] -= fx * unwrapKK[0] + fy * unwrapKK[1] + fz * unwrapKK[2];
result[1] += fx;
result[2] += fy;
result[3] += fz;
if (evflag) {
double v[6];
v[0] = fx * unwrapKK[0];
v[1] = fy * unwrapKK[1];
v[2] = fz * unwrapKK[2];
v[3] = fx * unwrapKK[1];
v[4] = fx * unwrapKK[2];
v[5] = fy * unwrapKK[2];
v_tally(result, i, v);
}
}
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixEfieldKokkos<DeviceType>::operator()(TagFixEfieldNonConstant, const int &i, double_4& fsum_kk) const {
auto prd = Few<double,3>(domain->prd);
auto h = Few<double,6>(domain->h);
auto triclinic = domain->triclinic;
if (mask[i] & groupbit) {
void FixEfieldKokkos<DeviceType>::operator()(TagFixEfieldNonConstant, const int &i, value_type result) const {
if (d_mask(i) & groupbit) {
if (region && !d_match[i]) return;
Few<double,3> x_i;
x_i[0] = x(i,0);
x_i[1] = x(i,1);
x_i[2] = x(i,2);
auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i));
const F_FLOAT qtmp = q[i];
const F_FLOAT fx = qtmp * ex;
const F_FLOAT fy = qtmp * ey;
const F_FLOAT fz = qtmp * ez;
if (xstyle == ATOM) f(i,0) += d_efield(i,0);
else if (xstyle) f(i,0) += fx;
if (ystyle == ATOM) f(i,1) += d_efield(i,1);
else if (ystyle) f(i,1) += fy;
if (zstyle == ATOM) f(i,2) += d_efield(i,2);
else if (zstyle) f(i,2) += fz;
// TODO: access to unwrap below crashes
fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
fsum_kk.d1 += fx;
fsum_kk.d2 += fy;
fsum_kk.d3 += fz;
F_FLOAT fx, fy, fz;
if (xstyle == ATOM) fx = qe2f * d_q(i) * d_efield(i,0);
else fx = d_q(i) * ex;
if (ystyle == ATOM) fy = qe2f * d_q(i) * d_efield(i,1);
else fy = d_q(i) * ey;
if (zstyle == ATOM) fz = qe2f * d_q(i) * d_efield(i,2);
else fz = d_q(i) * ez;
d_f(i,0) += fx;
d_f(i,1) += fy;
d_f(i,2) += fz;
result[1] += fx;
result[2] += fy;
result[3] += fz;
if (pstyle == ATOM) result[0] += qe2f * d_q(i) * d_efield(i,3);
else if (estyle == ATOM) result[0] += d_efield(i,3);
}
}
/* ----------------------------------------------------------------------
tally virial into global and per-atom accumulators
i = local index of atom
v = total virial for the interaction
increment global virial by v
increment per-atom virial by v
this method can be used when fix computes forces in post_force()
and the force depends on a distance to some external object
e.g. fix wall/lj93: compute virial only on owned atoms
------------------------------------------------------------------------- */
template <class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixEfieldKokkos<DeviceType>::v_tally(value_type result, int i, double *v) const
{
if (vflag_global) {
result[4] += v[0];
result[5] += v[1];
result[6] += v[2];
result[7] += v[3];
result[8] += v[4];
result[9] += v[5];
}
if (vflag_atom) {
Kokkos::atomic_add(&(d_vatom(i,0)),v[0]);
Kokkos::atomic_add(&(d_vatom(i,1)),v[1]);
Kokkos::atomic_add(&(d_vatom(i,2)),v[2]);
Kokkos::atomic_add(&(d_vatom(i,3)),v[3]);
Kokkos::atomic_add(&(d_vatom(i,4)),v[4]);
Kokkos::atomic_add(&(d_vatom(i,5)),v[5]);
}
}
namespace LAMMPS_NS {
template class FixEfieldKokkos<LMPDeviceType>;
#ifdef LMP_KOKKOS_GPU

View File

@ -28,32 +28,14 @@ FixStyle(efield/kk/host,FixEfieldKokkos<LMPHostType>);
namespace LAMMPS_NS {
struct e_double_4 {
double d0, d1, d2, d3;
KOKKOS_INLINE_FUNCTION
e_double_4() {
d0 = d1 = d2 = d3 = 0.0;
}
KOKKOS_INLINE_FUNCTION
e_double_4& operator+=(const e_double_4 &rhs) {
d0 += rhs.d0;
d1 += rhs.d1;
d2 += rhs.d2;
d3 += rhs.d3;
return *this;
}
};
typedef e_double_4 double_4;
struct TagFixEfieldConstant{};
struct TagFixEfieldNonConstant{};
template<class DeviceType>
class FixEfieldKokkos : public FixEfield {
public:
typedef DeviceType device_type;
typedef double_4 value_type;
typedef double value_type[];
typedef ArrayTypes<DeviceType> AT;
FixEfieldKokkos(class LAMMPS *, int, char **);
@ -62,21 +44,34 @@ class FixEfieldKokkos : public FixEfield {
void post_force(int) override;
KOKKOS_INLINE_FUNCTION
void operator()(TagFixEfieldConstant, const int&, double_4&) const;
void operator()(TagFixEfieldConstant, const int&, value_type) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagFixEfieldNonConstant, const int&, double_4&) const;
void operator()(TagFixEfieldNonConstant, const int&, value_type) const;
const int value_count = 10;
private:
class DomainKokkos *domainKK;
DAT::tdual_ffloat_2d k_efield;
typename AT::t_ffloat_2d_randomread d_efield;
typename AT::t_int_1d d_match;
typename AT::t_x_array_randomread x;
typename AT::t_float_1d_randomread q;
typename AT::t_f_array f;
typename AT::t_imageint_1d_randomread image;
typename AT::t_int_1d_randomread mask;
typename AT::t_x_array_randomread d_x;
typename AT::t_float_1d_randomread d_q;
typename AT::t_f_array d_f;
typename AT::t_imageint_1d_randomread d_image;
typename AT::t_int_1d_randomread d_mask;
DAT::tdual_virial_array k_vatom;
typename AT::t_virial_array d_vatom;
typename AT::tdual_ffloat_1d k_fsum;
typename AT::t_ffloat_1d d_fsum;
KOKKOS_INLINE_FUNCTION
void v_tally(value_type, int, double*) const;
};
}

View File

@ -56,6 +56,7 @@ FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) :
ilevel_respa = 0;
energy_global_flag = 1;
virial_global_flag = virial_peratom_flag = 1;
fsum = new double[4];
qe2f = force->qe2f;
xstyle = ystyle = zstyle = estyle = pstyle = NONE;
@ -311,7 +312,6 @@ void FixEfield::post_force(int vflag)
double **x = atom->x;
double fx, fy, fz;
double v[6], unwrap[3];
;
// constant efield
@ -508,4 +508,6 @@ void FixEfield::update_efield_variables()
else if (estyle == ATOM) input->variable->compute_atom(evar, igroup, &efield[0][3], 4, 0);
modify->addstep_compute(update->ntimestep + 1);
}

View File

@ -59,7 +59,7 @@ class FixEfield : public Fix {
double **efield;
int force_flag;
double fsum[4], fsum_all[4];
double *fsum, fsum_all[4]; // need fsum double*, not double[] for kokkos dual view
void update_efield_variables();
};
} // namespace LAMMPS_NS