Files
lammps/src/KOKKOS/fix_shake_kokkos.cpp

2091 lines
66 KiB
C++

// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "fix_shake_kokkos.h"
#include "fix_rattle.h"
#include "atom_kokkos.h"
#include "atom_vec.h"
#include "molecule.h"
#include "update.h"
#include "respa.h"
#include "modify.h"
#include "domain.h"
#include "force.h"
#include "bond.h"
#include "angle.h"
#include "comm.h"
#include "group.h"
#include "fix_respa.h"
#include "math_const.h"
#include "memory_kokkos.h"
#include "error.h"
#include "kokkos.h"
#include "atom_masks.h"
#include <cmath>
#include <cstring>
using namespace LAMMPS_NS;
using namespace FixConst;
using namespace MathConst;
#define RVOUS 1 // 0 for irregular, 1 for all2all
#define BIG 1.0e20
#define MASSDELTA 0.1
/* ---------------------------------------------------------------------- */
template<class DeviceType>
FixShakeKokkos<DeviceType>::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) :
FixShake(lmp, narg, arg)
{
kokkosable = 1;
forward_comm_device = exchange_comm_device = sort_device = 1;
maxexchange = 9;
atomKK = (AtomKokkos *)atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = EMPTY_MASK;
datamask_modify = EMPTY_MASK;
memory->destroy(xshake);
shake_flag_tmp = shake_flag;
shake_atom_tmp = shake_atom;
shake_type_tmp = shake_type;
shake_flag = nullptr;
shake_atom = nullptr;
shake_type = nullptr;
int nmax = atom->nmax;
grow_arrays(nmax);
for (int i = 0; i < atom->nlocal; i++) {
k_shake_flag.h_view[i] = shake_flag_tmp[i];
k_shake_atom.h_view(i,0) = shake_atom_tmp[i][0];
k_shake_atom.h_view(i,1) = shake_atom_tmp[i][1];
k_shake_atom.h_view(i,2) = shake_atom_tmp[i][2];
k_shake_atom.h_view(i,3) = shake_atom_tmp[i][3];
k_shake_type.h_view(i,0) = shake_type_tmp[i][0];
k_shake_type.h_view(i,1) = shake_type_tmp[i][1];
k_shake_type.h_view(i,2) = shake_type_tmp[i][2];
}
k_shake_flag.modify_host();
k_shake_atom.modify_host();
k_shake_type.modify_host();
k_bond_distance = DAT::tdual_float_1d("fix_shake:bond_distance",atom->nbondtypes+1);
k_angle_distance = DAT::tdual_float_1d("fix_shake:angle_distance",atom->nangletypes+1);
d_bond_distance = k_bond_distance.view<DeviceType>();
d_angle_distance = k_angle_distance.view<DeviceType>();
// use 1D view for scalars to reduce GPU memory operations
d_scalars = typename AT::t_int_1d("neighbor:scalars",2);
h_scalars = HAT::t_int_1d("neighbor:scalars_mirror",2);
d_error_flag = Kokkos::subview(d_scalars,0);
d_nlist = Kokkos::subview(d_scalars,1);
h_error_flag = Kokkos::subview(h_scalars,0);
h_nlist = Kokkos::subview(h_scalars,1);
d_count = typename AT::t_int_scalar("fix_shake:count");
h_count = Kokkos::create_mirror_view(d_count);
memory->destroy(shake_flag_tmp);
memory->destroy(shake_atom_tmp);
memory->destroy(shake_type_tmp);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
FixShakeKokkos<DeviceType>::~FixShakeKokkos()
{
if (copymode) return;
k_shake_flag.sync_host();
k_shake_atom.sync_host();
for (int i = 0; i < nlocal; i++) {
if (shake_flag[i] == 0) continue;
else if (shake_flag[i] == 1) {
bondtype_findset(i,shake_atom[i][0],shake_atom[i][1],1);
bondtype_findset(i,shake_atom[i][0],shake_atom[i][2],1);
angletype_findset(i,shake_atom[i][1],shake_atom[i][2],1);
} else if (shake_flag[i] == 2) {
bondtype_findset(i,shake_atom[i][0],shake_atom[i][1],1);
} else if (shake_flag[i] == 3) {
bondtype_findset(i,shake_atom[i][0],shake_atom[i][1],1);
bondtype_findset(i,shake_atom[i][0],shake_atom[i][2],1);
} else if (shake_flag[i] == 4) {
bondtype_findset(i,shake_atom[i][0],shake_atom[i][1],1);
bondtype_findset(i,shake_atom[i][0],shake_atom[i][2],1);
bondtype_findset(i,shake_atom[i][0],shake_atom[i][3],1);
}
}
memoryKK->destroy_kokkos(k_shake_flag,shake_flag);
memoryKK->destroy_kokkos(k_shake_atom,shake_atom);
memoryKK->destroy_kokkos(k_shake_type,shake_type);
memoryKK->destroy_kokkos(k_xshake,xshake);
memoryKK->destroy_kokkos(k_list,list);
memoryKK->destroy_kokkos(k_closest_list,closest_list);
memoryKK->destroy_kokkos(k_vatom,vatom);
}
/* ----------------------------------------------------------------------
set bond and angle distances
this init must happen after force->bond and force->angle inits
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::init()
{
FixShake::init();
if (utils::strmatch(update->integrate_style,"^respa"))
error->all(FLERR,"Cannot yet use respa with Kokkos");
if (rattle)
error->all(FLERR,"Cannot yet use KOKKOS package with fix rattle");
// set equilibrium bond distances
for (int i = 1; i <= atom->nbondtypes; i++)
k_bond_distance.h_view[i] = bond_distance[i];
// set equilibrium angle distances
for (int i = 1; i <= atom->nangletypes; i++)
k_angle_distance.h_view[i] = angle_distance[i];
k_bond_distance.modify_host();
k_angle_distance.modify_host();
k_bond_distance.sync<DeviceType>();
k_angle_distance.sync<DeviceType>();
}
/* ----------------------------------------------------------------------
run setup for minimization.
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::min_setup(int /*vflag*/)
{
error->all(FLERR, "Cannot yet use fix {} during minimization with Kokkos", style);
}
/* ----------------------------------------------------------------------
build list of SHAKE clusters to constrain
if one or more atoms in cluster are on this proc,
this proc lists the cluster exactly once
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::pre_neighbor()
{
// local copies of atom quantities
// used by SHAKE until next re-neighboring
atomKK->sync(execution_space,X_MASK);
ebond = 0.0;
d_x = atomKK->k_x.view<DeviceType>();
nlocal = atom->nlocal;
map_style = atom->map_style;
if (map_style == Atom::MAP_ARRAY) {
k_map_array = atomKK->k_map_array;
k_map_array.template sync<DeviceType>();
} else if (map_style == Atom::MAP_HASH) {
k_map_hash = atomKK->k_map_hash;
k_map_hash.template sync<DeviceType>();
}
k_shake_flag.sync<DeviceType>();
k_shake_atom.sync<DeviceType>();
// extend size of SHAKE list if necessary
if (nlocal > maxlist) {
maxlist = atom->nmax;
memoryKK->destroy_kokkos(k_list,list);
memoryKK->create_kokkos(k_list,list,maxlist,"shake:list");
d_list = k_list.view<DeviceType>();
memoryKK->destroy_kokkos(k_closest_list,closest_list);
memoryKK->create_kokkos(k_closest_list,closest_list,maxlist,4,"shake:closest_list");
d_closest_list = k_closest_list.view<DeviceType>();
}
// Atom Map
map_style = atom->map_style;
if (map_style == Atom::MAP_ARRAY) {
k_map_array = atomKK->k_map_array;
k_map_array.template sync<DeviceType>();
} else if (map_style == Atom::MAP_HASH) {
k_map_hash = atomKK->k_map_hash;
k_map_hash.template sync<DeviceType>();
}
atomKK->k_sametag.sync<DeviceType>();
d_sametag = atomKK->k_sametag.view<DeviceType>();
// build list of SHAKE clusters I compute
Kokkos::deep_copy(d_scalars,0);
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixShakePreNeighbor>(0,nlocal),*this);
copymode = 0;
k_list.modify<DeviceType>();
k_closest_list.modify<DeviceType>();
Kokkos::deep_copy(h_scalars,d_scalars);
nlist = h_nlist();
if (h_error_flag() == 1) {
error->one(FLERR,"Shake atoms missing on proc "
"{} at step {}",comm->me,update->ntimestep);
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixShakeKokkos<DeviceType>::operator()(TagFixShakePreNeighbor, const int &i) const {
if (d_shake_flag[i]) {
if (d_shake_flag[i] == 2) {
int atom1 = AtomKokkos::map_kokkos<DeviceType>(d_shake_atom(i,0),map_style,k_map_array,k_map_hash);
int atom2 = AtomKokkos::map_kokkos<DeviceType>(d_shake_atom(i,1),map_style,k_map_array,k_map_hash);
if (atom1 == -1 || atom2 == -1) {
d_error_flag() = 1;
}
atom1 = closest_image(i, atom1);
atom2 = closest_image(i, atom2);
if (i <= atom1 && i <= atom2) {
const int nlist = Kokkos::atomic_fetch_add(&d_nlist(),1);
d_list[nlist] = i;
d_closest_list(nlist,0) = atom1;
d_closest_list(nlist,1) = atom2;
}
} else if (d_shake_flag[i] % 2 == 1) {
int atom1 = AtomKokkos::map_kokkos<DeviceType>(d_shake_atom(i,0),map_style,k_map_array,k_map_hash);
int atom2 = AtomKokkos::map_kokkos<DeviceType>(d_shake_atom(i,1),map_style,k_map_array,k_map_hash);
int atom3 = AtomKokkos::map_kokkos<DeviceType>(d_shake_atom(i,2),map_style,k_map_array,k_map_hash);
if (atom1 == -1 || atom2 == -1 || atom3 == -1)
d_error_flag() = 1;
atom1 = closest_image(i, atom1);
atom2 = closest_image(i, atom2);
atom3 = closest_image(i, atom3);
if (i <= atom1 && i <= atom2 && i <= atom3) {
const int nlist = Kokkos::atomic_fetch_add(&d_nlist(),1);
d_list[nlist] = i;
d_closest_list(nlist,0) = atom1;
d_closest_list(nlist,1) = atom2;
d_closest_list(nlist,2) = atom3;
}
} else {
int atom1 = AtomKokkos::map_kokkos<DeviceType>(d_shake_atom(i,0),map_style,k_map_array,k_map_hash);
int atom2 = AtomKokkos::map_kokkos<DeviceType>(d_shake_atom(i,1),map_style,k_map_array,k_map_hash);
int atom3 = AtomKokkos::map_kokkos<DeviceType>(d_shake_atom(i,2),map_style,k_map_array,k_map_hash);
int atom4 = AtomKokkos::map_kokkos<DeviceType>(d_shake_atom(i,3),map_style,k_map_array,k_map_hash);
if (atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1)
d_error_flag() = 1;
atom1 = closest_image(i, atom1);
atom2 = closest_image(i, atom2);
atom3 = closest_image(i, atom3);
atom4 = closest_image(i, atom4);
if (i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4) {
const int nlist = Kokkos::atomic_fetch_add(&d_nlist(),1);
d_list[nlist] = i;
d_closest_list(nlist,0) = atom1;
d_closest_list(nlist,1) = atom2;
d_closest_list(nlist,2) = atom3;
d_closest_list(nlist,3) = atom4;
}
}
}
}
/* ----------------------------------------------------------------------
compute the force adjustment for SHAKE constraint
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::post_force(int vflag)
{
ebond = 0.0;
copymode = 1;
d_x = atomKK->k_x.view<DeviceType>();
d_f = atomKK->k_f.view<DeviceType>();
d_type = atomKK->k_type.view<DeviceType>();
d_rmass = atomKK->k_rmass.view<DeviceType>();
d_mass = atomKK->k_mass.view<DeviceType>();
nlocal = atomKK->nlocal;
map_style = atom->map_style;
if (map_style == Atom::MAP_ARRAY) {
k_map_array = atomKK->k_map_array;
k_map_array.template sync<DeviceType>();
} else if (map_style == Atom::MAP_HASH) {
k_map_hash = atomKK->k_map_hash;
k_map_hash.template sync<DeviceType>();
}
if (d_rmass.data())
atomKK->sync(execution_space,X_MASK|F_MASK|RMASS_MASK);
else
atomKK->sync(execution_space,X_MASK|F_MASK|TYPE_MASK);
k_shake_flag.sync<DeviceType>();
k_shake_type.sync<DeviceType>();
k_list.sync<DeviceType>();
k_closest_list.sync<DeviceType>();
if (update->ntimestep == next_output)
stats();
// xshake = unconstrained move with current v,f
// communicate results if necessary
unconstrained_update();
comm->forward_comm(this);
k_xshake.sync<DeviceType>();
// virial setup
v_init(vflag);
// reallocate per-atom arrays if necessary
if (vflag_atom) {
memoryKK->destroy_kokkos(k_vatom,vatom);
memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"improper:vatom");
d_vatom = k_vatom.template view<KKDeviceType>();
}
neighflag = lmp->kokkos->neighflag;
// FULL neighlist still needs atomics in fix shake
if (neighflag == FULL) {
if (lmp->kokkos->nthreads > 1 || lmp->kokkos->ngpus > 0)
neighflag = HALFTHREAD;
else
neighflag = HALF;
}
need_dup = 0;
if (neighflag != HALF)
need_dup = std::is_same_v<NeedDup_v<HALFTHREAD,DeviceType>,Kokkos::Experimental::ScatterDuplicated>;
// allocate duplicated memory
if (need_dup) {
dup_f = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_f);
dup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
} else {
ndup_f = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_f);
ndup_vatom = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
}
Kokkos::deep_copy(d_error_flag,0);
EV_FLOAT ev;
// loop over clusters to add constraint forces
if (neighflag == HALF) {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixShakePostForce<HALF,1> >(0,nlist),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixShakePostForce<HALF,0> >(0,nlist),*this);
} else {
if (evflag)
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixShakePostForce<HALFTHREAD,1> >(0,nlist),*this,ev);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixShakePostForce<HALFTHREAD,0> >(0,nlist),*this);
}
copymode = 0;
Kokkos::deep_copy(h_error_flag,d_error_flag);
if (h_error_flag() == 2)
error->warning(FLERR,"Shake determinant < 0.0");
else if (h_error_flag() == 3)
error->one(FLERR,"Shake determinant = 0.0");
// store vflag for coordinate_constraints_end_of_step()
vflag_post_force = vflag;
// reduction over duplicated memory
if (need_dup)
Kokkos::Experimental::contribute(d_f,dup_f);
atomKK->modified(execution_space,F_MASK);
if (vflag_global) {
virial[0] += ev.v[0];
virial[1] += ev.v[1];
virial[2] += ev.v[2];
virial[3] += ev.v[3];
virial[4] += ev.v[4];
virial[5] += ev.v[5];
}
if (vflag_atom) {
k_vatom.template modify<DeviceType>();
k_vatom.template sync<LMPHostType>();
}
// free duplicated memory
if (need_dup) {
dup_f = decltype(dup_f)();
dup_vatom = decltype(dup_vatom)();
}
}
/* ----------------------------------------------------------------------
substitute shake constraints with very strong bonds
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::min_post_force(int vflag)
{
// not yet ported to Kokkos
atomKK->sync(Host,X_MASK | F_MASK);
k_shake_flag.sync_host();
k_shake_type.sync_host();
k_list.sync_host();
k_closest_list.sync_host();
FixShake::min_post_force(vflag);
atomKK->modified(Host,F_MASK);
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void FixShakeKokkos<DeviceType>::operator()(TagFixShakePostForce<NEIGHFLAG,EVFLAG>, const int &i, EV_FLOAT& ev) const {
const int m = d_list[i];
if (d_shake_flag[m] == 2) shake<NEIGHFLAG,EVFLAG>(i,ev);
else if (d_shake_flag[m] == 3) shake3<NEIGHFLAG,EVFLAG>(i,ev);
else if (d_shake_flag[m] == 4) shake4<NEIGHFLAG,EVFLAG>(i,ev);
else shake3angle<NEIGHFLAG,EVFLAG>(i,ev);
}
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void FixShakeKokkos<DeviceType>::operator()(TagFixShakePostForce<NEIGHFLAG,EVFLAG>, const int &i) const {
EV_FLOAT ev;
this->template operator()<NEIGHFLAG,EVFLAG>(TagFixShakePostForce<NEIGHFLAG,EVFLAG>(), i, ev);
}
/* ----------------------------------------------------------------------
count # of degrees-of-freedom removed by SHAKE for atoms in igroup
------------------------------------------------------------------------- */
template<class DeviceType>
int FixShakeKokkos<DeviceType>::dof(int igroup)
{
d_mask = atomKK->k_mask.view<DeviceType>();
d_tag = atomKK->k_tag.view<DeviceType>();
nlocal = atom->nlocal;
atomKK->sync(execution_space,MASK_MASK|TAG_MASK);
k_shake_flag.sync<DeviceType>();
k_shake_atom.sync<DeviceType>();
// count dof in a cluster if and only if
// the central atom is in group and atom i is the central atom
int n = 0;
{
// local variables for lambda capture
auto d_shake_flag = this->d_shake_flag;
auto d_shake_atom = this->d_shake_atom;
auto tag = this->d_tag;
auto mask = this->d_mask;
auto groupbit = group->bitmask[igroup];
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType>(0,nlocal),
LAMMPS_LAMBDA(const int& i, int& n) {
if (!(mask[i] & groupbit)) return;
if (d_shake_flag[i] == 0) return;
if (d_shake_atom(i,0) != tag[i]) return;
if (d_shake_flag[i] == 1) n += 3;
else if (d_shake_flag[i] == 2) n += 1;
else if (d_shake_flag[i] == 3) n += 2;
else if (d_shake_flag[i] == 4) n += 3;
},n);
}
int nall;
MPI_Allreduce(&n,&nall,1,MPI_INT,MPI_SUM,world);
return nall;
}
/* ----------------------------------------------------------------------
assumes NVE update, seems to be accurate enough for NVT,NPT,NPH as well
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::unconstrained_update()
{
d_x = atomKK->k_x.view<DeviceType>();
d_v = atomKK->k_v.view<DeviceType>();
d_f = atomKK->k_f.view<DeviceType>();
d_type = atomKK->k_type.view<DeviceType>();
d_rmass = atomKK->k_rmass.view<DeviceType>();
d_mass = atomKK->k_mass.view<DeviceType>();
nlocal = atom->nlocal;
if (d_rmass.data())
atomKK->sync(execution_space,X_MASK|V_MASK|F_MASK|RMASS_MASK);
else
atomKK->sync(execution_space,X_MASK|V_MASK|F_MASK|TYPE_MASK);
k_shake_flag.sync<DeviceType>();
k_xshake.sync<DeviceType>();
{
// local variables for lambda capture
auto d_shake_flag = this->d_shake_flag;
auto d_xshake = this->d_xshake;
auto x = this->d_x;
auto v = this->d_v;
auto f = this->d_f;
auto dtfsq = this->dtfsq;
auto dtv = this->dtv;
if (d_rmass.data()) {
auto rmass = this->d_rmass;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal),
LAMMPS_LAMBDA(const int& i) {
if (d_shake_flag[i]) {
const double dtfmsq = dtfsq / rmass[i];
d_xshake(i,0) = x(i,0) + dtv*v(i,0) + dtfmsq*f(i,0);
d_xshake(i,1) = x(i,1) + dtv*v(i,1) + dtfmsq*f(i,1);
d_xshake(i,2) = x(i,2) + dtv*v(i,2) + dtfmsq*f(i,2);
} else d_xshake(i,2) = d_xshake(i,1) = d_xshake(i,0) = 0.0;
});
} else {
auto mass = this->d_mass;
auto type = this->d_type;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType>(0,nlocal),
LAMMPS_LAMBDA(const int& i) {
if (d_shake_flag[i]) {
const double dtfmsq = dtfsq / mass[type[i]];
d_xshake(i,0) = x(i,0) + dtv*v(i,0) + dtfmsq*f(i,0);
d_xshake(i,1) = x(i,1) + dtv*v(i,1) + dtfmsq*f(i,1);
d_xshake(i,2) = x(i,2) + dtv*v(i,2) + dtfmsq*f(i,2);
} else d_xshake(i,2) = d_xshake(i,1) = d_xshake(i,0) = 0.0;
});
}
}
k_xshake.modify<DeviceType>();
}
/* ----------------------------------------------------------------------
calculate SHAKE constraint forces for size 2 cluster = single bond
------------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void FixShakeKokkos<DeviceType>::shake(int ilist, EV_FLOAT& ev) const
{
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
auto v_f = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
auto a_f = v_f.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
int atomlist[2];
double v[6];
double invmass0,invmass1;
// local atom IDs and constraint distances
int m = d_list[ilist];
int i0 = d_closest_list(ilist,0);
int i1 = d_closest_list(ilist,1);
double bond1 = d_bond_distance[d_shake_type(m,0)];
// r01 = distance vec between atoms
double r01[3];
r01[0] = d_x(i0,0) - d_x(i1,0);
r01[1] = d_x(i0,1) - d_x(i1,1);
r01[2] = d_x(i0,2) - d_x(i1,2);
// s01 = distance vec after unconstrained update
double s01[3];
s01[0] = d_xshake(i0,0) - d_xshake(i1,0);
s01[1] = d_xshake(i0,1) - d_xshake(i1,1);
s01[2] = d_xshake(i0,2) - d_xshake(i1,2);
// scalar distances between atoms
double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2];
double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2];
// a,b,c = coeffs in quadratic equation for lamda
if (d_rmass.data()) {
invmass0 = 1.0/d_rmass[i0];
invmass1 = 1.0/d_rmass[i1];
} else {
invmass0 = 1.0/d_mass[d_type[i0]];
invmass1 = 1.0/d_mass[d_type[i1]];
}
double a = (invmass0+invmass1)*(invmass0+invmass1) * r01sq;
double b = 2.0 * (invmass0+invmass1) *
(s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]);
double c = s01sq - bond1*bond1;
// error check
double determ = b*b - 4.0*a*c;
if (determ < 0.0) {
//error->warning(FLERR,"Shake determinant < 0.0",0);
d_error_flag() = 2;
determ = 0.0;
}
// exact quadratic solution for lamda
double lamda,lamda1,lamda2;
lamda1 = (-b+sqrt(determ)) / (2.0*a);
lamda2 = (-b-sqrt(determ)) / (2.0*a);
if (fabs(lamda1) <= fabs(lamda2)) lamda = lamda1;
else lamda = lamda2;
// update forces if atom is owned by this processor
lamda /= dtfsq;
if (i0 < nlocal) {
a_f(i0,0) += lamda*r01[0];
a_f(i0,1) += lamda*r01[1];
a_f(i0,2) += lamda*r01[2];
}
if (i1 < nlocal) {
a_f(i1,0) -= lamda*r01[0];
a_f(i1,1) -= lamda*r01[1];
a_f(i1,2) -= lamda*r01[2];
}
if (EVFLAG) {
int count = 0;
if (i0 < nlocal) atomlist[count++] = i0;
if (i1 < nlocal) atomlist[count++] = i1;
v[0] = lamda*r01[0]*r01[0];
v[1] = lamda*r01[1]*r01[1];
v[2] = lamda*r01[2]*r01[2];
v[3] = lamda*r01[0]*r01[1];
v[4] = lamda*r01[0]*r01[2];
v[5] = lamda*r01[1]*r01[2];
v_tally<NEIGHFLAG>(ev,count,atomlist,2.0,v);
}
}
/* ----------------------------------------------------------------------
calculate SHAKE constraint forces for size 3 cluster = two bonds
------------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void FixShakeKokkos<DeviceType>::shake3(int ilist, EV_FLOAT& ev) const
{
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
auto v_f = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
auto a_f = v_f.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
int atomlist[3];
double v[6];
double invmass0,invmass1,invmass2;
// local atom IDs and constraint distances
int m = d_list[ilist];
int i0 = d_closest_list(ilist,0);
int i1 = d_closest_list(ilist,1);
int i2 = d_closest_list(ilist,2);
double bond1 = d_bond_distance[d_shake_type(m,0)];
double bond2 = d_bond_distance[d_shake_type(m,1)];
// r01,r02 = distance vec between atoms
double r01[3];
r01[0] = d_x(i0,0) - d_x(i1,0);
r01[1] = d_x(i0,1) - d_x(i1,1);
r01[2] = d_x(i0,2) - d_x(i1,2);
double r02[3];
r02[0] = d_x(i0,0) - d_x(i2,0);
r02[1] = d_x(i0,1) - d_x(i2,1);
r02[2] = d_x(i0,2) - d_x(i2,2);
// s01,s02 = distance vec after unconstrained update
double s01[3];
s01[0] = d_xshake(i0,0) - d_xshake(i1,0);
s01[1] = d_xshake(i0,1) - d_xshake(i1,1);
s01[2] = d_xshake(i0,2) - d_xshake(i1,2);
double s02[3];
s02[0] = d_xshake(i0,0) - d_xshake(i2,0);
s02[1] = d_xshake(i0,1) - d_xshake(i2,1);
s02[2] = d_xshake(i0,2) - d_xshake(i2,2);
// scalar distances between atoms
double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2];
double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2];
double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2];
double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2];
// matrix coeffs and rhs for lamda equations
if (d_rmass.data()) {
invmass0 = 1.0/d_rmass[i0];
invmass1 = 1.0/d_rmass[i1];
invmass2 = 1.0/d_rmass[i2];
} else {
invmass0 = 1.0/d_mass[d_type[i0]];
invmass1 = 1.0/d_mass[d_type[i1]];
invmass2 = 1.0/d_mass[d_type[i2]];
}
double a11 = 2.0 * (invmass0+invmass1) *
(s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]);
double a12 = 2.0 * invmass0 *
(s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]);
double a21 = 2.0 * invmass0 *
(s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]);
double a22 = 2.0 * (invmass0+invmass2) *
(s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]);
// inverse of matrix
double determ = a11*a22 - a12*a21;
if (determ == 0.0) d_error_flag() = 3;
//error->one(FLERR,"Shake determinant = 0.0");
double determinv = 1.0/determ;
double a11inv = a22*determinv;
double a12inv = -a12*determinv;
double a21inv = -a21*determinv;
double a22inv = a11*determinv;
// quadratic correction coeffs
double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]);
double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq;
double quad1_0202 = invmass0*invmass0 * r02sq;
double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102;
double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq;
double quad2_0101 = invmass0*invmass0 * r01sq;
double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102;
// iterate until converged
double lamda01 = 0.0;
double lamda02 = 0.0;
int niter = 0;
int done = 0;
double quad1,quad2,b1,b2,lamda01_new,lamda02_new;
while (!done && niter < max_iter) {
quad1 = quad1_0101 * lamda01*lamda01 + quad1_0202 * lamda02*lamda02 +
quad1_0102 * lamda01*lamda02;
quad2 = quad2_0101 * lamda01*lamda01 + quad2_0202 * lamda02*lamda02 +
quad2_0102 * lamda01*lamda02;
b1 = bond1*bond1 - s01sq - quad1;
b2 = bond2*bond2 - s02sq - quad2;
lamda01_new = a11inv*b1 + a12inv*b2;
lamda02_new = a21inv*b1 + a22inv*b2;
done = 1;
if (fabs(lamda01_new-lamda01) > tolerance) done = 0;
if (fabs(lamda02_new-lamda02) > tolerance) done = 0;
lamda01 = lamda01_new;
lamda02 = lamda02_new;
// stop iterations before we have a floating point overflow
// max double is < 1.0e308, so 1e150 is a reasonable cutoff
if (fabs(lamda01) > 1e150 || fabs(lamda02) > 1e150) done = 1;
niter++;
}
// update forces if atom is owned by this processor
lamda01 = lamda01/dtfsq;
lamda02 = lamda02/dtfsq;
if (i0 < nlocal) {
a_f(i0,0) += lamda01*r01[0] + lamda02*r02[0];
a_f(i0,1) += lamda01*r01[1] + lamda02*r02[1];
a_f(i0,2) += lamda01*r01[2] + lamda02*r02[2];
}
if (i1 < nlocal) {
a_f(i1,0) -= lamda01*r01[0];
a_f(i1,1) -= lamda01*r01[1];
a_f(i1,2) -= lamda01*r01[2];
}
if (i2 < nlocal) {
a_f(i2,0) -= lamda02*r02[0];
a_f(i2,1) -= lamda02*r02[1];
a_f(i2,2) -= lamda02*r02[2];
}
if (EVFLAG) {
int count = 0;
if (i0 < nlocal) atomlist[count++] = i0;
if (i1 < nlocal) atomlist[count++] = i1;
if (i2 < nlocal) atomlist[count++] = i2;
v[0] = lamda01*r01[0]*r01[0] + lamda02*r02[0]*r02[0];
v[1] = lamda01*r01[1]*r01[1] + lamda02*r02[1]*r02[1];
v[2] = lamda01*r01[2]*r01[2] + lamda02*r02[2]*r02[2];
v[3] = lamda01*r01[0]*r01[1] + lamda02*r02[0]*r02[1];
v[4] = lamda01*r01[0]*r01[2] + lamda02*r02[0]*r02[2];
v[5] = lamda01*r01[1]*r01[2] + lamda02*r02[1]*r02[2];
v_tally<NEIGHFLAG>(ev,count,atomlist,3.0,v);
}
}
/* ----------------------------------------------------------------------
calculate SHAKE constraint forces for size 4 cluster = three bonds
------------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void FixShakeKokkos<DeviceType>::shake4(int ilist, EV_FLOAT& ev) const
{
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
auto v_f = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
auto a_f = v_f.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
int atomlist[4];
double v[6];
double invmass0,invmass1,invmass2,invmass3;
// local atom IDs and constraint distances
int m = d_list[ilist];
int i0 = d_closest_list(ilist,0);
int i1 = d_closest_list(ilist,1);
int i2 = d_closest_list(ilist,2);
int i3 = d_closest_list(ilist,3);
double bond1 = d_bond_distance[d_shake_type(m,0)];
double bond2 = d_bond_distance[d_shake_type(m,1)];
double bond3 = d_bond_distance[d_shake_type(m,2)];
// r01,r02,r03 = distance vec between atoms
double r01[3];
r01[0] = d_x(i0,0) - d_x(i1,0);
r01[1] = d_x(i0,1) - d_x(i1,1);
r01[2] = d_x(i0,2) - d_x(i1,2);
double r02[3];
r02[0] = d_x(i0,0) - d_x(i2,0);
r02[1] = d_x(i0,1) - d_x(i2,1);
r02[2] = d_x(i0,2) - d_x(i2,2);
double r03[3];
r03[0] = d_x(i0,0) - d_x(i3,0);
r03[1] = d_x(i0,1) - d_x(i3,1);
r03[2] = d_x(i0,2) - d_x(i3,2);
// s01,s02,s03 = distance vec after unconstrained update
double s01[3];
s01[0] = d_xshake(i0,0) - d_xshake(i1,0);
s01[1] = d_xshake(i0,1) - d_xshake(i1,1);
s01[2] = d_xshake(i0,2) - d_xshake(i1,2);
double s02[3];
s02[0] = d_xshake(i0,0) - d_xshake(i2,0);
s02[1] = d_xshake(i0,1) - d_xshake(i2,1);
s02[2] = d_xshake(i0,2) - d_xshake(i2,2);
double s03[3];
s03[0] = d_xshake(i0,0) - d_xshake(i3,0);
s03[1] = d_xshake(i0,1) - d_xshake(i3,1);
s03[2] = d_xshake(i0,2) - d_xshake(i3,2);
// scalar distances between atoms
double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2];
double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2];
double r03sq = r03[0]*r03[0] + r03[1]*r03[1] + r03[2]*r03[2];
double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2];
double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2];
double s03sq = s03[0]*s03[0] + s03[1]*s03[1] + s03[2]*s03[2];
// matrix coeffs and rhs for lamda equations
if (d_rmass.data()) {
invmass0 = 1.0/d_rmass[i0];
invmass1 = 1.0/d_rmass[i1];
invmass2 = 1.0/d_rmass[i2];
invmass3 = 1.0/d_rmass[i3];
} else {
invmass0 = 1.0/d_mass[d_type[i0]];
invmass1 = 1.0/d_mass[d_type[i1]];
invmass2 = 1.0/d_mass[d_type[i2]];
invmass3 = 1.0/d_mass[d_type[i3]];
}
double a11 = 2.0 * (invmass0+invmass1) *
(s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]);
double a12 = 2.0 * invmass0 *
(s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]);
double a13 = 2.0 * invmass0 *
(s01[0]*r03[0] + s01[1]*r03[1] + s01[2]*r03[2]);
double a21 = 2.0 * invmass0 *
(s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]);
double a22 = 2.0 * (invmass0+invmass2) *
(s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]);
double a23 = 2.0 * invmass0 *
(s02[0]*r03[0] + s02[1]*r03[1] + s02[2]*r03[2]);
double a31 = 2.0 * invmass0 *
(s03[0]*r01[0] + s03[1]*r01[1] + s03[2]*r01[2]);
double a32 = 2.0 * invmass0 *
(s03[0]*r02[0] + s03[1]*r02[1] + s03[2]*r02[2]);
double a33 = 2.0 * (invmass0+invmass3) *
(s03[0]*r03[0] + s03[1]*r03[1] + s03[2]*r03[2]);
// inverse of matrix;
double determ = a11*a22*a33 + a12*a23*a31 + a13*a21*a32 -
a11*a23*a32 - a12*a21*a33 - a13*a22*a31;
if (determ == 0.0) d_error_flag() = 3;
//error->one(FLERR,"Shake determinant = 0.0");
double determinv = 1.0/determ;
double a11inv = determinv * (a22*a33 - a23*a32);
double a12inv = -determinv * (a12*a33 - a13*a32);
double a13inv = determinv * (a12*a23 - a13*a22);
double a21inv = -determinv * (a21*a33 - a23*a31);
double a22inv = determinv * (a11*a33 - a13*a31);
double a23inv = -determinv * (a11*a23 - a13*a21);
double a31inv = determinv * (a21*a32 - a22*a31);
double a32inv = -determinv * (a11*a32 - a12*a31);
double a33inv = determinv * (a11*a22 - a12*a21);
// quadratic correction coeffs
double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]);
double r0103 = (r01[0]*r03[0] + r01[1]*r03[1] + r01[2]*r03[2]);
double r0203 = (r02[0]*r03[0] + r02[1]*r03[1] + r02[2]*r03[2]);
double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq;
double quad1_0202 = invmass0*invmass0 * r02sq;
double quad1_0303 = invmass0*invmass0 * r03sq;
double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102;
double quad1_0103 = 2.0 * (invmass0+invmass1)*invmass0 * r0103;
double quad1_0203 = 2.0 * invmass0*invmass0 * r0203;
double quad2_0101 = invmass0*invmass0 * r01sq;
double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq;
double quad2_0303 = invmass0*invmass0 * r03sq;
double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102;
double quad2_0103 = 2.0 * invmass0*invmass0 * r0103;
double quad2_0203 = 2.0 * (invmass0+invmass2)*invmass0 * r0203;
double quad3_0101 = invmass0*invmass0 * r01sq;
double quad3_0202 = invmass0*invmass0 * r02sq;
double quad3_0303 = (invmass0+invmass3)*(invmass0+invmass3) * r03sq;
double quad3_0102 = 2.0 * invmass0*invmass0 * r0102;
double quad3_0103 = 2.0 * (invmass0+invmass3)*invmass0 * r0103;
double quad3_0203 = 2.0 * (invmass0+invmass3)*invmass0 * r0203;
// iterate until converged
double lamda01 = 0.0;
double lamda02 = 0.0;
double lamda03 = 0.0;
int niter = 0;
int done = 0;
double quad1,quad2,quad3,b1,b2,b3,lamda01_new,lamda02_new,lamda03_new;
while (!done && niter < max_iter) {
quad1 = quad1_0101 * lamda01*lamda01 +
quad1_0202 * lamda02*lamda02 +
quad1_0303 * lamda03*lamda03 +
quad1_0102 * lamda01*lamda02 +
quad1_0103 * lamda01*lamda03 +
quad1_0203 * lamda02*lamda03;
quad2 = quad2_0101 * lamda01*lamda01 +
quad2_0202 * lamda02*lamda02 +
quad2_0303 * lamda03*lamda03 +
quad2_0102 * lamda01*lamda02 +
quad2_0103 * lamda01*lamda03 +
quad2_0203 * lamda02*lamda03;
quad3 = quad3_0101 * lamda01*lamda01 +
quad3_0202 * lamda02*lamda02 +
quad3_0303 * lamda03*lamda03 +
quad3_0102 * lamda01*lamda02 +
quad3_0103 * lamda01*lamda03 +
quad3_0203 * lamda02*lamda03;
b1 = bond1*bond1 - s01sq - quad1;
b2 = bond2*bond2 - s02sq - quad2;
b3 = bond3*bond3 - s03sq - quad3;
lamda01_new = a11inv*b1 + a12inv*b2 + a13inv*b3;
lamda02_new = a21inv*b1 + a22inv*b2 + a23inv*b3;
lamda03_new = a31inv*b1 + a32inv*b2 + a33inv*b3;
done = 1;
if (fabs(lamda01_new-lamda01) > tolerance) done = 0;
if (fabs(lamda02_new-lamda02) > tolerance) done = 0;
if (fabs(lamda03_new-lamda03) > tolerance) done = 0;
lamda01 = lamda01_new;
lamda02 = lamda02_new;
lamda03 = lamda03_new;
// stop iterations before we have a floating point overflow
// max double is < 1.0e308, so 1e150 is a reasonable cutoff
if (fabs(lamda01) > 1e150 || fabs(lamda02) > 1e150
|| fabs(lamda03) > 1e150) done = 1;
niter++;
}
// update forces if atom is owned by this processor
lamda01 = lamda01/dtfsq;
lamda02 = lamda02/dtfsq;
lamda03 = lamda03/dtfsq;
if (i0 < nlocal) {
a_f(i0,0) += lamda01*r01[0] + lamda02*r02[0] + lamda03*r03[0];
a_f(i0,1) += lamda01*r01[1] + lamda02*r02[1] + lamda03*r03[1];
a_f(i0,2) += lamda01*r01[2] + lamda02*r02[2] + lamda03*r03[2];
}
if (i1 < nlocal) {
a_f(i1,0) -= lamda01*r01[0];
a_f(i1,1) -= lamda01*r01[1];
a_f(i1,2) -= lamda01*r01[2];
}
if (i2 < nlocal) {
a_f(i2,0) -= lamda02*r02[0];
a_f(i2,1) -= lamda02*r02[1];
a_f(i2,2) -= lamda02*r02[2];
}
if (i3 < nlocal) {
a_f(i3,0) -= lamda03*r03[0];
a_f(i3,1) -= lamda03*r03[1];
a_f(i3,2) -= lamda03*r03[2];
}
if (EVFLAG) {
int count = 0;
if (i0 < nlocal) atomlist[count++] = i0;
if (i1 < nlocal) atomlist[count++] = i1;
if (i2 < nlocal) atomlist[count++] = i2;
if (i3 < nlocal) atomlist[count++] = i3;
v[0] = lamda01*r01[0]*r01[0]+lamda02*r02[0]*r02[0]+lamda03*r03[0]*r03[0];
v[1] = lamda01*r01[1]*r01[1]+lamda02*r02[1]*r02[1]+lamda03*r03[1]*r03[1];
v[2] = lamda01*r01[2]*r01[2]+lamda02*r02[2]*r02[2]+lamda03*r03[2]*r03[2];
v[3] = lamda01*r01[0]*r01[1]+lamda02*r02[0]*r02[1]+lamda03*r03[0]*r03[1];
v[4] = lamda01*r01[0]*r01[2]+lamda02*r02[0]*r02[2]+lamda03*r03[0]*r03[2];
v[5] = lamda01*r01[1]*r01[2]+lamda02*r02[1]*r02[2]+lamda03*r03[1]*r03[2];
v_tally<NEIGHFLAG>(ev,count,atomlist,4.0,v);
}
}
/* ----------------------------------------------------------------------
calculate SHAKE constraint forces for size 3 cluster = two bonds + angle
------------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void FixShakeKokkos<DeviceType>::shake3angle(int ilist, EV_FLOAT& ev) const
{
// The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial
auto v_f = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f);
auto a_f = v_f.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
int atomlist[3];
double v[6];
double invmass0,invmass1,invmass2;
// local atom IDs and constraint distances
int m = d_list[ilist];
int i0 = d_closest_list(ilist,0);
int i1 = d_closest_list(ilist,1);
int i2 = d_closest_list(ilist,2);
double bond1 = d_bond_distance[d_shake_type(m,0)];
double bond2 = d_bond_distance[d_shake_type(m,1)];
double bond12 = d_angle_distance[d_shake_type(m,2)];
// r01,r02,r12 = distance vec between atoms
double r01[3];
r01[0] = d_x(i0,0) - d_x(i1,0);
r01[1] = d_x(i0,1) - d_x(i1,1);
r01[2] = d_x(i0,2) - d_x(i1,2);
double r02[3];
r02[0] = d_x(i0,0) - d_x(i2,0);
r02[1] = d_x(i0,1) - d_x(i2,1);
r02[2] = d_x(i0,2) - d_x(i2,2);
double r12[3];
r12[0] = d_x(i1,0) - d_x(i2,0);
r12[1] = d_x(i1,1) - d_x(i2,1);
r12[2] = d_x(i1,2) - d_x(i2,2);
// s01,s02,s12 = distance vec after unconstrained update
double s01[3];
s01[0] = d_xshake(i0,0) - d_xshake(i1,0);
s01[1] = d_xshake(i0,1) - d_xshake(i1,1);
s01[2] = d_xshake(i0,2) - d_xshake(i1,2);
double s02[3];
s02[0] = d_xshake(i0,0) - d_xshake(i2,0);
s02[1] = d_xshake(i0,1) - d_xshake(i2,1);
s02[2] = d_xshake(i0,2) - d_xshake(i2,2);
double s12[3];
s12[0] = d_xshake(i1,0) - d_xshake(i2,0);
s12[1] = d_xshake(i1,1) - d_xshake(i2,1);
s12[2] = d_xshake(i1,2) - d_xshake(i2,2);
// scalar distances between atoms
double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2];
double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2];
double r12sq = r12[0]*r12[0] + r12[1]*r12[1] + r12[2]*r12[2];
double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2];
double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2];
double s12sq = s12[0]*s12[0] + s12[1]*s12[1] + s12[2]*s12[2];
// matrix coeffs and rhs for lamda equations
if (d_rmass.data()) {
invmass0 = 1.0/d_rmass[i0];
invmass1 = 1.0/d_rmass[i1];
invmass2 = 1.0/d_rmass[i2];
} else {
invmass0 = 1.0/d_mass[d_type[i0]];
invmass1 = 1.0/d_mass[d_type[i1]];
invmass2 = 1.0/d_mass[d_type[i2]];
}
double a11 = 2.0 * (invmass0+invmass1) *
(s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]);
double a12 = 2.0 * invmass0 *
(s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]);
double a13 = - 2.0 * invmass1 *
(s01[0]*r12[0] + s01[1]*r12[1] + s01[2]*r12[2]);
double a21 = 2.0 * invmass0 *
(s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]);
double a22 = 2.0 * (invmass0+invmass2) *
(s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]);
double a23 = 2.0 * invmass2 *
(s02[0]*r12[0] + s02[1]*r12[1] + s02[2]*r12[2]);
double a31 = - 2.0 * invmass1 *
(s12[0]*r01[0] + s12[1]*r01[1] + s12[2]*r01[2]);
double a32 = 2.0 * invmass2 *
(s12[0]*r02[0] + s12[1]*r02[1] + s12[2]*r02[2]);
double a33 = 2.0 * (invmass1+invmass2) *
(s12[0]*r12[0] + s12[1]*r12[1] + s12[2]*r12[2]);
// inverse of matrix
double determ = a11*a22*a33 + a12*a23*a31 + a13*a21*a32 -
a11*a23*a32 - a12*a21*a33 - a13*a22*a31;
if (determ == 0.0) d_error_flag() = 3;
//error->one(FLERR,"Shake determinant = 0.0");
double determinv = 1.0/determ;
double a11inv = determinv * (a22*a33 - a23*a32);
double a12inv = -determinv * (a12*a33 - a13*a32);
double a13inv = determinv * (a12*a23 - a13*a22);
double a21inv = -determinv * (a21*a33 - a23*a31);
double a22inv = determinv * (a11*a33 - a13*a31);
double a23inv = -determinv * (a11*a23 - a13*a21);
double a31inv = determinv * (a21*a32 - a22*a31);
double a32inv = -determinv * (a11*a32 - a12*a31);
double a33inv = determinv * (a11*a22 - a12*a21);
// quadratic correction coeffs
double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]);
double r0112 = (r01[0]*r12[0] + r01[1]*r12[1] + r01[2]*r12[2]);
double r0212 = (r02[0]*r12[0] + r02[1]*r12[1] + r02[2]*r12[2]);
double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq;
double quad1_0202 = invmass0*invmass0 * r02sq;
double quad1_1212 = invmass1*invmass1 * r12sq;
double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102;
double quad1_0112 = - 2.0 * (invmass0+invmass1)*invmass1 * r0112;
double quad1_0212 = - 2.0 * invmass0*invmass1 * r0212;
double quad2_0101 = invmass0*invmass0 * r01sq;
double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq;
double quad2_1212 = invmass2*invmass2 * r12sq;
double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102;
double quad2_0112 = 2.0 * invmass0*invmass2 * r0112;
double quad2_0212 = 2.0 * (invmass0+invmass2)*invmass2 * r0212;
double quad3_0101 = invmass1*invmass1 * r01sq;
double quad3_0202 = invmass2*invmass2 * r02sq;
double quad3_1212 = (invmass1+invmass2)*(invmass1+invmass2) * r12sq;
double quad3_0102 = - 2.0 * invmass1*invmass2 * r0102;
double quad3_0112 = - 2.0 * (invmass1+invmass2)*invmass1 * r0112;
double quad3_0212 = 2.0 * (invmass1+invmass2)*invmass2 * r0212;
// iterate until converged
double lamda01 = 0.0;
double lamda02 = 0.0;
double lamda12 = 0.0;
int niter = 0;
int done = 0;
double quad1,quad2,quad3,b1,b2,b3,lamda01_new,lamda02_new,lamda12_new;
while (!done && niter < max_iter) {
quad1 = quad1_0101 * lamda01*lamda01 +
quad1_0202 * lamda02*lamda02 +
quad1_1212 * lamda12*lamda12 +
quad1_0102 * lamda01*lamda02 +
quad1_0112 * lamda01*lamda12 +
quad1_0212 * lamda02*lamda12;
quad2 = quad2_0101 * lamda01*lamda01 +
quad2_0202 * lamda02*lamda02 +
quad2_1212 * lamda12*lamda12 +
quad2_0102 * lamda01*lamda02 +
quad2_0112 * lamda01*lamda12 +
quad2_0212 * lamda02*lamda12;
quad3 = quad3_0101 * lamda01*lamda01 +
quad3_0202 * lamda02*lamda02 +
quad3_1212 * lamda12*lamda12 +
quad3_0102 * lamda01*lamda02 +
quad3_0112 * lamda01*lamda12 +
quad3_0212 * lamda02*lamda12;
b1 = bond1*bond1 - s01sq - quad1;
b2 = bond2*bond2 - s02sq - quad2;
b3 = bond12*bond12 - s12sq - quad3;
lamda01_new = a11inv*b1 + a12inv*b2 + a13inv*b3;
lamda02_new = a21inv*b1 + a22inv*b2 + a23inv*b3;
lamda12_new = a31inv*b1 + a32inv*b2 + a33inv*b3;
done = 1;
if (fabs(lamda01_new-lamda01) > tolerance) done = 0;
if (fabs(lamda02_new-lamda02) > tolerance) done = 0;
if (fabs(lamda12_new-lamda12) > tolerance) done = 0;
lamda01 = lamda01_new;
lamda02 = lamda02_new;
lamda12 = lamda12_new;
// stop iterations before we have a floating point overflow
// max double is < 1.0e308, so 1e150 is a reasonable cutoff
if (fabs(lamda01) > 1e150 || fabs(lamda02) > 1e150
|| fabs(lamda12) > 1e150) done = 1;
niter++;
}
// update forces if atom is owned by this processor
lamda01 = lamda01/dtfsq;
lamda02 = lamda02/dtfsq;
lamda12 = lamda12/dtfsq;
if (i0 < nlocal) {
a_f(i0,0) += lamda01*r01[0] + lamda02*r02[0];
a_f(i0,1) += lamda01*r01[1] + lamda02*r02[1];
a_f(i0,2) += lamda01*r01[2] + lamda02*r02[2];
}
if (i1 < nlocal) {
a_f(i1,0) -= lamda01*r01[0] - lamda12*r12[0];
a_f(i1,1) -= lamda01*r01[1] - lamda12*r12[1];
a_f(i1,2) -= lamda01*r01[2] - lamda12*r12[2];
}
if (i2 < nlocal) {
a_f(i2,0) -= lamda02*r02[0] + lamda12*r12[0];
a_f(i2,1) -= lamda02*r02[1] + lamda12*r12[1];
a_f(i2,2) -= lamda02*r02[2] + lamda12*r12[2];
}
if (EVFLAG) {
int count = 0;
if (i0 < nlocal) atomlist[count++] = i0;
if (i1 < nlocal) atomlist[count++] = i1;
if (i2 < nlocal) atomlist[count++] = i2;
v[0] = lamda01*r01[0]*r01[0]+lamda02*r02[0]*r02[0]+lamda12*r12[0]*r12[0];
v[1] = lamda01*r01[1]*r01[1]+lamda02*r02[1]*r02[1]+lamda12*r12[1]*r12[1];
v[2] = lamda01*r01[2]*r01[2]+lamda02*r02[2]*r02[2]+lamda12*r12[2]*r12[2];
v[3] = lamda01*r01[0]*r01[1]+lamda02*r02[0]*r02[1]+lamda12*r12[0]*r12[1];
v[4] = lamda01*r01[0]*r01[2]+lamda02*r02[0]*r02[2]+lamda12*r12[0]*r12[2];
v[5] = lamda01*r01[1]*r01[2]+lamda02*r02[1]*r02[2]+lamda12*r12[1]*r12[2];
v_tally<NEIGHFLAG>(ev,count,atomlist,3.0,v);
}
}
/* ----------------------------------------------------------------------
print-out bond & angle statistics
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::stats()
{
atomKK->sync(Host,X_MASK);
k_shake_flag.sync_host();
k_shake_atom.sync_host();
k_shake_type.sync_host();
k_list.sync_host();
k_closest_list.sync_host();
FixShake::stats();
}
/* ----------------------------------------------------------------------
allocate local atom-based arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::grow_arrays(int nmax)
{
memoryKK->grow_kokkos(k_shake_flag,shake_flag,nmax,"shake:shake_flag");
memoryKK->grow_kokkos(k_shake_atom,shake_atom,nmax,4,"shake:shake_atom");
memoryKK->grow_kokkos(k_shake_type,shake_type,nmax,3,"shake:shake_type");
memoryKK->destroy_kokkos(k_xshake,xshake);
memoryKK->create_kokkos(k_xshake,xshake,nmax,"shake:xshake");
d_shake_flag = k_shake_flag.view<DeviceType>();
d_shake_atom = k_shake_atom.view<DeviceType>();
d_shake_type = k_shake_type.view<DeviceType>();
d_xshake = k_xshake.view<DeviceType>();
memory->destroy(ftmp);
memory->create(ftmp,nmax,3,"shake:ftmp");
memory->destroy(vtmp);
memory->create(vtmp,nmax,3,"shake:vtmp");
}
/* ----------------------------------------------------------------------
copy values within local atom-based arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
{
k_shake_flag.sync_host();
k_shake_atom.sync_host();
k_shake_type.sync_host();
FixShake::copy_arrays(i,j,delflag);
k_shake_flag.modify_host();
k_shake_atom.modify_host();
k_shake_type.modify_host();
}
/* ----------------------------------------------------------------------
sort local atom-based arrays
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter)
{
// always sort on the device
k_shake_flag.sync_device();
k_shake_atom.sync_device();
k_shake_type.sync_device();
Sorter.sort(LMPDeviceType(), k_shake_flag.d_view);
Sorter.sort(LMPDeviceType(), k_shake_atom.d_view);
Sorter.sort(LMPDeviceType(), k_shake_type.d_view);
k_shake_flag.modify_device();
k_shake_atom.modify_device();
k_shake_type.modify_device();
}
/* ----------------------------------------------------------------------
initialize one atom's array values, called when atom is created
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::set_arrays(int i)
{
k_shake_flag.sync_host();
shake_flag[i] = 0;
k_shake_flag.modify_host();
}
/* ----------------------------------------------------------------------
update one atom's array values
called when molecule is created from fix gcmc
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::update_arrays(int i, int atom_offset)
{
k_shake_flag.sync_host();
k_shake_atom.sync_host();
FixShake::update_arrays(i,atom_offset);
k_shake_flag.modify_host();
k_shake_atom.modify_host();
}
/* ----------------------------------------------------------------------
initialize a molecule inserted by another fix, e.g. deposit or pour
called when molecule is created
nlocalprev = # of atoms on this proc before molecule inserted
tagprev = atom ID previous to new atoms in the molecule
xgeom,vcm,quat ignored
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::set_molecule(int nlocalprev, tagint tagprev, int imol,
double * xgeom, double * vcm, double * quat)
{
atomKK->sync(Host,TAG_MASK|MOLECULE_MASK);
k_shake_flag.sync_host();
k_shake_atom.sync_host();
k_shake_type.sync_host();
FixShake::set_molecule(nlocalprev,tagprev,imol,xgeom,vcm,quat);
k_shake_flag.modify_host();
k_shake_atom.modify_host();
k_shake_type.modify_host();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixShakeKokkos<DeviceType>::pack_exchange_item(const int &mysend, int &offset, const bool &final) const
{
const int i = d_exchange_sendlist(mysend);
int flag = d_shake_flag[i];
if (!final) {
if (flag == 1) offset += 7;
else if (flag == 2) offset += 4;
else if (flag == 3) offset += 6;
else if (flag == 4) offset += 8;
else offset++;
} else {
d_buf[mysend] = nsend + offset;
int m = nsend + offset;
d_buf[m++] = flag;
if (flag == 1) {
d_buf[m++] = d_shake_atom(i,0);
d_buf[m++] = d_shake_atom(i,1);
d_buf[m++] = d_shake_atom(i,2);
d_buf[m++] = d_shake_type(i,0);
d_buf[m++] = d_shake_type(i,1);
d_buf[m++] = d_shake_type(i,2);
} else if (flag == 2) {
d_buf[m++] = d_shake_atom(i,0);
d_buf[m++] = d_shake_atom(i,1);
d_buf[m++] = d_shake_type(i,0);
} else if (flag == 3) {
d_buf[m++] = d_shake_atom(i,0);
d_buf[m++] = d_shake_atom(i,1);
d_buf[m++] = d_shake_atom(i,2);
d_buf[m++] = d_shake_type(i,0);
d_buf[m++] = d_shake_type(i,1);
} else if (flag == 4) {
d_buf[m++] = d_shake_atom(i,0);
d_buf[m++] = d_shake_atom(i,1);
d_buf[m++] = d_shake_atom(i,2);
d_buf[m++] = d_shake_atom(i,3);
d_buf[m++] = d_shake_type(i,0);
d_buf[m++] = d_shake_type(i,1);
d_buf[m++] = d_shake_type(i,2);
}
if (mysend == nsend-1) d_count() = m;
offset = m - nsend;
const int j = d_copylist(mysend);
if (j > -1) {
d_shake_flag[i] = d_shake_flag[j];
int flag = d_shake_flag[i];
if (flag == 1) {
d_shake_atom(i,0) = d_shake_atom(j,0);
d_shake_atom(i,1) = d_shake_atom(j,1);
d_shake_atom(i,2) = d_shake_atom(j,2);
d_shake_type(i,0) = d_shake_type(j,0);
d_shake_type(i,1) = d_shake_type(j,1);
d_shake_type(i,2) = d_shake_type(j,2);
} else if (flag == 2) {
d_shake_atom(i,0) = d_shake_atom(j,0);
d_shake_atom(i,1) = d_shake_atom(j,1);
d_shake_type(i,0) = d_shake_type(j,0);
} else if (flag == 3) {
d_shake_atom(i,0) = d_shake_atom(j,0);
d_shake_atom(i,1) = d_shake_atom(j,1);
d_shake_atom(i,2) = d_shake_atom(j,2);
d_shake_type(i,0) = d_shake_type(j,0);
d_shake_type(i,1) = d_shake_type(j,1);
} else if (flag == 4) {
d_shake_atom(i,0) = d_shake_atom(j,0);
d_shake_atom(i,1) = d_shake_atom(j,1);
d_shake_atom(i,2) = d_shake_atom(j,2);
d_shake_atom(i,3) = d_shake_atom(j,3);
d_shake_type(i,0) = d_shake_type(j,0);
d_shake_type(i,1) = d_shake_type(j,1);
d_shake_type(i,2) = d_shake_type(j,2);
}
}
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int FixShakeKokkos<DeviceType>::pack_exchange_kokkos(
const int &nsend, DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist,
ExecutionSpace space)
{
k_buf.sync<DeviceType>();
k_copylist.sync<DeviceType>();
k_exchange_sendlist.sync<DeviceType>();
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
k_buf.template view<DeviceType>().data(),
k_buf.extent(0)*k_buf.extent(1));
d_copylist = k_copylist.view<DeviceType>();
d_exchange_sendlist = k_exchange_sendlist.view<DeviceType>();
this->nsend = nsend;
k_shake_flag.template sync<DeviceType>();
k_shake_atom.template sync<DeviceType>();
k_shake_type.template sync<DeviceType>();
Kokkos::deep_copy(d_count,0);
copymode = 1;
FixShakeKokkosPackExchangeFunctor<DeviceType> pack_exchange_functor(this);
Kokkos::parallel_scan(nsend,pack_exchange_functor);
copymode = 0;
k_buf.modify<DeviceType>();
if (space == Host) k_buf.sync<LMPHostType>();
else k_buf.sync<LMPDeviceType>();
k_shake_flag.template modify<DeviceType>();
k_shake_atom.template modify<DeviceType>();
k_shake_type.template modify<DeviceType>();
Kokkos::deep_copy(h_count,d_count);
return h_count();
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixShakeKokkos<DeviceType>::operator()(TagFixShakeUnpackExchange, const int &i) const
{
int index = d_indices(i);
if (index > -1) {
int m = d_buf[i];
int flag = d_shake_flag[index] = static_cast<int> (d_buf[m++]);
if (flag == 1) {
d_shake_atom(index,0) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,1) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,2) = static_cast<tagint> (d_buf[m++]);
d_shake_type(index,0) = static_cast<int> (d_buf[m++]);
d_shake_type(index,1) = static_cast<int> (d_buf[m++]);
d_shake_type(index,2) = static_cast<int> (d_buf[m++]);
} else if (flag == 2) {
d_shake_atom(index,0) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,1) = static_cast<tagint> (d_buf[m++]);
d_shake_type(index,0) = static_cast<int> (d_buf[m++]);
} else if (flag == 3) {
d_shake_atom(index,0) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,1) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,2) = static_cast<tagint> (d_buf[m++]);
d_shake_type(index,0) = static_cast<int> (d_buf[m++]);
d_shake_type(index,1) = static_cast<int> (d_buf[m++]);
} else if (flag == 4) {
d_shake_atom(index,0) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,1) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,2) = static_cast<tagint> (d_buf[m++]);
d_shake_atom(index,3) = static_cast<tagint> (d_buf[m++]);
d_shake_type(index,0) = static_cast<int> (d_buf[m++]);
d_shake_type(index,1) = static_cast<int> (d_buf[m++]);
d_shake_type(index,2) = static_cast<int> (d_buf[m++]);
}
}
}
/* ---------------------------------------------------------------------- */
template <class DeviceType>
void FixShakeKokkos<DeviceType>::unpack_exchange_kokkos(
DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
ExecutionSpace /*space*/)
{
k_buf.sync<DeviceType>();
k_indices.sync<DeviceType>();
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
k_buf.template view<DeviceType>().data(),
k_buf.extent(0)*k_buf.extent(1));
d_indices = k_indices.view<DeviceType>();
k_shake_flag.template sync<DeviceType>();
k_shake_atom.template sync<DeviceType>();
k_shake_type.template sync<DeviceType>();
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShakeUnpackExchange>(0,nrecv),*this);
copymode = 0;
k_shake_flag.template modify<DeviceType>();
k_shake_atom.template modify<DeviceType>();
k_shake_type.template modify<DeviceType>();
}
/* ----------------------------------------------------------------------
pack values in local atom-based arrays for exchange with another proc
------------------------------------------------------------------------- */
template<class DeviceType>
int FixShakeKokkos<DeviceType>::pack_exchange(int i, double *buf)
{
k_shake_flag.sync_host();
k_shake_atom.sync_host();
k_shake_type.sync_host();
int m = FixShake::pack_exchange(i,buf);
k_shake_flag.modify_host();
k_shake_atom.modify_host();
k_shake_type.modify_host();
return m;
}
/* ----------------------------------------------------------------------
unpack values in local atom-based arrays from exchange with another proc
------------------------------------------------------------------------- */
template<class DeviceType>
int FixShakeKokkos<DeviceType>::unpack_exchange(int nlocal, double *buf)
{
k_shake_flag.sync_host();
k_shake_atom.sync_host();
k_shake_type.sync_host();
int m = FixShake::unpack_exchange(nlocal,buf);
k_shake_flag.modify_host();
k_shake_atom.modify_host();
k_shake_type.modify_host();
return m;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int FixShakeKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist,
int iswap_in, DAT::tdual_xfloat_1d &k_buf,
int pbc_flag, int* pbc)
{
d_sendlist = k_sendlist.view<DeviceType>();
iswap = iswap_in;
d_buf = k_buf.view<DeviceType>();
if (domain->triclinic == 0) {
dx = pbc[0]*domain->xprd;
dy = pbc[1]*domain->yprd;
dz = pbc[2]*domain->zprd;
} else {
dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz;
dy = pbc[1]*domain->yprd + pbc[3]*domain->yz;
dz = pbc[2]*domain->zprd;
}
if (pbc_flag)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixShakePackForwardComm<1> >(0,n),*this);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixShakePackForwardComm<0> >(0,n),*this);
return n*3;
}
template<class DeviceType>
template<int PBC_FLAG>
KOKKOS_INLINE_FUNCTION
void FixShakeKokkos<DeviceType>::operator()(TagFixShakePackForwardComm<PBC_FLAG>, const int &i) const {
const int j = d_sendlist(iswap, i);
if (PBC_FLAG == 0) {
d_buf[3*i] = d_xshake(j,0);
d_buf[3*i+1] = d_xshake(j,1);
d_buf[3*i+2] = d_xshake(j,2);
} else {
d_buf[3*i] = d_xshake(j,0) + dx;
d_buf[3*i+1] = d_xshake(j,1) + dy;
d_buf[3*i+2] = d_xshake(j,2) + dz;
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
int FixShakeKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
k_xshake.sync_host();
int m = FixShake::pack_forward_comm(n,list,buf,pbc_flag,pbc);
k_xshake.modify_host();
return m;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf)
{
first = first_in;
d_buf = buf.view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixShakeUnpackForwardComm>(0,n),*this);
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixShakeKokkos<DeviceType>::operator()(TagFixShakeUnpackForwardComm, const int &i) const {
d_xshake(i + first,0) = d_buf[3*i];
d_xshake(i + first,1) = d_buf[3*i+1];
d_xshake(i + first,2) = d_buf[3*i+2];
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
{
k_xshake.sync_host();
FixShake::unpack_forward_comm(n,first,buf);
k_xshake.modify_host();
}
/* ----------------------------------------------------------------------
add coordinate constraining forces
this method is called at the end of a timestep
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::shake_end_of_step(int vflag) {
dtv = update->dt;
dtfsq = 0.5 * update->dt * update->dt * force->ftm2v;
FixShakeKokkos<DeviceType>::post_force(vflag);
if (!rattle) dtfsq = update->dt * update->dt * force->ftm2v;
}
/* ----------------------------------------------------------------------
calculate constraining forces based on the current configuration
change coordinates
------------------------------------------------------------------------- */
template<class DeviceType>
void FixShakeKokkos<DeviceType>::correct_coordinates(int vflag) {
x = atom->x;
v = atom->v;
f = atom->f;
mass = atom->mass;
rmass = atom->rmass;
type = atom->type;
atomKK->sync(Host,X_MASK|V_MASK|F_MASK);
// save current forces and velocities so that you
// initialize them to zero such that FixShake::unconstrained_coordinate_update has no effect
for (int j=0; j<nlocal; j++) {
for (int k=0; k<3; k++) {
// store current value of forces and velocities
ftmp[j][k] = f[j][k];
vtmp[j][k] = v[j][k];
// set f and v to zero for SHAKE
v[j][k] = 0;
f[j][k] = 0;
}
}
atomKK->modified(Host,V_MASK|F_MASK);
// call SHAKE to correct the coordinates which were updated without constraints
// IMPORTANT: use 1 as argument and thereby enforce velocity Verlet
dtfsq = 0.5 * update->dt * update->dt * force->ftm2v;
FixShakeKokkos<DeviceType>::post_force(vflag);
atomKK->sync(Host,X_MASK|F_MASK);
// integrate coordinates: x' = xnp1 + dt^2/2m_i * f, where f is the constraining force
// NOTE: After this command, the coordinates geometry of the molecules will be correct!
double dtfmsq;
if (rmass) {
for (int i = 0; i < nlocal; i++) {
dtfmsq = dtfsq/ rmass[i];
x[i][0] = x[i][0] + dtfmsq*f[i][0];
x[i][1] = x[i][1] + dtfmsq*f[i][1];
x[i][2] = x[i][2] + dtfmsq*f[i][2];
}
}
else {
for (int i = 0; i < nlocal; i++) {
dtfmsq = dtfsq / mass[type[i]];
x[i][0] = x[i][0] + dtfmsq*f[i][0];
x[i][1] = x[i][1] + dtfmsq*f[i][1];
x[i][2] = x[i][2] + dtfmsq*f[i][2];
}
}
// copy forces and velocities back
for (int j=0; j<nlocal; j++) {
for (int k=0; k<3; k++) {
f[j][k] = ftmp[j][k];
v[j][k] = vtmp[j][k];
}
}
if (!rattle) dtfsq = update->dt * update->dt * force->ftm2v;
// communicate changes
// NOTE: for compatibility xshake is temporarily set to x, such that pack/unpack_forward
// can be used for communicating the coordinates.
double **xtmp = xshake;
xshake = x;
forward_comm_device = 0;
comm->forward_comm(this);
forward_comm_device = 1;
xshake = xtmp;
atomKK->modified(Host,X_MASK|V_MASK|F_MASK);
}
/* ----------------------------------------------------------------------
tally virial into global and per-atom accumulators
n = # of local owned atoms involved, with local indices in list
v = total virial for the interaction involving total atoms
increment global virial by n/total fraction
increment per-atom virial of each atom in list by 1/total fraction
this method can be used when fix computes forces in post_force()
e.g. fix shake, fix rigid: compute virial only on owned atoms
whether newton_bond is on or off
other procs will tally left-over fractions for atoms they own
------------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void FixShakeKokkos<DeviceType>::v_tally(EV_FLOAT &ev, int n, int *atomlist, double total,
double *v) const
{
int m;
if (vflag_global) {
double fraction = n/total;
ev.v[0] += fraction*v[0];
ev.v[1] += fraction*v[1];
ev.v[2] += fraction*v[2];
ev.v[3] += fraction*v[3];
ev.v[4] += fraction*v[4];
ev.v[5] += fraction*v[5];
}
if (vflag_atom) {
double fraction = 1.0/total;
for (int i = 0; i < n; i++) {
auto v_vatom = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom);
auto a_vatom = v_vatom.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
m = atomlist[i];
a_vatom(m,0) += fraction*v[0];
a_vatom(m,1) += fraction*v[1];
a_vatom(m,2) += fraction*v[2];
a_vatom(m,3) += fraction*v[3];
a_vatom(m,4) += fraction*v[4];
a_vatom(m,5) += fraction*v[5];
}
}
}
/* ----------------------------------------------------------------------
return local index of atom J or any of its images that is closest to atom I
if J is not a valid index like -1, just return it
copied from domain.cpp
------------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
int FixShakeKokkos<DeviceType>::closest_image(const int i, int j) const
{
if (j < 0) return j;
const X_FLOAT xi0 = d_x(i,0);
const X_FLOAT xi1 = d_x(i,1);
const X_FLOAT xi2 = d_x(i,2);
int closest = j;
X_FLOAT delx = xi0 - d_x(j,0);
X_FLOAT dely = xi1 - d_x(j,1);
X_FLOAT delz = xi2 - d_x(j,2);
X_FLOAT rsqmin = delx*delx + dely*dely + delz*delz;
X_FLOAT rsq;
while (d_sametag[j] >= 0) {
j = d_sametag[j];
delx = xi0 - d_x(j,0);
dely = xi1 - d_x(j,1);
delz = xi2 - d_x(j,2);
rsq = delx*delx + dely*dely + delz*delz;
if (rsq < rsqmin) {
rsqmin = rsq;
closest = j;
}
}
return closest;
}
/* ---------------------------------------------------------------------- */
namespace LAMMPS_NS {
template class FixShakeKokkos<LMPDeviceType>;
#ifdef LMP_KOKKOS_GPU
template class FixShakeKokkos<LMPHostType>;
#endif
}