// clang-format off /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories LAMMPS development team: developers@lammps.org Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "fix_shake_kokkos.h" #include "fix_rattle.h" #include "atom_kokkos.h" #include "atom_vec.h" #include "molecule.h" #include "update.h" #include "respa.h" #include "modify.h" #include "domain.h" #include "force.h" #include "bond.h" #include "angle.h" #include "comm.h" #include "group.h" #include "fix_respa.h" #include "math_const.h" #include "memory_kokkos.h" #include "error.h" #include "kokkos.h" #include "atom_masks.h" #include #include using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; #define RVOUS 1 // 0 for irregular, 1 for all2all #define BIG 1.0e20 #define MASSDELTA 0.1 /* ---------------------------------------------------------------------- */ template FixShakeKokkos::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) : FixShake(lmp, narg, arg) { kokkosable = 1; forward_comm_device = exchange_comm_device = sort_device = 1; maxexchange = 9; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; memory->destroy(xshake); shake_flag_tmp = shake_flag; shake_atom_tmp = shake_atom; shake_type_tmp = shake_type; shake_flag = nullptr; shake_atom = nullptr; shake_type = nullptr; int nmax = atom->nmax; grow_arrays(nmax); for (int i = 0; i < atom->nlocal; i++) { k_shake_flag.h_view[i] = shake_flag_tmp[i]; k_shake_atom.h_view(i,0) = shake_atom_tmp[i][0]; k_shake_atom.h_view(i,1) = shake_atom_tmp[i][1]; k_shake_atom.h_view(i,2) = shake_atom_tmp[i][2]; k_shake_atom.h_view(i,3) = shake_atom_tmp[i][3]; k_shake_type.h_view(i,0) = shake_type_tmp[i][0]; k_shake_type.h_view(i,1) = shake_type_tmp[i][1]; k_shake_type.h_view(i,2) = shake_type_tmp[i][2]; } k_shake_flag.modify_host(); k_shake_atom.modify_host(); k_shake_type.modify_host(); k_bond_distance = DAT::tdual_float_1d("fix_shake:bond_distance",atom->nbondtypes+1); k_angle_distance = DAT::tdual_float_1d("fix_shake:angle_distance",atom->nangletypes+1); d_bond_distance = k_bond_distance.view(); d_angle_distance = k_angle_distance.view(); // use 1D view for scalars to reduce GPU memory operations d_scalars = typename AT::t_int_1d("neighbor:scalars",2); h_scalars = HAT::t_int_1d("neighbor:scalars_mirror",2); d_error_flag = Kokkos::subview(d_scalars,0); d_nlist = Kokkos::subview(d_scalars,1); h_error_flag = Kokkos::subview(h_scalars,0); h_nlist = Kokkos::subview(h_scalars,1); d_count = typename AT::t_int_scalar("fix_shake:count"); h_count = Kokkos::create_mirror_view(d_count); memory->destroy(shake_flag_tmp); memory->destroy(shake_atom_tmp); memory->destroy(shake_type_tmp); } /* ---------------------------------------------------------------------- */ template FixShakeKokkos::~FixShakeKokkos() { if (copymode) return; k_shake_flag.sync_host(); k_shake_atom.sync_host(); for (int i = 0; i < nlocal; i++) { if (shake_flag[i] == 0) continue; else if (shake_flag[i] == 1) { bondtype_findset(i,shake_atom[i][0],shake_atom[i][1],1); bondtype_findset(i,shake_atom[i][0],shake_atom[i][2],1); angletype_findset(i,shake_atom[i][1],shake_atom[i][2],1); } else if (shake_flag[i] == 2) { bondtype_findset(i,shake_atom[i][0],shake_atom[i][1],1); } else if (shake_flag[i] == 3) { bondtype_findset(i,shake_atom[i][0],shake_atom[i][1],1); bondtype_findset(i,shake_atom[i][0],shake_atom[i][2],1); } else if (shake_flag[i] == 4) { bondtype_findset(i,shake_atom[i][0],shake_atom[i][1],1); bondtype_findset(i,shake_atom[i][0],shake_atom[i][2],1); bondtype_findset(i,shake_atom[i][0],shake_atom[i][3],1); } } memoryKK->destroy_kokkos(k_shake_flag,shake_flag); memoryKK->destroy_kokkos(k_shake_atom,shake_atom); memoryKK->destroy_kokkos(k_shake_type,shake_type); memoryKK->destroy_kokkos(k_xshake,xshake); memoryKK->destroy_kokkos(k_list,list); memoryKK->destroy_kokkos(k_closest_list,closest_list); memoryKK->destroy_kokkos(k_vatom,vatom); } /* ---------------------------------------------------------------------- set bond and angle distances this init must happen after force->bond and force->angle inits ------------------------------------------------------------------------- */ template void FixShakeKokkos::init() { FixShake::init(); if (utils::strmatch(update->integrate_style,"^respa")) error->all(FLERR,"Cannot yet use respa with Kokkos"); if (rattle) error->all(FLERR,"Cannot yet use KOKKOS package with fix rattle"); // set equilibrium bond distances for (int i = 1; i <= atom->nbondtypes; i++) k_bond_distance.h_view[i] = bond_distance[i]; // set equilibrium angle distances for (int i = 1; i <= atom->nangletypes; i++) k_angle_distance.h_view[i] = angle_distance[i]; k_bond_distance.modify_host(); k_angle_distance.modify_host(); k_bond_distance.sync(); k_angle_distance.sync(); } /* ---------------------------------------------------------------------- run setup for minimization. ------------------------------------------------------------------------- */ template void FixShakeKokkos::min_setup(int /*vflag*/) { error->all(FLERR, "Cannot yet use fix {} during minimization with Kokkos", style); } /* ---------------------------------------------------------------------- build list of SHAKE clusters to constrain if one or more atoms in cluster are on this proc, this proc lists the cluster exactly once ------------------------------------------------------------------------- */ template void FixShakeKokkos::pre_neighbor() { // local copies of atom quantities // used by SHAKE until next re-neighboring atomKK->sync(execution_space,X_MASK); ebond = 0.0; d_x = atomKK->k_x.view(); nlocal = atom->nlocal; map_style = atom->map_style; if (map_style == Atom::MAP_ARRAY) { k_map_array = atomKK->k_map_array; k_map_array.template sync(); } else if (map_style == Atom::MAP_HASH) { k_map_hash = atomKK->k_map_hash; k_map_hash.template sync(); } k_shake_flag.sync(); k_shake_atom.sync(); // extend size of SHAKE list if necessary if (nlocal > maxlist) { maxlist = atom->nmax; memoryKK->destroy_kokkos(k_list,list); memoryKK->create_kokkos(k_list,list,maxlist,"shake:list"); d_list = k_list.view(); memoryKK->destroy_kokkos(k_closest_list,closest_list); memoryKK->create_kokkos(k_closest_list,closest_list,maxlist,4,"shake:closest_list"); d_closest_list = k_closest_list.view(); } // Atom Map map_style = atom->map_style; if (map_style == Atom::MAP_ARRAY) { k_map_array = atomKK->k_map_array; k_map_array.template sync(); } else if (map_style == Atom::MAP_HASH) { k_map_hash = atomKK->k_map_hash; k_map_hash.template sync(); } atomKK->k_sametag.sync(); d_sametag = atomKK->k_sametag.view(); // build list of SHAKE clusters I compute Kokkos::deep_copy(d_scalars,0); copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); copymode = 0; k_list.modify(); k_closest_list.modify(); Kokkos::deep_copy(h_scalars,d_scalars); nlist = h_nlist(); if (h_error_flag() == 1) { error->one(FLERR,"Shake atoms missing on proc " "{} at step {}",comm->me,update->ntimestep); } } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::operator()(TagFixShakePreNeighbor, const int &i) const { if (d_shake_flag[i]) { if (d_shake_flag[i] == 2) { int atom1 = AtomKokkos::map_kokkos(d_shake_atom(i,0),map_style,k_map_array,k_map_hash); int atom2 = AtomKokkos::map_kokkos(d_shake_atom(i,1),map_style,k_map_array,k_map_hash); if (atom1 == -1 || atom2 == -1) { d_error_flag() = 1; } atom1 = closest_image(i, atom1); atom2 = closest_image(i, atom2); if (i <= atom1 && i <= atom2) { const int nlist = Kokkos::atomic_fetch_add(&d_nlist(),1); d_list[nlist] = i; d_closest_list(nlist,0) = atom1; d_closest_list(nlist,1) = atom2; } } else if (d_shake_flag[i] % 2 == 1) { int atom1 = AtomKokkos::map_kokkos(d_shake_atom(i,0),map_style,k_map_array,k_map_hash); int atom2 = AtomKokkos::map_kokkos(d_shake_atom(i,1),map_style,k_map_array,k_map_hash); int atom3 = AtomKokkos::map_kokkos(d_shake_atom(i,2),map_style,k_map_array,k_map_hash); if (atom1 == -1 || atom2 == -1 || atom3 == -1) d_error_flag() = 1; atom1 = closest_image(i, atom1); atom2 = closest_image(i, atom2); atom3 = closest_image(i, atom3); if (i <= atom1 && i <= atom2 && i <= atom3) { const int nlist = Kokkos::atomic_fetch_add(&d_nlist(),1); d_list[nlist] = i; d_closest_list(nlist,0) = atom1; d_closest_list(nlist,1) = atom2; d_closest_list(nlist,2) = atom3; } } else { int atom1 = AtomKokkos::map_kokkos(d_shake_atom(i,0),map_style,k_map_array,k_map_hash); int atom2 = AtomKokkos::map_kokkos(d_shake_atom(i,1),map_style,k_map_array,k_map_hash); int atom3 = AtomKokkos::map_kokkos(d_shake_atom(i,2),map_style,k_map_array,k_map_hash); int atom4 = AtomKokkos::map_kokkos(d_shake_atom(i,3),map_style,k_map_array,k_map_hash); if (atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1) d_error_flag() = 1; atom1 = closest_image(i, atom1); atom2 = closest_image(i, atom2); atom3 = closest_image(i, atom3); atom4 = closest_image(i, atom4); if (i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4) { const int nlist = Kokkos::atomic_fetch_add(&d_nlist(),1); d_list[nlist] = i; d_closest_list(nlist,0) = atom1; d_closest_list(nlist,1) = atom2; d_closest_list(nlist,2) = atom3; d_closest_list(nlist,3) = atom4; } } } } /* ---------------------------------------------------------------------- compute the force adjustment for SHAKE constraint ------------------------------------------------------------------------- */ template void FixShakeKokkos::post_force(int vflag) { ebond = 0.0; copymode = 1; d_x = atomKK->k_x.view(); d_f = atomKK->k_f.view(); d_type = atomKK->k_type.view(); d_rmass = atomKK->k_rmass.view(); d_mass = atomKK->k_mass.view(); nlocal = atomKK->nlocal; map_style = atom->map_style; if (map_style == Atom::MAP_ARRAY) { k_map_array = atomKK->k_map_array; k_map_array.template sync(); } else if (map_style == Atom::MAP_HASH) { k_map_hash = atomKK->k_map_hash; k_map_hash.template sync(); } if (d_rmass.data()) atomKK->sync(execution_space,X_MASK|F_MASK|RMASS_MASK); else atomKK->sync(execution_space,X_MASK|F_MASK|TYPE_MASK); k_shake_flag.sync(); k_shake_type.sync(); k_list.sync(); k_closest_list.sync(); if (update->ntimestep == next_output) stats(); // xshake = unconstrained move with current v,f // communicate results if necessary unconstrained_update(); comm->forward_comm(this); k_xshake.sync(); // virial setup v_init(vflag); // reallocate per-atom arrays if necessary if (vflag_atom) { memoryKK->destroy_kokkos(k_vatom,vatom); memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"improper:vatom"); d_vatom = k_vatom.template view(); } neighflag = lmp->kokkos->neighflag; // FULL neighlist still needs atomics in fix shake if (neighflag == FULL) { if (lmp->kokkos->nthreads > 1 || lmp->kokkos->ngpus > 0) neighflag = HALFTHREAD; else neighflag = HALF; } need_dup = 0; if (neighflag != HALF) need_dup = std::is_same_v,Kokkos::Experimental::ScatterDuplicated>; // allocate duplicated memory if (need_dup) { dup_f = Kokkos::Experimental::create_scatter_view(d_f); dup_vatom = Kokkos::Experimental::create_scatter_view(d_vatom); } else { ndup_f = Kokkos::Experimental::create_scatter_view(d_f); ndup_vatom = Kokkos::Experimental::create_scatter_view(d_vatom); } Kokkos::deep_copy(d_error_flag,0); EV_FLOAT ev; // loop over clusters to add constraint forces if (neighflag == HALF) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlist),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,nlist),*this); } else { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlist),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,nlist),*this); } copymode = 0; Kokkos::deep_copy(h_error_flag,d_error_flag); if (h_error_flag() == 2) error->warning(FLERR,"Shake determinant < 0.0"); else if (h_error_flag() == 3) error->one(FLERR,"Shake determinant = 0.0"); // store vflag for coordinate_constraints_end_of_step() vflag_post_force = vflag; // reduction over duplicated memory if (need_dup) Kokkos::Experimental::contribute(d_f,dup_f); atomKK->modified(execution_space,F_MASK); if (vflag_global) { virial[0] += ev.v[0]; virial[1] += ev.v[1]; virial[2] += ev.v[2]; virial[3] += ev.v[3]; virial[4] += ev.v[4]; virial[5] += ev.v[5]; } if (vflag_atom) { k_vatom.template modify(); k_vatom.template sync(); } // free duplicated memory if (need_dup) { dup_f = decltype(dup_f)(); dup_vatom = decltype(dup_vatom)(); } } /* ---------------------------------------------------------------------- substitute shake constraints with very strong bonds ------------------------------------------------------------------------- */ template void FixShakeKokkos::min_post_force(int vflag) { // not yet ported to Kokkos atomKK->sync(Host,X_MASK | F_MASK); k_shake_flag.sync_host(); k_shake_type.sync_host(); k_list.sync_host(); k_closest_list.sync_host(); FixShake::min_post_force(vflag); atomKK->modified(Host,F_MASK); } /* ---------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::operator()(TagFixShakePostForce, const int &i, EV_FLOAT& ev) const { const int m = d_list[i]; if (d_shake_flag[m] == 2) shake(i,ev); else if (d_shake_flag[m] == 3) shake3(i,ev); else if (d_shake_flag[m] == 4) shake4(i,ev); else shake3angle(i,ev); } template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::operator()(TagFixShakePostForce, const int &i) const { EV_FLOAT ev; this->template operator()(TagFixShakePostForce(), i, ev); } /* ---------------------------------------------------------------------- count # of degrees-of-freedom removed by SHAKE for atoms in igroup ------------------------------------------------------------------------- */ template int FixShakeKokkos::dof(int igroup) { d_mask = atomKK->k_mask.view(); d_tag = atomKK->k_tag.view(); nlocal = atom->nlocal; atomKK->sync(execution_space,MASK_MASK|TAG_MASK); k_shake_flag.sync(); k_shake_atom.sync(); // count dof in a cluster if and only if // the central atom is in group and atom i is the central atom int n = 0; { // local variables for lambda capture auto d_shake_flag = this->d_shake_flag; auto d_shake_atom = this->d_shake_atom; auto tag = this->d_tag; auto mask = this->d_mask; auto groupbit = group->bitmask[igroup]; Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), LAMMPS_LAMBDA(const int& i, int& n) { if (!(mask[i] & groupbit)) return; if (d_shake_flag[i] == 0) return; if (d_shake_atom(i,0) != tag[i]) return; if (d_shake_flag[i] == 1) n += 3; else if (d_shake_flag[i] == 2) n += 1; else if (d_shake_flag[i] == 3) n += 2; else if (d_shake_flag[i] == 4) n += 3; },n); } int nall; MPI_Allreduce(&n,&nall,1,MPI_INT,MPI_SUM,world); return nall; } /* ---------------------------------------------------------------------- assumes NVE update, seems to be accurate enough for NVT,NPT,NPH as well ------------------------------------------------------------------------- */ template void FixShakeKokkos::unconstrained_update() { d_x = atomKK->k_x.view(); d_v = atomKK->k_v.view(); d_f = atomKK->k_f.view(); d_type = atomKK->k_type.view(); d_rmass = atomKK->k_rmass.view(); d_mass = atomKK->k_mass.view(); nlocal = atom->nlocal; if (d_rmass.data()) atomKK->sync(execution_space,X_MASK|V_MASK|F_MASK|RMASS_MASK); else atomKK->sync(execution_space,X_MASK|V_MASK|F_MASK|TYPE_MASK); k_shake_flag.sync(); k_xshake.sync(); { // local variables for lambda capture auto d_shake_flag = this->d_shake_flag; auto d_xshake = this->d_xshake; auto x = this->d_x; auto v = this->d_v; auto f = this->d_f; auto dtfsq = this->dtfsq; auto dtv = this->dtv; if (d_rmass.data()) { auto rmass = this->d_rmass; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), LAMMPS_LAMBDA(const int& i) { if (d_shake_flag[i]) { const double dtfmsq = dtfsq / rmass[i]; d_xshake(i,0) = x(i,0) + dtv*v(i,0) + dtfmsq*f(i,0); d_xshake(i,1) = x(i,1) + dtv*v(i,1) + dtfmsq*f(i,1); d_xshake(i,2) = x(i,2) + dtv*v(i,2) + dtfmsq*f(i,2); } else d_xshake(i,2) = d_xshake(i,1) = d_xshake(i,0) = 0.0; }); } else { auto mass = this->d_mass; auto type = this->d_type; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), LAMMPS_LAMBDA(const int& i) { if (d_shake_flag[i]) { const double dtfmsq = dtfsq / mass[type[i]]; d_xshake(i,0) = x(i,0) + dtv*v(i,0) + dtfmsq*f(i,0); d_xshake(i,1) = x(i,1) + dtv*v(i,1) + dtfmsq*f(i,1); d_xshake(i,2) = x(i,2) + dtv*v(i,2) + dtfmsq*f(i,2); } else d_xshake(i,2) = d_xshake(i,1) = d_xshake(i,0) = 0.0; }); } } k_xshake.modify(); } /* ---------------------------------------------------------------------- calculate SHAKE constraint forces for size 2 cluster = single bond ------------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::shake(int ilist, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); int atomlist[2]; double v[6]; double invmass0,invmass1; // local atom IDs and constraint distances int m = d_list[ilist]; int i0 = d_closest_list(ilist,0); int i1 = d_closest_list(ilist,1); double bond1 = d_bond_distance[d_shake_type(m,0)]; // r01 = distance vec between atoms double r01[3]; r01[0] = d_x(i0,0) - d_x(i1,0); r01[1] = d_x(i0,1) - d_x(i1,1); r01[2] = d_x(i0,2) - d_x(i1,2); // s01 = distance vec after unconstrained update double s01[3]; s01[0] = d_xshake(i0,0) - d_xshake(i1,0); s01[1] = d_xshake(i0,1) - d_xshake(i1,1); s01[2] = d_xshake(i0,2) - d_xshake(i1,2); // scalar distances between atoms double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2]; double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2]; // a,b,c = coeffs in quadratic equation for lamda if (d_rmass.data()) { invmass0 = 1.0/d_rmass[i0]; invmass1 = 1.0/d_rmass[i1]; } else { invmass0 = 1.0/d_mass[d_type[i0]]; invmass1 = 1.0/d_mass[d_type[i1]]; } double a = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; double b = 2.0 * (invmass0+invmass1) * (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); double c = s01sq - bond1*bond1; // error check double determ = b*b - 4.0*a*c; if (determ < 0.0) { //error->warning(FLERR,"Shake determinant < 0.0",0); d_error_flag() = 2; determ = 0.0; } // exact quadratic solution for lamda double lamda,lamda1,lamda2; lamda1 = (-b+sqrt(determ)) / (2.0*a); lamda2 = (-b-sqrt(determ)) / (2.0*a); if (fabs(lamda1) <= fabs(lamda2)) lamda = lamda1; else lamda = lamda2; // update forces if atom is owned by this processor lamda /= dtfsq; if (i0 < nlocal) { a_f(i0,0) += lamda*r01[0]; a_f(i0,1) += lamda*r01[1]; a_f(i0,2) += lamda*r01[2]; } if (i1 < nlocal) { a_f(i1,0) -= lamda*r01[0]; a_f(i1,1) -= lamda*r01[1]; a_f(i1,2) -= lamda*r01[2]; } if (EVFLAG) { int count = 0; if (i0 < nlocal) atomlist[count++] = i0; if (i1 < nlocal) atomlist[count++] = i1; v[0] = lamda*r01[0]*r01[0]; v[1] = lamda*r01[1]*r01[1]; v[2] = lamda*r01[2]*r01[2]; v[3] = lamda*r01[0]*r01[1]; v[4] = lamda*r01[0]*r01[2]; v[5] = lamda*r01[1]*r01[2]; v_tally(ev,count,atomlist,2.0,v); } } /* ---------------------------------------------------------------------- calculate SHAKE constraint forces for size 3 cluster = two bonds ------------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::shake3(int ilist, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); int atomlist[3]; double v[6]; double invmass0,invmass1,invmass2; // local atom IDs and constraint distances int m = d_list[ilist]; int i0 = d_closest_list(ilist,0); int i1 = d_closest_list(ilist,1); int i2 = d_closest_list(ilist,2); double bond1 = d_bond_distance[d_shake_type(m,0)]; double bond2 = d_bond_distance[d_shake_type(m,1)]; // r01,r02 = distance vec between atoms double r01[3]; r01[0] = d_x(i0,0) - d_x(i1,0); r01[1] = d_x(i0,1) - d_x(i1,1); r01[2] = d_x(i0,2) - d_x(i1,2); double r02[3]; r02[0] = d_x(i0,0) - d_x(i2,0); r02[1] = d_x(i0,1) - d_x(i2,1); r02[2] = d_x(i0,2) - d_x(i2,2); // s01,s02 = distance vec after unconstrained update double s01[3]; s01[0] = d_xshake(i0,0) - d_xshake(i1,0); s01[1] = d_xshake(i0,1) - d_xshake(i1,1); s01[2] = d_xshake(i0,2) - d_xshake(i1,2); double s02[3]; s02[0] = d_xshake(i0,0) - d_xshake(i2,0); s02[1] = d_xshake(i0,1) - d_xshake(i2,1); s02[2] = d_xshake(i0,2) - d_xshake(i2,2); // scalar distances between atoms double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2]; double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2]; double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2]; double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2]; // matrix coeffs and rhs for lamda equations if (d_rmass.data()) { invmass0 = 1.0/d_rmass[i0]; invmass1 = 1.0/d_rmass[i1]; invmass2 = 1.0/d_rmass[i2]; } else { invmass0 = 1.0/d_mass[d_type[i0]]; invmass1 = 1.0/d_mass[d_type[i1]]; invmass2 = 1.0/d_mass[d_type[i2]]; } double a11 = 2.0 * (invmass0+invmass1) * (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); double a12 = 2.0 * invmass0 * (s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]); double a21 = 2.0 * invmass0 * (s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]); double a22 = 2.0 * (invmass0+invmass2) * (s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]); // inverse of matrix double determ = a11*a22 - a12*a21; if (determ == 0.0) d_error_flag() = 3; //error->one(FLERR,"Shake determinant = 0.0"); double determinv = 1.0/determ; double a11inv = a22*determinv; double a12inv = -a12*determinv; double a21inv = -a21*determinv; double a22inv = a11*determinv; // quadratic correction coeffs double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]); double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; double quad1_0202 = invmass0*invmass0 * r02sq; double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102; double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq; double quad2_0101 = invmass0*invmass0 * r01sq; double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102; // iterate until converged double lamda01 = 0.0; double lamda02 = 0.0; int niter = 0; int done = 0; double quad1,quad2,b1,b2,lamda01_new,lamda02_new; while (!done && niter < max_iter) { quad1 = quad1_0101 * lamda01*lamda01 + quad1_0202 * lamda02*lamda02 + quad1_0102 * lamda01*lamda02; quad2 = quad2_0101 * lamda01*lamda01 + quad2_0202 * lamda02*lamda02 + quad2_0102 * lamda01*lamda02; b1 = bond1*bond1 - s01sq - quad1; b2 = bond2*bond2 - s02sq - quad2; lamda01_new = a11inv*b1 + a12inv*b2; lamda02_new = a21inv*b1 + a22inv*b2; done = 1; if (fabs(lamda01_new-lamda01) > tolerance) done = 0; if (fabs(lamda02_new-lamda02) > tolerance) done = 0; lamda01 = lamda01_new; lamda02 = lamda02_new; // stop iterations before we have a floating point overflow // max double is < 1.0e308, so 1e150 is a reasonable cutoff if (fabs(lamda01) > 1e150 || fabs(lamda02) > 1e150) done = 1; niter++; } // update forces if atom is owned by this processor lamda01 = lamda01/dtfsq; lamda02 = lamda02/dtfsq; if (i0 < nlocal) { a_f(i0,0) += lamda01*r01[0] + lamda02*r02[0]; a_f(i0,1) += lamda01*r01[1] + lamda02*r02[1]; a_f(i0,2) += lamda01*r01[2] + lamda02*r02[2]; } if (i1 < nlocal) { a_f(i1,0) -= lamda01*r01[0]; a_f(i1,1) -= lamda01*r01[1]; a_f(i1,2) -= lamda01*r01[2]; } if (i2 < nlocal) { a_f(i2,0) -= lamda02*r02[0]; a_f(i2,1) -= lamda02*r02[1]; a_f(i2,2) -= lamda02*r02[2]; } if (EVFLAG) { int count = 0; if (i0 < nlocal) atomlist[count++] = i0; if (i1 < nlocal) atomlist[count++] = i1; if (i2 < nlocal) atomlist[count++] = i2; v[0] = lamda01*r01[0]*r01[0] + lamda02*r02[0]*r02[0]; v[1] = lamda01*r01[1]*r01[1] + lamda02*r02[1]*r02[1]; v[2] = lamda01*r01[2]*r01[2] + lamda02*r02[2]*r02[2]; v[3] = lamda01*r01[0]*r01[1] + lamda02*r02[0]*r02[1]; v[4] = lamda01*r01[0]*r01[2] + lamda02*r02[0]*r02[2]; v[5] = lamda01*r01[1]*r01[2] + lamda02*r02[1]*r02[2]; v_tally(ev,count,atomlist,3.0,v); } } /* ---------------------------------------------------------------------- calculate SHAKE constraint forces for size 4 cluster = three bonds ------------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::shake4(int ilist, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); int atomlist[4]; double v[6]; double invmass0,invmass1,invmass2,invmass3; // local atom IDs and constraint distances int m = d_list[ilist]; int i0 = d_closest_list(ilist,0); int i1 = d_closest_list(ilist,1); int i2 = d_closest_list(ilist,2); int i3 = d_closest_list(ilist,3); double bond1 = d_bond_distance[d_shake_type(m,0)]; double bond2 = d_bond_distance[d_shake_type(m,1)]; double bond3 = d_bond_distance[d_shake_type(m,2)]; // r01,r02,r03 = distance vec between atoms double r01[3]; r01[0] = d_x(i0,0) - d_x(i1,0); r01[1] = d_x(i0,1) - d_x(i1,1); r01[2] = d_x(i0,2) - d_x(i1,2); double r02[3]; r02[0] = d_x(i0,0) - d_x(i2,0); r02[1] = d_x(i0,1) - d_x(i2,1); r02[2] = d_x(i0,2) - d_x(i2,2); double r03[3]; r03[0] = d_x(i0,0) - d_x(i3,0); r03[1] = d_x(i0,1) - d_x(i3,1); r03[2] = d_x(i0,2) - d_x(i3,2); // s01,s02,s03 = distance vec after unconstrained update double s01[3]; s01[0] = d_xshake(i0,0) - d_xshake(i1,0); s01[1] = d_xshake(i0,1) - d_xshake(i1,1); s01[2] = d_xshake(i0,2) - d_xshake(i1,2); double s02[3]; s02[0] = d_xshake(i0,0) - d_xshake(i2,0); s02[1] = d_xshake(i0,1) - d_xshake(i2,1); s02[2] = d_xshake(i0,2) - d_xshake(i2,2); double s03[3]; s03[0] = d_xshake(i0,0) - d_xshake(i3,0); s03[1] = d_xshake(i0,1) - d_xshake(i3,1); s03[2] = d_xshake(i0,2) - d_xshake(i3,2); // scalar distances between atoms double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2]; double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2]; double r03sq = r03[0]*r03[0] + r03[1]*r03[1] + r03[2]*r03[2]; double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2]; double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2]; double s03sq = s03[0]*s03[0] + s03[1]*s03[1] + s03[2]*s03[2]; // matrix coeffs and rhs for lamda equations if (d_rmass.data()) { invmass0 = 1.0/d_rmass[i0]; invmass1 = 1.0/d_rmass[i1]; invmass2 = 1.0/d_rmass[i2]; invmass3 = 1.0/d_rmass[i3]; } else { invmass0 = 1.0/d_mass[d_type[i0]]; invmass1 = 1.0/d_mass[d_type[i1]]; invmass2 = 1.0/d_mass[d_type[i2]]; invmass3 = 1.0/d_mass[d_type[i3]]; } double a11 = 2.0 * (invmass0+invmass1) * (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); double a12 = 2.0 * invmass0 * (s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]); double a13 = 2.0 * invmass0 * (s01[0]*r03[0] + s01[1]*r03[1] + s01[2]*r03[2]); double a21 = 2.0 * invmass0 * (s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]); double a22 = 2.0 * (invmass0+invmass2) * (s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]); double a23 = 2.0 * invmass0 * (s02[0]*r03[0] + s02[1]*r03[1] + s02[2]*r03[2]); double a31 = 2.0 * invmass0 * (s03[0]*r01[0] + s03[1]*r01[1] + s03[2]*r01[2]); double a32 = 2.0 * invmass0 * (s03[0]*r02[0] + s03[1]*r02[1] + s03[2]*r02[2]); double a33 = 2.0 * (invmass0+invmass3) * (s03[0]*r03[0] + s03[1]*r03[1] + s03[2]*r03[2]); // inverse of matrix; double determ = a11*a22*a33 + a12*a23*a31 + a13*a21*a32 - a11*a23*a32 - a12*a21*a33 - a13*a22*a31; if (determ == 0.0) d_error_flag() = 3; //error->one(FLERR,"Shake determinant = 0.0"); double determinv = 1.0/determ; double a11inv = determinv * (a22*a33 - a23*a32); double a12inv = -determinv * (a12*a33 - a13*a32); double a13inv = determinv * (a12*a23 - a13*a22); double a21inv = -determinv * (a21*a33 - a23*a31); double a22inv = determinv * (a11*a33 - a13*a31); double a23inv = -determinv * (a11*a23 - a13*a21); double a31inv = determinv * (a21*a32 - a22*a31); double a32inv = -determinv * (a11*a32 - a12*a31); double a33inv = determinv * (a11*a22 - a12*a21); // quadratic correction coeffs double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]); double r0103 = (r01[0]*r03[0] + r01[1]*r03[1] + r01[2]*r03[2]); double r0203 = (r02[0]*r03[0] + r02[1]*r03[1] + r02[2]*r03[2]); double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; double quad1_0202 = invmass0*invmass0 * r02sq; double quad1_0303 = invmass0*invmass0 * r03sq; double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102; double quad1_0103 = 2.0 * (invmass0+invmass1)*invmass0 * r0103; double quad1_0203 = 2.0 * invmass0*invmass0 * r0203; double quad2_0101 = invmass0*invmass0 * r01sq; double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq; double quad2_0303 = invmass0*invmass0 * r03sq; double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102; double quad2_0103 = 2.0 * invmass0*invmass0 * r0103; double quad2_0203 = 2.0 * (invmass0+invmass2)*invmass0 * r0203; double quad3_0101 = invmass0*invmass0 * r01sq; double quad3_0202 = invmass0*invmass0 * r02sq; double quad3_0303 = (invmass0+invmass3)*(invmass0+invmass3) * r03sq; double quad3_0102 = 2.0 * invmass0*invmass0 * r0102; double quad3_0103 = 2.0 * (invmass0+invmass3)*invmass0 * r0103; double quad3_0203 = 2.0 * (invmass0+invmass3)*invmass0 * r0203; // iterate until converged double lamda01 = 0.0; double lamda02 = 0.0; double lamda03 = 0.0; int niter = 0; int done = 0; double quad1,quad2,quad3,b1,b2,b3,lamda01_new,lamda02_new,lamda03_new; while (!done && niter < max_iter) { quad1 = quad1_0101 * lamda01*lamda01 + quad1_0202 * lamda02*lamda02 + quad1_0303 * lamda03*lamda03 + quad1_0102 * lamda01*lamda02 + quad1_0103 * lamda01*lamda03 + quad1_0203 * lamda02*lamda03; quad2 = quad2_0101 * lamda01*lamda01 + quad2_0202 * lamda02*lamda02 + quad2_0303 * lamda03*lamda03 + quad2_0102 * lamda01*lamda02 + quad2_0103 * lamda01*lamda03 + quad2_0203 * lamda02*lamda03; quad3 = quad3_0101 * lamda01*lamda01 + quad3_0202 * lamda02*lamda02 + quad3_0303 * lamda03*lamda03 + quad3_0102 * lamda01*lamda02 + quad3_0103 * lamda01*lamda03 + quad3_0203 * lamda02*lamda03; b1 = bond1*bond1 - s01sq - quad1; b2 = bond2*bond2 - s02sq - quad2; b3 = bond3*bond3 - s03sq - quad3; lamda01_new = a11inv*b1 + a12inv*b2 + a13inv*b3; lamda02_new = a21inv*b1 + a22inv*b2 + a23inv*b3; lamda03_new = a31inv*b1 + a32inv*b2 + a33inv*b3; done = 1; if (fabs(lamda01_new-lamda01) > tolerance) done = 0; if (fabs(lamda02_new-lamda02) > tolerance) done = 0; if (fabs(lamda03_new-lamda03) > tolerance) done = 0; lamda01 = lamda01_new; lamda02 = lamda02_new; lamda03 = lamda03_new; // stop iterations before we have a floating point overflow // max double is < 1.0e308, so 1e150 is a reasonable cutoff if (fabs(lamda01) > 1e150 || fabs(lamda02) > 1e150 || fabs(lamda03) > 1e150) done = 1; niter++; } // update forces if atom is owned by this processor lamda01 = lamda01/dtfsq; lamda02 = lamda02/dtfsq; lamda03 = lamda03/dtfsq; if (i0 < nlocal) { a_f(i0,0) += lamda01*r01[0] + lamda02*r02[0] + lamda03*r03[0]; a_f(i0,1) += lamda01*r01[1] + lamda02*r02[1] + lamda03*r03[1]; a_f(i0,2) += lamda01*r01[2] + lamda02*r02[2] + lamda03*r03[2]; } if (i1 < nlocal) { a_f(i1,0) -= lamda01*r01[0]; a_f(i1,1) -= lamda01*r01[1]; a_f(i1,2) -= lamda01*r01[2]; } if (i2 < nlocal) { a_f(i2,0) -= lamda02*r02[0]; a_f(i2,1) -= lamda02*r02[1]; a_f(i2,2) -= lamda02*r02[2]; } if (i3 < nlocal) { a_f(i3,0) -= lamda03*r03[0]; a_f(i3,1) -= lamda03*r03[1]; a_f(i3,2) -= lamda03*r03[2]; } if (EVFLAG) { int count = 0; if (i0 < nlocal) atomlist[count++] = i0; if (i1 < nlocal) atomlist[count++] = i1; if (i2 < nlocal) atomlist[count++] = i2; if (i3 < nlocal) atomlist[count++] = i3; v[0] = lamda01*r01[0]*r01[0]+lamda02*r02[0]*r02[0]+lamda03*r03[0]*r03[0]; v[1] = lamda01*r01[1]*r01[1]+lamda02*r02[1]*r02[1]+lamda03*r03[1]*r03[1]; v[2] = lamda01*r01[2]*r01[2]+lamda02*r02[2]*r02[2]+lamda03*r03[2]*r03[2]; v[3] = lamda01*r01[0]*r01[1]+lamda02*r02[0]*r02[1]+lamda03*r03[0]*r03[1]; v[4] = lamda01*r01[0]*r01[2]+lamda02*r02[0]*r02[2]+lamda03*r03[0]*r03[2]; v[5] = lamda01*r01[1]*r01[2]+lamda02*r02[1]*r02[2]+lamda03*r03[1]*r03[2]; v_tally(ev,count,atomlist,4.0,v); } } /* ---------------------------------------------------------------------- calculate SHAKE constraint forces for size 3 cluster = two bonds + angle ------------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::shake3angle(int ilist, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access>(); int atomlist[3]; double v[6]; double invmass0,invmass1,invmass2; // local atom IDs and constraint distances int m = d_list[ilist]; int i0 = d_closest_list(ilist,0); int i1 = d_closest_list(ilist,1); int i2 = d_closest_list(ilist,2); double bond1 = d_bond_distance[d_shake_type(m,0)]; double bond2 = d_bond_distance[d_shake_type(m,1)]; double bond12 = d_angle_distance[d_shake_type(m,2)]; // r01,r02,r12 = distance vec between atoms double r01[3]; r01[0] = d_x(i0,0) - d_x(i1,0); r01[1] = d_x(i0,1) - d_x(i1,1); r01[2] = d_x(i0,2) - d_x(i1,2); double r02[3]; r02[0] = d_x(i0,0) - d_x(i2,0); r02[1] = d_x(i0,1) - d_x(i2,1); r02[2] = d_x(i0,2) - d_x(i2,2); double r12[3]; r12[0] = d_x(i1,0) - d_x(i2,0); r12[1] = d_x(i1,1) - d_x(i2,1); r12[2] = d_x(i1,2) - d_x(i2,2); // s01,s02,s12 = distance vec after unconstrained update double s01[3]; s01[0] = d_xshake(i0,0) - d_xshake(i1,0); s01[1] = d_xshake(i0,1) - d_xshake(i1,1); s01[2] = d_xshake(i0,2) - d_xshake(i1,2); double s02[3]; s02[0] = d_xshake(i0,0) - d_xshake(i2,0); s02[1] = d_xshake(i0,1) - d_xshake(i2,1); s02[2] = d_xshake(i0,2) - d_xshake(i2,2); double s12[3]; s12[0] = d_xshake(i1,0) - d_xshake(i2,0); s12[1] = d_xshake(i1,1) - d_xshake(i2,1); s12[2] = d_xshake(i1,2) - d_xshake(i2,2); // scalar distances between atoms double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2]; double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2]; double r12sq = r12[0]*r12[0] + r12[1]*r12[1] + r12[2]*r12[2]; double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2]; double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2]; double s12sq = s12[0]*s12[0] + s12[1]*s12[1] + s12[2]*s12[2]; // matrix coeffs and rhs for lamda equations if (d_rmass.data()) { invmass0 = 1.0/d_rmass[i0]; invmass1 = 1.0/d_rmass[i1]; invmass2 = 1.0/d_rmass[i2]; } else { invmass0 = 1.0/d_mass[d_type[i0]]; invmass1 = 1.0/d_mass[d_type[i1]]; invmass2 = 1.0/d_mass[d_type[i2]]; } double a11 = 2.0 * (invmass0+invmass1) * (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); double a12 = 2.0 * invmass0 * (s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]); double a13 = - 2.0 * invmass1 * (s01[0]*r12[0] + s01[1]*r12[1] + s01[2]*r12[2]); double a21 = 2.0 * invmass0 * (s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]); double a22 = 2.0 * (invmass0+invmass2) * (s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]); double a23 = 2.0 * invmass2 * (s02[0]*r12[0] + s02[1]*r12[1] + s02[2]*r12[2]); double a31 = - 2.0 * invmass1 * (s12[0]*r01[0] + s12[1]*r01[1] + s12[2]*r01[2]); double a32 = 2.0 * invmass2 * (s12[0]*r02[0] + s12[1]*r02[1] + s12[2]*r02[2]); double a33 = 2.0 * (invmass1+invmass2) * (s12[0]*r12[0] + s12[1]*r12[1] + s12[2]*r12[2]); // inverse of matrix double determ = a11*a22*a33 + a12*a23*a31 + a13*a21*a32 - a11*a23*a32 - a12*a21*a33 - a13*a22*a31; if (determ == 0.0) d_error_flag() = 3; //error->one(FLERR,"Shake determinant = 0.0"); double determinv = 1.0/determ; double a11inv = determinv * (a22*a33 - a23*a32); double a12inv = -determinv * (a12*a33 - a13*a32); double a13inv = determinv * (a12*a23 - a13*a22); double a21inv = -determinv * (a21*a33 - a23*a31); double a22inv = determinv * (a11*a33 - a13*a31); double a23inv = -determinv * (a11*a23 - a13*a21); double a31inv = determinv * (a21*a32 - a22*a31); double a32inv = -determinv * (a11*a32 - a12*a31); double a33inv = determinv * (a11*a22 - a12*a21); // quadratic correction coeffs double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]); double r0112 = (r01[0]*r12[0] + r01[1]*r12[1] + r01[2]*r12[2]); double r0212 = (r02[0]*r12[0] + r02[1]*r12[1] + r02[2]*r12[2]); double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; double quad1_0202 = invmass0*invmass0 * r02sq; double quad1_1212 = invmass1*invmass1 * r12sq; double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102; double quad1_0112 = - 2.0 * (invmass0+invmass1)*invmass1 * r0112; double quad1_0212 = - 2.0 * invmass0*invmass1 * r0212; double quad2_0101 = invmass0*invmass0 * r01sq; double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq; double quad2_1212 = invmass2*invmass2 * r12sq; double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102; double quad2_0112 = 2.0 * invmass0*invmass2 * r0112; double quad2_0212 = 2.0 * (invmass0+invmass2)*invmass2 * r0212; double quad3_0101 = invmass1*invmass1 * r01sq; double quad3_0202 = invmass2*invmass2 * r02sq; double quad3_1212 = (invmass1+invmass2)*(invmass1+invmass2) * r12sq; double quad3_0102 = - 2.0 * invmass1*invmass2 * r0102; double quad3_0112 = - 2.0 * (invmass1+invmass2)*invmass1 * r0112; double quad3_0212 = 2.0 * (invmass1+invmass2)*invmass2 * r0212; // iterate until converged double lamda01 = 0.0; double lamda02 = 0.0; double lamda12 = 0.0; int niter = 0; int done = 0; double quad1,quad2,quad3,b1,b2,b3,lamda01_new,lamda02_new,lamda12_new; while (!done && niter < max_iter) { quad1 = quad1_0101 * lamda01*lamda01 + quad1_0202 * lamda02*lamda02 + quad1_1212 * lamda12*lamda12 + quad1_0102 * lamda01*lamda02 + quad1_0112 * lamda01*lamda12 + quad1_0212 * lamda02*lamda12; quad2 = quad2_0101 * lamda01*lamda01 + quad2_0202 * lamda02*lamda02 + quad2_1212 * lamda12*lamda12 + quad2_0102 * lamda01*lamda02 + quad2_0112 * lamda01*lamda12 + quad2_0212 * lamda02*lamda12; quad3 = quad3_0101 * lamda01*lamda01 + quad3_0202 * lamda02*lamda02 + quad3_1212 * lamda12*lamda12 + quad3_0102 * lamda01*lamda02 + quad3_0112 * lamda01*lamda12 + quad3_0212 * lamda02*lamda12; b1 = bond1*bond1 - s01sq - quad1; b2 = bond2*bond2 - s02sq - quad2; b3 = bond12*bond12 - s12sq - quad3; lamda01_new = a11inv*b1 + a12inv*b2 + a13inv*b3; lamda02_new = a21inv*b1 + a22inv*b2 + a23inv*b3; lamda12_new = a31inv*b1 + a32inv*b2 + a33inv*b3; done = 1; if (fabs(lamda01_new-lamda01) > tolerance) done = 0; if (fabs(lamda02_new-lamda02) > tolerance) done = 0; if (fabs(lamda12_new-lamda12) > tolerance) done = 0; lamda01 = lamda01_new; lamda02 = lamda02_new; lamda12 = lamda12_new; // stop iterations before we have a floating point overflow // max double is < 1.0e308, so 1e150 is a reasonable cutoff if (fabs(lamda01) > 1e150 || fabs(lamda02) > 1e150 || fabs(lamda12) > 1e150) done = 1; niter++; } // update forces if atom is owned by this processor lamda01 = lamda01/dtfsq; lamda02 = lamda02/dtfsq; lamda12 = lamda12/dtfsq; if (i0 < nlocal) { a_f(i0,0) += lamda01*r01[0] + lamda02*r02[0]; a_f(i0,1) += lamda01*r01[1] + lamda02*r02[1]; a_f(i0,2) += lamda01*r01[2] + lamda02*r02[2]; } if (i1 < nlocal) { a_f(i1,0) -= lamda01*r01[0] - lamda12*r12[0]; a_f(i1,1) -= lamda01*r01[1] - lamda12*r12[1]; a_f(i1,2) -= lamda01*r01[2] - lamda12*r12[2]; } if (i2 < nlocal) { a_f(i2,0) -= lamda02*r02[0] + lamda12*r12[0]; a_f(i2,1) -= lamda02*r02[1] + lamda12*r12[1]; a_f(i2,2) -= lamda02*r02[2] + lamda12*r12[2]; } if (EVFLAG) { int count = 0; if (i0 < nlocal) atomlist[count++] = i0; if (i1 < nlocal) atomlist[count++] = i1; if (i2 < nlocal) atomlist[count++] = i2; v[0] = lamda01*r01[0]*r01[0]+lamda02*r02[0]*r02[0]+lamda12*r12[0]*r12[0]; v[1] = lamda01*r01[1]*r01[1]+lamda02*r02[1]*r02[1]+lamda12*r12[1]*r12[1]; v[2] = lamda01*r01[2]*r01[2]+lamda02*r02[2]*r02[2]+lamda12*r12[2]*r12[2]; v[3] = lamda01*r01[0]*r01[1]+lamda02*r02[0]*r02[1]+lamda12*r12[0]*r12[1]; v[4] = lamda01*r01[0]*r01[2]+lamda02*r02[0]*r02[2]+lamda12*r12[0]*r12[2]; v[5] = lamda01*r01[1]*r01[2]+lamda02*r02[1]*r02[2]+lamda12*r12[1]*r12[2]; v_tally(ev,count,atomlist,3.0,v); } } /* ---------------------------------------------------------------------- print-out bond & angle statistics ------------------------------------------------------------------------- */ template void FixShakeKokkos::stats() { atomKK->sync(Host,X_MASK); k_shake_flag.sync_host(); k_shake_atom.sync_host(); k_shake_type.sync_host(); k_list.sync_host(); k_closest_list.sync_host(); FixShake::stats(); } /* ---------------------------------------------------------------------- allocate local atom-based arrays ------------------------------------------------------------------------- */ template void FixShakeKokkos::grow_arrays(int nmax) { memoryKK->grow_kokkos(k_shake_flag,shake_flag,nmax,"shake:shake_flag"); memoryKK->grow_kokkos(k_shake_atom,shake_atom,nmax,4,"shake:shake_atom"); memoryKK->grow_kokkos(k_shake_type,shake_type,nmax,3,"shake:shake_type"); memoryKK->destroy_kokkos(k_xshake,xshake); memoryKK->create_kokkos(k_xshake,xshake,nmax,"shake:xshake"); d_shake_flag = k_shake_flag.view(); d_shake_atom = k_shake_atom.view(); d_shake_type = k_shake_type.view(); d_xshake = k_xshake.view(); memory->destroy(ftmp); memory->create(ftmp,nmax,3,"shake:ftmp"); memory->destroy(vtmp); memory->create(vtmp,nmax,3,"shake:vtmp"); } /* ---------------------------------------------------------------------- copy values within local atom-based arrays ------------------------------------------------------------------------- */ template void FixShakeKokkos::copy_arrays(int i, int j, int delflag) { k_shake_flag.sync_host(); k_shake_atom.sync_host(); k_shake_type.sync_host(); FixShake::copy_arrays(i,j,delflag); k_shake_flag.modify_host(); k_shake_atom.modify_host(); k_shake_type.modify_host(); } /* ---------------------------------------------------------------------- sort local atom-based arrays ------------------------------------------------------------------------- */ template void FixShakeKokkos::sort_kokkos(Kokkos::BinSort &Sorter) { // always sort on the device k_shake_flag.sync_device(); k_shake_atom.sync_device(); k_shake_type.sync_device(); Sorter.sort(LMPDeviceType(), k_shake_flag.d_view); Sorter.sort(LMPDeviceType(), k_shake_atom.d_view); Sorter.sort(LMPDeviceType(), k_shake_type.d_view); k_shake_flag.modify_device(); k_shake_atom.modify_device(); k_shake_type.modify_device(); } /* ---------------------------------------------------------------------- initialize one atom's array values, called when atom is created ------------------------------------------------------------------------- */ template void FixShakeKokkos::set_arrays(int i) { k_shake_flag.sync_host(); shake_flag[i] = 0; k_shake_flag.modify_host(); } /* ---------------------------------------------------------------------- update one atom's array values called when molecule is created from fix gcmc ------------------------------------------------------------------------- */ template void FixShakeKokkos::update_arrays(int i, int atom_offset) { k_shake_flag.sync_host(); k_shake_atom.sync_host(); FixShake::update_arrays(i,atom_offset); k_shake_flag.modify_host(); k_shake_atom.modify_host(); } /* ---------------------------------------------------------------------- initialize a molecule inserted by another fix, e.g. deposit or pour called when molecule is created nlocalprev = # of atoms on this proc before molecule inserted tagprev = atom ID previous to new atoms in the molecule xgeom,vcm,quat ignored ------------------------------------------------------------------------- */ template void FixShakeKokkos::set_molecule(int nlocalprev, tagint tagprev, int imol, double * xgeom, double * vcm, double * quat) { atomKK->sync(Host,TAG_MASK|MOLECULE_MASK); k_shake_flag.sync_host(); k_shake_atom.sync_host(); k_shake_type.sync_host(); FixShake::set_molecule(nlocalprev,tagprev,imol,xgeom,vcm,quat); k_shake_flag.modify_host(); k_shake_atom.modify_host(); k_shake_type.modify_host(); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::pack_exchange_item(const int &mysend, int &offset, const bool &final) const { const int i = d_exchange_sendlist(mysend); int flag = d_shake_flag[i]; if (!final) { if (flag == 1) offset += 7; else if (flag == 2) offset += 4; else if (flag == 3) offset += 6; else if (flag == 4) offset += 8; else offset++; } else { d_buf[mysend] = nsend + offset; int m = nsend + offset; d_buf[m++] = flag; if (flag == 1) { d_buf[m++] = d_shake_atom(i,0); d_buf[m++] = d_shake_atom(i,1); d_buf[m++] = d_shake_atom(i,2); d_buf[m++] = d_shake_type(i,0); d_buf[m++] = d_shake_type(i,1); d_buf[m++] = d_shake_type(i,2); } else if (flag == 2) { d_buf[m++] = d_shake_atom(i,0); d_buf[m++] = d_shake_atom(i,1); d_buf[m++] = d_shake_type(i,0); } else if (flag == 3) { d_buf[m++] = d_shake_atom(i,0); d_buf[m++] = d_shake_atom(i,1); d_buf[m++] = d_shake_atom(i,2); d_buf[m++] = d_shake_type(i,0); d_buf[m++] = d_shake_type(i,1); } else if (flag == 4) { d_buf[m++] = d_shake_atom(i,0); d_buf[m++] = d_shake_atom(i,1); d_buf[m++] = d_shake_atom(i,2); d_buf[m++] = d_shake_atom(i,3); d_buf[m++] = d_shake_type(i,0); d_buf[m++] = d_shake_type(i,1); d_buf[m++] = d_shake_type(i,2); } if (mysend == nsend-1) d_count() = m; offset = m - nsend; const int j = d_copylist(mysend); if (j > -1) { d_shake_flag[i] = d_shake_flag[j]; int flag = d_shake_flag[i]; if (flag == 1) { d_shake_atom(i,0) = d_shake_atom(j,0); d_shake_atom(i,1) = d_shake_atom(j,1); d_shake_atom(i,2) = d_shake_atom(j,2); d_shake_type(i,0) = d_shake_type(j,0); d_shake_type(i,1) = d_shake_type(j,1); d_shake_type(i,2) = d_shake_type(j,2); } else if (flag == 2) { d_shake_atom(i,0) = d_shake_atom(j,0); d_shake_atom(i,1) = d_shake_atom(j,1); d_shake_type(i,0) = d_shake_type(j,0); } else if (flag == 3) { d_shake_atom(i,0) = d_shake_atom(j,0); d_shake_atom(i,1) = d_shake_atom(j,1); d_shake_atom(i,2) = d_shake_atom(j,2); d_shake_type(i,0) = d_shake_type(j,0); d_shake_type(i,1) = d_shake_type(j,1); } else if (flag == 4) { d_shake_atom(i,0) = d_shake_atom(j,0); d_shake_atom(i,1) = d_shake_atom(j,1); d_shake_atom(i,2) = d_shake_atom(j,2); d_shake_atom(i,3) = d_shake_atom(j,3); d_shake_type(i,0) = d_shake_type(j,0); d_shake_type(i,1) = d_shake_type(j,1); d_shake_type(i,2) = d_shake_type(j,2); } } } } /* ---------------------------------------------------------------------- */ template int FixShakeKokkos::pack_exchange_kokkos( const int &nsend, DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist, ExecutionSpace space) { k_buf.sync(); k_copylist.sync(); k_exchange_sendlist.sync(); d_buf = typename ArrayTypes::t_xfloat_1d_um( k_buf.template view().data(), k_buf.extent(0)*k_buf.extent(1)); d_copylist = k_copylist.view(); d_exchange_sendlist = k_exchange_sendlist.view(); this->nsend = nsend; k_shake_flag.template sync(); k_shake_atom.template sync(); k_shake_type.template sync(); Kokkos::deep_copy(d_count,0); copymode = 1; FixShakeKokkosPackExchangeFunctor pack_exchange_functor(this); Kokkos::parallel_scan(nsend,pack_exchange_functor); copymode = 0; k_buf.modify(); if (space == Host) k_buf.sync(); else k_buf.sync(); k_shake_flag.template modify(); k_shake_atom.template modify(); k_shake_type.template modify(); Kokkos::deep_copy(h_count,d_count); return h_count(); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::operator()(TagFixShakeUnpackExchange, const int &i) const { int index = d_indices(i); if (index > -1) { int m = d_buf[i]; int flag = d_shake_flag[index] = static_cast (d_buf[m++]); if (flag == 1) { d_shake_atom(index,0) = static_cast (d_buf[m++]); d_shake_atom(index,1) = static_cast (d_buf[m++]); d_shake_atom(index,2) = static_cast (d_buf[m++]); d_shake_type(index,0) = static_cast (d_buf[m++]); d_shake_type(index,1) = static_cast (d_buf[m++]); d_shake_type(index,2) = static_cast (d_buf[m++]); } else if (flag == 2) { d_shake_atom(index,0) = static_cast (d_buf[m++]); d_shake_atom(index,1) = static_cast (d_buf[m++]); d_shake_type(index,0) = static_cast (d_buf[m++]); } else if (flag == 3) { d_shake_atom(index,0) = static_cast (d_buf[m++]); d_shake_atom(index,1) = static_cast (d_buf[m++]); d_shake_atom(index,2) = static_cast (d_buf[m++]); d_shake_type(index,0) = static_cast (d_buf[m++]); d_shake_type(index,1) = static_cast (d_buf[m++]); } else if (flag == 4) { d_shake_atom(index,0) = static_cast (d_buf[m++]); d_shake_atom(index,1) = static_cast (d_buf[m++]); d_shake_atom(index,2) = static_cast (d_buf[m++]); d_shake_atom(index,3) = static_cast (d_buf[m++]); d_shake_type(index,0) = static_cast (d_buf[m++]); d_shake_type(index,1) = static_cast (d_buf[m++]); d_shake_type(index,2) = static_cast (d_buf[m++]); } } } /* ---------------------------------------------------------------------- */ template void FixShakeKokkos::unpack_exchange_kokkos( DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv, ExecutionSpace /*space*/) { k_buf.sync(); k_indices.sync(); d_buf = typename ArrayTypes::t_xfloat_1d_um( k_buf.template view().data(), k_buf.extent(0)*k_buf.extent(1)); d_indices = k_indices.view(); k_shake_flag.template sync(); k_shake_atom.template sync(); k_shake_type.template sync(); copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy(0,nrecv),*this); copymode = 0; k_shake_flag.template modify(); k_shake_atom.template modify(); k_shake_type.template modify(); } /* ---------------------------------------------------------------------- pack values in local atom-based arrays for exchange with another proc ------------------------------------------------------------------------- */ template int FixShakeKokkos::pack_exchange(int i, double *buf) { k_shake_flag.sync_host(); k_shake_atom.sync_host(); k_shake_type.sync_host(); int m = FixShake::pack_exchange(i,buf); k_shake_flag.modify_host(); k_shake_atom.modify_host(); k_shake_type.modify_host(); return m; } /* ---------------------------------------------------------------------- unpack values in local atom-based arrays from exchange with another proc ------------------------------------------------------------------------- */ template int FixShakeKokkos::unpack_exchange(int nlocal, double *buf) { k_shake_flag.sync_host(); k_shake_atom.sync_host(); k_shake_type.sync_host(); int m = FixShake::unpack_exchange(nlocal,buf); k_shake_flag.modify_host(); k_shake_atom.modify_host(); k_shake_type.modify_host(); return m; } /* ---------------------------------------------------------------------- */ template int FixShakeKokkos::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &k_buf, int pbc_flag, int* pbc) { d_sendlist = k_sendlist.view(); iswap = iswap_in; d_buf = k_buf.view(); if (domain->triclinic == 0) { dx = pbc[0]*domain->xprd; dy = pbc[1]*domain->yprd; dz = pbc[2]*domain->zprd; } else { dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; dz = pbc[2]*domain->zprd; } if (pbc_flag) Kokkos::parallel_for(Kokkos::RangePolicy >(0,n),*this); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,n),*this); return n*3; } template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::operator()(TagFixShakePackForwardComm, const int &i) const { const int j = d_sendlist(iswap, i); if (PBC_FLAG == 0) { d_buf[3*i] = d_xshake(j,0); d_buf[3*i+1] = d_xshake(j,1); d_buf[3*i+2] = d_xshake(j,2); } else { d_buf[3*i] = d_xshake(j,0) + dx; d_buf[3*i+1] = d_xshake(j,1) + dy; d_buf[3*i+2] = d_xshake(j,2) + dz; } } /* ---------------------------------------------------------------------- */ template int FixShakeKokkos::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) { k_xshake.sync_host(); int m = FixShake::pack_forward_comm(n,list,buf,pbc_flag,pbc); k_xshake.modify_host(); return m; } /* ---------------------------------------------------------------------- */ template void FixShakeKokkos::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf) { first = first_in; d_buf = buf.view(); Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); } template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::operator()(TagFixShakeUnpackForwardComm, const int &i) const { d_xshake(i + first,0) = d_buf[3*i]; d_xshake(i + first,1) = d_buf[3*i+1]; d_xshake(i + first,2) = d_buf[3*i+2]; } /* ---------------------------------------------------------------------- */ template void FixShakeKokkos::unpack_forward_comm(int n, int first, double *buf) { k_xshake.sync_host(); FixShake::unpack_forward_comm(n,first,buf); k_xshake.modify_host(); } /* ---------------------------------------------------------------------- add coordinate constraining forces this method is called at the end of a timestep ------------------------------------------------------------------------- */ template void FixShakeKokkos::shake_end_of_step(int vflag) { dtv = update->dt; dtfsq = 0.5 * update->dt * update->dt * force->ftm2v; FixShakeKokkos::post_force(vflag); if (!rattle) dtfsq = update->dt * update->dt * force->ftm2v; } /* ---------------------------------------------------------------------- calculate constraining forces based on the current configuration change coordinates ------------------------------------------------------------------------- */ template void FixShakeKokkos::correct_coordinates(int vflag) { x = atom->x; v = atom->v; f = atom->f; mass = atom->mass; rmass = atom->rmass; type = atom->type; atomKK->sync(Host,X_MASK|V_MASK|F_MASK); // save current forces and velocities so that you // initialize them to zero such that FixShake::unconstrained_coordinate_update has no effect for (int j=0; jmodified(Host,V_MASK|F_MASK); // call SHAKE to correct the coordinates which were updated without constraints // IMPORTANT: use 1 as argument and thereby enforce velocity Verlet dtfsq = 0.5 * update->dt * update->dt * force->ftm2v; FixShakeKokkos::post_force(vflag); atomKK->sync(Host,X_MASK|F_MASK); // integrate coordinates: x' = xnp1 + dt^2/2m_i * f, where f is the constraining force // NOTE: After this command, the coordinates geometry of the molecules will be correct! double dtfmsq; if (rmass) { for (int i = 0; i < nlocal; i++) { dtfmsq = dtfsq/ rmass[i]; x[i][0] = x[i][0] + dtfmsq*f[i][0]; x[i][1] = x[i][1] + dtfmsq*f[i][1]; x[i][2] = x[i][2] + dtfmsq*f[i][2]; } } else { for (int i = 0; i < nlocal; i++) { dtfmsq = dtfsq / mass[type[i]]; x[i][0] = x[i][0] + dtfmsq*f[i][0]; x[i][1] = x[i][1] + dtfmsq*f[i][1]; x[i][2] = x[i][2] + dtfmsq*f[i][2]; } } // copy forces and velocities back for (int j=0; jdt * update->dt * force->ftm2v; // communicate changes // NOTE: for compatibility xshake is temporarily set to x, such that pack/unpack_forward // can be used for communicating the coordinates. double **xtmp = xshake; xshake = x; forward_comm_device = 0; comm->forward_comm(this); forward_comm_device = 1; xshake = xtmp; atomKK->modified(Host,X_MASK|V_MASK|F_MASK); } /* ---------------------------------------------------------------------- tally virial into global and per-atom accumulators n = # of local owned atoms involved, with local indices in list v = total virial for the interaction involving total atoms increment global virial by n/total fraction increment per-atom virial of each atom in list by 1/total fraction this method can be used when fix computes forces in post_force() e.g. fix shake, fix rigid: compute virial only on owned atoms whether newton_bond is on or off other procs will tally left-over fractions for atoms they own ------------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::v_tally(EV_FLOAT &ev, int n, int *atomlist, double total, double *v) const { int m; if (vflag_global) { double fraction = n/total; ev.v[0] += fraction*v[0]; ev.v[1] += fraction*v[1]; ev.v[2] += fraction*v[2]; ev.v[3] += fraction*v[3]; ev.v[4] += fraction*v[4]; ev.v[5] += fraction*v[5]; } if (vflag_atom) { double fraction = 1.0/total; for (int i = 0; i < n; i++) { auto v_vatom = ScatterViewHelper,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access>(); m = atomlist[i]; a_vatom(m,0) += fraction*v[0]; a_vatom(m,1) += fraction*v[1]; a_vatom(m,2) += fraction*v[2]; a_vatom(m,3) += fraction*v[3]; a_vatom(m,4) += fraction*v[4]; a_vatom(m,5) += fraction*v[5]; } } } /* ---------------------------------------------------------------------- return local index of atom J or any of its images that is closest to atom I if J is not a valid index like -1, just return it copied from domain.cpp ------------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION int FixShakeKokkos::closest_image(const int i, int j) const { if (j < 0) return j; const X_FLOAT xi0 = d_x(i,0); const X_FLOAT xi1 = d_x(i,1); const X_FLOAT xi2 = d_x(i,2); int closest = j; X_FLOAT delx = xi0 - d_x(j,0); X_FLOAT dely = xi1 - d_x(j,1); X_FLOAT delz = xi2 - d_x(j,2); X_FLOAT rsqmin = delx*delx + dely*dely + delz*delz; X_FLOAT rsq; while (d_sametag[j] >= 0) { j = d_sametag[j]; delx = xi0 - d_x(j,0); dely = xi1 - d_x(j,1); delz = xi2 - d_x(j,2); rsq = delx*delx + dely*dely + delz*delz; if (rsq < rsqmin) { rsqmin = rsq; closest = j; } } return closest; } /* ---------------------------------------------------------------------- */ namespace LAMMPS_NS { template class FixShakeKokkos; #ifdef LMP_KOKKOS_GPU template class FixShakeKokkos; #endif }