// clang-format off /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include #include #include #include #include #include "fix_shake_kokkos.h" #include "fix_rattle.h" #include "atom_kokkos.h" #include "atom_vec.h" #include "molecule.h" #include "update.h" #include "respa.h" #include "modify.h" #include "domain.h" #include "force.h" #include "bond.h" #include "angle.h" #include "comm.h" #include "group.h" #include "fix_respa.h" #include "math_const.h" #include "memory_kokkos.h" #include "error.h" #include "kokkos.h" #include "atom_masks.h" using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; #define RVOUS 1 // 0 for irregular, 1 for all2all #define BIG 1.0e20 #define MASSDELTA 0.1 /* ---------------------------------------------------------------------- */ template FixShakeKokkos::FixShakeKokkos(LAMMPS *lmp, int narg, char **arg) : FixShake(lmp, narg, arg) { kokkosable = 1; forward_comm_device = 1; atomKK = (AtomKokkos *)atom; execution_space = ExecutionSpaceFromDevice::space; datamask_read = EMPTY_MASK; datamask_modify = EMPTY_MASK; shake_flag_tmp = shake_flag; shake_atom_tmp = shake_atom; shake_type_tmp = shake_type; shake_flag = nullptr; shake_atom = nullptr; shake_type = nullptr; int nmax = atom->nmax; grow_arrays(nmax); for (int i = 0; i < nmax; i++) { k_shake_flag.h_view[i] = shake_flag_tmp[i]; k_shake_atom.h_view(i,0) = shake_atom_tmp[i][0]; k_shake_atom.h_view(i,1) = shake_atom_tmp[i][1]; k_shake_atom.h_view(i,2) = shake_atom_tmp[i][2]; k_shake_atom.h_view(i,3) = shake_atom_tmp[i][3]; k_shake_type.h_view(i,0) = shake_type_tmp[i][0]; k_shake_type.h_view(i,1) = shake_type_tmp[i][1]; k_shake_type.h_view(i,2) = shake_type_tmp[i][2]; } k_shake_flag.modify_host(); k_shake_atom.modify_host(); k_shake_type.modify_host(); k_bond_distance = DAT::tdual_float_1d("fix_shake:bond_distance",atom->nbondtypes+1); k_angle_distance = DAT::tdual_float_1d("fix_shake:angle_distance",atom->nangletypes+1); d_bond_distance = k_bond_distance.view(); d_angle_distance = k_angle_distance.view(); // use 1D view for scalars to reduce GPU memory operations d_scalars = typename AT::t_int_1d("neighbor:scalars",2); h_scalars = HAT::t_int_1d("neighbor:scalars_mirror",2); d_error_flag = Kokkos::subview(d_scalars,0); d_nlist = Kokkos::subview(d_scalars,1); h_error_flag = Kokkos::subview(h_scalars,0); h_nlist = Kokkos::subview(h_scalars,1); memory->destroy(shake_flag_tmp); memory->destroy(shake_atom_tmp); memory->destroy(shake_type_tmp); } /* ---------------------------------------------------------------------- */ template FixShakeKokkos::~FixShakeKokkos() { if (copymode) return; k_shake_flag.sync_host(); k_shake_atom.sync_host(); for (int i = 0; i < nlocal; i++) { if (shake_flag[i] == 0) continue; else if (shake_flag[i] == 1) { bondtype_findset(i,shake_atom[i][0],shake_atom[i][1],1); bondtype_findset(i,shake_atom[i][0],shake_atom[i][2],1); angletype_findset(i,shake_atom[i][1],shake_atom[i][2],1); } else if (shake_flag[i] == 2) { bondtype_findset(i,shake_atom[i][0],shake_atom[i][1],1); } else if (shake_flag[i] == 3) { bondtype_findset(i,shake_atom[i][0],shake_atom[i][1],1); bondtype_findset(i,shake_atom[i][0],shake_atom[i][2],1); } else if (shake_flag[i] == 4) { bondtype_findset(i,shake_atom[i][0],shake_atom[i][1],1); bondtype_findset(i,shake_atom[i][0],shake_atom[i][2],1); bondtype_findset(i,shake_atom[i][0],shake_atom[i][3],1); } } memoryKK->destroy_kokkos(k_shake_flag,shake_flag); memoryKK->destroy_kokkos(k_shake_atom,shake_atom); memoryKK->destroy_kokkos(k_shake_type,shake_type); memoryKK->destroy_kokkos(k_xshake,xshake); memoryKK->destroy_kokkos(k_list,list); memoryKK->destroy_kokkos(k_vatom,vatom); } /* ---------------------------------------------------------------------- set bond and angle distances this init must happen after force->bond and force->angle inits ------------------------------------------------------------------------- */ template void FixShakeKokkos::init() { FixShake::init(); if (utils::strmatch(update->integrate_style,"^respa")) error->all(FLERR,"Cannot yet use respa with Kokkos"); if (rattle) error->all(FLERR,"Cannot yet use KOKKOS package with fix rattle"); // set equilibrium bond distances for (int i = 1; i <= atom->nbondtypes; i++) k_bond_distance.h_view[i] = bond_distance[i]; // set equilibrium angle distances for (int i = 1; i <= atom->nangletypes; i++) k_angle_distance.h_view[i] = angle_distance[i]; k_bond_distance.modify_host(); k_angle_distance.modify_host(); k_bond_distance.sync(); k_angle_distance.sync(); } /* ---------------------------------------------------------------------- build list of SHAKE clusters to constrain if one or more atoms in cluster are on this proc, this proc lists the cluster exactly once ------------------------------------------------------------------------- */ template void FixShakeKokkos::pre_neighbor() { // local copies of atom quantities // used by SHAKE until next re-neighboring x = atom->x; v = atom->v; f = atom->f; mass = atom->mass; rmass = atom->rmass; type = atom->type; nlocal = atom->nlocal; map_style = atom->map_style; if (map_style == Atom::MAP_ARRAY) { k_map_array = atomKK->k_map_array; k_map_array.template sync(); } else if (map_style == Atom::MAP_HASH) { k_map_hash = atomKK->k_map_hash; } k_shake_flag.sync(); k_shake_atom.sync(); // extend size of SHAKE list if necessary if (nlocal > maxlist) { maxlist = nlocal; memoryKK->destroy_kokkos(k_list,list); memoryKK->create_kokkos(k_list,list,maxlist,"shake:list"); d_list = k_list.view(); } // Atom Map map_style = atom->map_style; if (map_style == Atom::MAP_ARRAY) { k_map_array = atomKK->k_map_array; k_map_array.template sync(); } else if (map_style == Atom::MAP_HASH) { k_map_hash = atomKK->k_map_hash; } // build list of SHAKE clusters I compute Kokkos::deep_copy(d_scalars,0); { // local variables for lambda capture auto d_shake_flag = this->d_shake_flag; auto d_shake_atom = this->d_shake_atom; auto d_list = this->d_list; auto d_error_flag = this->d_error_flag; auto d_nlist = this->d_nlist; auto map_style = atom->map_style; auto k_map_array = this->k_map_array; auto k_map_hash = this->k_map_hash; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), LAMMPS_LAMBDA(const int& i) { if (d_shake_flag[i]) { if (d_shake_flag[i] == 2) { const int atom1 = AtomKokkos::map_kokkos(d_shake_atom(i,0),map_style,k_map_array,k_map_hash); const int atom2 = AtomKokkos::map_kokkos(d_shake_atom(i,1),map_style,k_map_array,k_map_hash); if (atom1 == -1 || atom2 == -1) { d_error_flag() = 1; } if (i <= atom1 && i <= atom2) { const int nlist = Kokkos::atomic_fetch_add(&d_nlist(),1); d_list[nlist] = i; } } else if (d_shake_flag[i] % 2 == 1) { const int atom1 = AtomKokkos::map_kokkos(d_shake_atom(i,0),map_style,k_map_array,k_map_hash); const int atom2 = AtomKokkos::map_kokkos(d_shake_atom(i,1),map_style,k_map_array,k_map_hash); const int atom3 = AtomKokkos::map_kokkos(d_shake_atom(i,2),map_style,k_map_array,k_map_hash); if (atom1 == -1 || atom2 == -1 || atom3 == -1) d_error_flag() = 1; if (i <= atom1 && i <= atom2 && i <= atom3) { const int nlist = Kokkos::atomic_fetch_add(&d_nlist(),1); d_list[nlist] = i; } } else { const int atom1 = AtomKokkos::map_kokkos(d_shake_atom(i,0),map_style,k_map_array,k_map_hash); const int atom2 = AtomKokkos::map_kokkos(d_shake_atom(i,1),map_style,k_map_array,k_map_hash); const int atom3 = AtomKokkos::map_kokkos(d_shake_atom(i,2),map_style,k_map_array,k_map_hash); const int atom4 = AtomKokkos::map_kokkos(d_shake_atom(i,3),map_style,k_map_array,k_map_hash); if (atom1 == -1 || atom2 == -1 || atom3 == -1 || atom4 == -1) d_error_flag() = 1; if (i <= atom1 && i <= atom2 && i <= atom3 && i <= atom4) { const int nlist = Kokkos::atomic_fetch_add(&d_nlist(),1); d_list[nlist] = i; } } } }); } Kokkos::deep_copy(h_scalars,d_scalars); nlist = h_nlist(); if (h_error_flag() == 1) { error->one(FLERR,"Shake atoms missing on proc " "{} at step {}",me,update->ntimestep); } } /* ---------------------------------------------------------------------- compute the force adjustment for SHAKE constraint ------------------------------------------------------------------------- */ template void FixShakeKokkos::post_force(int vflag) { copymode = 1; d_x = atomKK->k_x.view(); d_f = atomKK->k_f.view(); d_type = atomKK->k_type.view(); d_rmass = atomKK->k_rmass.view(); d_mass = atomKK->k_mass.view(); nlocal = atomKK->nlocal; map_style = atom->map_style; if (map_style == Atom::MAP_ARRAY) { k_map_array = atomKK->k_map_array; k_map_array.template sync(); } else if (map_style == Atom::MAP_HASH) { k_map_hash = atomKK->k_map_hash; } if (d_rmass.data()) atomKK->sync(execution_space,X_MASK|F_MASK|RMASS_MASK); else atomKK->sync(execution_space,X_MASK|F_MASK|TYPE_MASK); k_shake_flag.sync(); k_shake_atom.sync(); k_shake_type.sync(); if (update->ntimestep == next_output) { atomKK->sync(Host,X_MASK); k_shake_flag.sync_host(); k_shake_atom.sync_host(); k_shake_type.sync_host(); stats(); } // xshake = unconstrained move with current v,f // communicate results if necessary unconstrained_update(); if (nprocs > 1) comm->forward_comm_fix(this); k_xshake.sync(); // virial setup v_init(vflag); // reallocate per-atom arrays if necessary if (vflag_atom) { memoryKK->destroy_kokkos(k_vatom,vatom); memoryKK->create_kokkos(k_vatom,vatom,maxvatom,"improper:vatom"); d_vatom = k_vatom.template view(); } neighflag = lmp->kokkos->neighflag; // FULL neighlist still needs atomics in fix shake if (neighflag == FULL) { if (lmp->kokkos->nthreads > 1 || lmp->kokkos->ngpus > 0) neighflag = HALFTHREAD; else neighflag = HALF; } need_dup = 0; if (neighflag != HALF) need_dup = std::is_same::value,Kokkos::Experimental::ScatterDuplicated>::value; // allocate duplicated memory if (need_dup) { dup_f = Kokkos::Experimental::create_scatter_view(d_f); dup_vatom = Kokkos::Experimental::create_scatter_view(d_vatom); } else { ndup_f = Kokkos::Experimental::create_scatter_view(d_f); ndup_vatom = Kokkos::Experimental::create_scatter_view(d_vatom); } Kokkos::deep_copy(d_error_flag,0); update_domain_variables(); EV_FLOAT ev; // loop over clusters to add constraint forces if (neighflag == HALF) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlist),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,nlist),*this); } else { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,nlist),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,nlist),*this); } copymode = 0; Kokkos::deep_copy(h_error_flag,d_error_flag); if (h_error_flag() == 2) error->warning(FLERR,"Shake determinant < 0.0"); else if (h_error_flag() == 3) error->one(FLERR,"Shake determinant = 0.0"); // store vflag for coordinate_constraints_end_of_step() vflag_post_force = vflag; // reduction over duplicated memory if (need_dup) Kokkos::Experimental::contribute(d_f,dup_f); atomKK->modified(execution_space,F_MASK); if (vflag_global) { virial[0] += ev.v[0]; virial[1] += ev.v[1]; virial[2] += ev.v[2]; virial[3] += ev.v[3]; virial[4] += ev.v[4]; virial[5] += ev.v[5]; } if (vflag_atom) { k_vatom.template modify(); k_vatom.template sync(); } // free duplicated memory if (need_dup) { dup_f = decltype(dup_f)(); dup_vatom = decltype(dup_vatom)(); } } /* ---------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::operator()(TagFixShakePostForce, const int &i, EV_FLOAT& ev) const { const int m = d_list[i]; if (d_shake_flag[m] == 2) shake(m,ev); else if (d_shake_flag[m] == 3) shake3(m,ev); else if (d_shake_flag[m] == 4) shake4(m,ev); else shake3angle(m,ev); } template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::operator()(TagFixShakePostForce, const int &i) const { EV_FLOAT ev; this->template operator()(TagFixShakePostForce(), i, ev); } /* ---------------------------------------------------------------------- count # of degrees-of-freedom removed by SHAKE for atoms in igroup ------------------------------------------------------------------------- */ template int FixShakeKokkos::dof(int igroup) { d_mask = atomKK->k_mask.view(); d_tag = atomKK->k_tag.view(); nlocal = atom->nlocal; atomKK->sync(execution_space,MASK_MASK|TAG_MASK); k_shake_flag.sync(); k_shake_atom.sync(); // count dof in a cluster if and only if // the central atom is in group and atom i is the central atom int n = 0; { // local variables for lambda capture auto d_shake_flag = this->d_shake_flag; auto d_shake_atom = this->d_shake_atom; auto tag = this->d_tag; auto mask = this->d_mask; auto groupbit = group->bitmask[igroup]; Kokkos::parallel_reduce(Kokkos::RangePolicy(0,nlocal), LAMMPS_LAMBDA(const int& i, int& n) { if (!(mask[i] & groupbit)) return; if (d_shake_flag[i] == 0) return; if (d_shake_atom(i,0) != tag[i]) return; if (d_shake_flag[i] == 1) n += 3; else if (d_shake_flag[i] == 2) n += 1; else if (d_shake_flag[i] == 3) n += 2; else if (d_shake_flag[i] == 4) n += 3; },n); } int nall; MPI_Allreduce(&n,&nall,1,MPI_INT,MPI_SUM,world); return nall; } /* ---------------------------------------------------------------------- assumes NVE update, seems to be accurate enough for NVT,NPT,NPH as well ------------------------------------------------------------------------- */ template void FixShakeKokkos::unconstrained_update() { d_x = atomKK->k_x.view(); d_v = atomKK->k_v.view(); d_f = atomKK->k_f.view(); d_type = atomKK->k_type.view(); d_rmass = atomKK->k_rmass.view(); d_mass = atomKK->k_mass.view(); nlocal = atom->nlocal; if (d_rmass.data()) atomKK->sync(execution_space,X_MASK|V_MASK|F_MASK|RMASS_MASK); else atomKK->sync(execution_space,X_MASK|V_MASK|F_MASK|TYPE_MASK); k_shake_flag.sync(); k_xshake.sync(); { // local variables for lambda capture auto d_shake_flag = this->d_shake_flag; auto d_xshake = this->d_xshake; auto x = this->d_x; auto v = this->d_v; auto f = this->d_f; auto dtfsq = this->dtfsq; auto dtv = this->dtv; if (d_rmass.data()) { auto rmass = this->d_rmass; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), LAMMPS_LAMBDA(const int& i) { if (d_shake_flag[i]) { const double dtfmsq = dtfsq / rmass[i]; d_xshake(i,0) = x(i,0) + dtv*v(i,0) + dtfmsq*f(i,0); d_xshake(i,1) = x(i,1) + dtv*v(i,1) + dtfmsq*f(i,1); d_xshake(i,2) = x(i,2) + dtv*v(i,2) + dtfmsq*f(i,2); } else d_xshake(i,2) = d_xshake(i,1) = d_xshake(i,0) = 0.0; }); } else { auto mass = this->d_mass; auto type = this->d_type; Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal), LAMMPS_LAMBDA(const int& i) { if (d_shake_flag[i]) { const double dtfmsq = dtfsq / mass[type[i]]; d_xshake(i,0) = x(i,0) + dtv*v(i,0) + dtfmsq*f(i,0); d_xshake(i,1) = x(i,1) + dtv*v(i,1) + dtfmsq*f(i,1); d_xshake(i,2) = x(i,2) + dtv*v(i,2) + dtfmsq*f(i,2); } else d_xshake(i,2) = d_xshake(i,1) = d_xshake(i,0) = 0.0; }); } } k_xshake.modify(); } /* ---------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::shake(int m, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access::value>(); int nlist,list[2]; double v[6]; double invmass0,invmass1; // local atom IDs and constraint distances int i0 = AtomKokkos::map_kokkos(d_shake_atom(m,0),map_style,k_map_array,k_map_hash); int i1 = AtomKokkos::map_kokkos(d_shake_atom(m,1),map_style,k_map_array,k_map_hash); double bond1 = d_bond_distance[d_shake_type(m,0)]; // r01 = distance vec between atoms, with PBC double r01[3]; r01[0] = d_x(i0,0) - d_x(i1,0); r01[1] = d_x(i0,1) - d_x(i1,1); r01[2] = d_x(i0,2) - d_x(i1,2); minimum_image(r01); // s01 = distance vec after unconstrained update, with PBC // use Domain::minimum_image_once(), not minimum_image() // b/c xshake values might be huge, due to e.g. fix gcmc double s01[3]; s01[0] = d_xshake(i0,0) - d_xshake(i1,0); s01[1] = d_xshake(i0,1) - d_xshake(i1,1); s01[2] = d_xshake(i0,2) - d_xshake(i1,2); minimum_image_once(s01); // scalar distances between atoms double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2]; double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2]; // a,b,c = coeffs in quadratic equation for lamda if (d_rmass.data()) { invmass0 = 1.0/d_rmass[i0]; invmass1 = 1.0/d_rmass[i1]; } else { invmass0 = 1.0/d_mass[d_type[i0]]; invmass1 = 1.0/d_mass[d_type[i1]]; } double a = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; double b = 2.0 * (invmass0+invmass1) * (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); double c = s01sq - bond1*bond1; // error check double determ = b*b - 4.0*a*c; if (determ < 0.0) { //error->warning(FLERR,"Shake determinant < 0.0",0); d_error_flag() = 2; determ = 0.0; } // exact quadratic solution for lamda double lamda,lamda1,lamda2; lamda1 = (-b+sqrt(determ)) / (2.0*a); lamda2 = (-b-sqrt(determ)) / (2.0*a); if (fabs(lamda1) <= fabs(lamda2)) lamda = lamda1; else lamda = lamda2; // update forces if atom is owned by this processor lamda /= dtfsq; if (i0 < nlocal) { a_f(i0,0) += lamda*r01[0]; a_f(i0,1) += lamda*r01[1]; a_f(i0,2) += lamda*r01[2]; } if (i1 < nlocal) { a_f(i1,0) -= lamda*r01[0]; a_f(i1,1) -= lamda*r01[1]; a_f(i1,2) -= lamda*r01[2]; } if (EVFLAG) { nlist = 0; if (i0 < nlocal) list[nlist++] = i0; if (i1 < nlocal) list[nlist++] = i1; v[0] = lamda*r01[0]*r01[0]; v[1] = lamda*r01[1]*r01[1]; v[2] = lamda*r01[2]*r01[2]; v[3] = lamda*r01[0]*r01[1]; v[4] = lamda*r01[0]*r01[2]; v[5] = lamda*r01[1]*r01[2]; v_tally(ev,nlist,list,2.0,v); } } /* ---------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::shake3(int m, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access::value>(); int nlist,list[3]; double v[6]; double invmass0,invmass1,invmass2; // local atom IDs and constraint distances int i0 = AtomKokkos::map_kokkos(d_shake_atom(m,0),map_style,k_map_array,k_map_hash); int i1 = AtomKokkos::map_kokkos(d_shake_atom(m,1),map_style,k_map_array,k_map_hash); int i2 = AtomKokkos::map_kokkos(d_shake_atom(m,2),map_style,k_map_array,k_map_hash); double bond1 = d_bond_distance[d_shake_type(m,0)]; double bond2 = d_bond_distance[d_shake_type(m,1)]; // r01,r02 = distance vec between atoms, with PBC double r01[3]; r01[0] = d_x(i0,0) - d_x(i1,0); r01[1] = d_x(i0,1) - d_x(i1,1); r01[2] = d_x(i0,2) - d_x(i1,2); minimum_image(r01); double r02[3]; r02[0] = d_x(i0,0) - d_x(i2,0); r02[1] = d_x(i0,1) - d_x(i2,1); r02[2] = d_x(i0,2) - d_x(i2,2); minimum_image(r02); // s01,s02 = distance vec after unconstrained update, with PBC // use Domain::minimum_image_once(), not minimum_image() // b/c xshake values might be huge, due to e.g. fix gcmc double s01[3]; s01[0] = d_xshake(i0,0) - d_xshake(i1,0); s01[1] = d_xshake(i0,1) - d_xshake(i1,1); s01[2] = d_xshake(i0,2) - d_xshake(i1,2); minimum_image_once(s01); double s02[3]; s02[0] = d_xshake(i0,0) - d_xshake(i2,0); s02[1] = d_xshake(i0,1) - d_xshake(i2,1); s02[2] = d_xshake(i0,2) - d_xshake(i2,2); minimum_image_once(s02); // scalar distances between atoms double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2]; double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2]; double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2]; double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2]; // matrix coeffs and rhs for lamda equations if (d_rmass.data()) { invmass0 = 1.0/d_rmass[i0]; invmass1 = 1.0/d_rmass[i1]; invmass2 = 1.0/d_rmass[i2]; } else { invmass0 = 1.0/d_mass[d_type[i0]]; invmass1 = 1.0/d_mass[d_type[i1]]; invmass2 = 1.0/d_mass[d_type[i2]]; } double a11 = 2.0 * (invmass0+invmass1) * (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); double a12 = 2.0 * invmass0 * (s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]); double a21 = 2.0 * invmass0 * (s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]); double a22 = 2.0 * (invmass0+invmass2) * (s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]); // inverse of matrix double determ = a11*a22 - a12*a21; if (determ == 0.0) d_error_flag() = 3; //error->one(FLERR,"Shake determinant = 0.0"); double determinv = 1.0/determ; double a11inv = a22*determinv; double a12inv = -a12*determinv; double a21inv = -a21*determinv; double a22inv = a11*determinv; // quadratic correction coeffs double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]); double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; double quad1_0202 = invmass0*invmass0 * r02sq; double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102; double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq; double quad2_0101 = invmass0*invmass0 * r01sq; double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102; // iterate until converged double lamda01 = 0.0; double lamda02 = 0.0; int niter = 0; int done = 0; double quad1,quad2,b1,b2,lamda01_new,lamda02_new; while (!done && niter < max_iter) { quad1 = quad1_0101 * lamda01*lamda01 + quad1_0202 * lamda02*lamda02 + quad1_0102 * lamda01*lamda02; quad2 = quad2_0101 * lamda01*lamda01 + quad2_0202 * lamda02*lamda02 + quad2_0102 * lamda01*lamda02; b1 = bond1*bond1 - s01sq - quad1; b2 = bond2*bond2 - s02sq - quad2; lamda01_new = a11inv*b1 + a12inv*b2; lamda02_new = a21inv*b1 + a22inv*b2; done = 1; if (fabs(lamda01_new-lamda01) > tolerance) done = 0; if (fabs(lamda02_new-lamda02) > tolerance) done = 0; lamda01 = lamda01_new; lamda02 = lamda02_new; // stop iterations before we have a floating point overflow // max double is < 1.0e308, so 1e150 is a reasonable cutoff if (fabs(lamda01) > 1e150 || fabs(lamda02) > 1e150) done = 1; niter++; } // update forces if atom is owned by this processor lamda01 = lamda01/dtfsq; lamda02 = lamda02/dtfsq; if (i0 < nlocal) { a_f(i0,0) += lamda01*r01[0] + lamda02*r02[0]; a_f(i0,1) += lamda01*r01[1] + lamda02*r02[1]; a_f(i0,2) += lamda01*r01[2] + lamda02*r02[2]; } if (i1 < nlocal) { a_f(i1,0) -= lamda01*r01[0]; a_f(i1,1) -= lamda01*r01[1]; a_f(i1,2) -= lamda01*r01[2]; } if (i2 < nlocal) { a_f(i2,0) -= lamda02*r02[0]; a_f(i2,1) -= lamda02*r02[1]; a_f(i2,2) -= lamda02*r02[2]; } if (EVFLAG) { nlist = 0; if (i0 < nlocal) list[nlist++] = i0; if (i1 < nlocal) list[nlist++] = i1; if (i2 < nlocal) list[nlist++] = i2; v[0] = lamda01*r01[0]*r01[0] + lamda02*r02[0]*r02[0]; v[1] = lamda01*r01[1]*r01[1] + lamda02*r02[1]*r02[1]; v[2] = lamda01*r01[2]*r01[2] + lamda02*r02[2]*r02[2]; v[3] = lamda01*r01[0]*r01[1] + lamda02*r02[0]*r02[1]; v[4] = lamda01*r01[0]*r01[2] + lamda02*r02[0]*r02[2]; v[5] = lamda01*r01[1]*r01[2] + lamda02*r02[1]*r02[2]; v_tally(ev,nlist,list,3.0,v); } } /* ---------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::shake4(int m, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access::value>(); int nlist,list[4]; double v[6]; double invmass0,invmass1,invmass2,invmass3; // local atom IDs and constraint distances int i0 = AtomKokkos::map_kokkos(d_shake_atom(m,0),map_style,k_map_array,k_map_hash); int i1 = AtomKokkos::map_kokkos(d_shake_atom(m,1),map_style,k_map_array,k_map_hash); int i2 = AtomKokkos::map_kokkos(d_shake_atom(m,2),map_style,k_map_array,k_map_hash); int i3 = AtomKokkos::map_kokkos(d_shake_atom(m,3),map_style,k_map_array,k_map_hash); double bond1 = d_bond_distance[d_shake_type(m,0)]; double bond2 = d_bond_distance[d_shake_type(m,1)]; double bond3 = d_bond_distance[d_shake_type(m,2)]; // r01,r02,r03 = distance vec between atoms, with PBC double r01[3]; r01[0] = d_x(i0,0) - d_x(i1,0); r01[1] = d_x(i0,1) - d_x(i1,1); r01[2] = d_x(i0,2) - d_x(i1,2); minimum_image(r01); double r02[3]; r02[0] = d_x(i0,0) - d_x(i2,0); r02[1] = d_x(i0,1) - d_x(i2,1); r02[2] = d_x(i0,2) - d_x(i2,2); minimum_image(r02); double r03[3]; r03[0] = d_x(i0,0) - d_x(i3,0); r03[1] = d_x(i0,1) - d_x(i3,1); r03[2] = d_x(i0,2) - d_x(i3,2); minimum_image(r03); // s01,s02,s03 = distance vec after unconstrained update, with PBC // use Domain::minimum_image_once(), not minimum_image() // b/c xshake values might be huge, due to e.g. fix gcmc double s01[3]; s01[0] = d_xshake(i0,0) - d_xshake(i1,0); s01[1] = d_xshake(i0,1) - d_xshake(i1,1); s01[2] = d_xshake(i0,2) - d_xshake(i1,2); minimum_image_once(s01); double s02[3]; s02[0] = d_xshake(i0,0) - d_xshake(i2,0); s02[1] = d_xshake(i0,1) - d_xshake(i2,1); s02[2] = d_xshake(i0,2) - d_xshake(i2,2); minimum_image_once(s02); double s03[3]; s03[0] = d_xshake(i0,0) - d_xshake(i3,0); s03[1] = d_xshake(i0,1) - d_xshake(i3,1); s03[2] = d_xshake(i0,2) - d_xshake(i3,2); minimum_image_once(s03); // scalar distances between atoms double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2]; double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2]; double r03sq = r03[0]*r03[0] + r03[1]*r03[1] + r03[2]*r03[2]; double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2]; double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2]; double s03sq = s03[0]*s03[0] + s03[1]*s03[1] + s03[2]*s03[2]; // matrix coeffs and rhs for lamda equations if (d_rmass.data()) { invmass0 = 1.0/d_rmass[i0]; invmass1 = 1.0/d_rmass[i1]; invmass2 = 1.0/d_rmass[i2]; invmass3 = 1.0/d_rmass[i3]; } else { invmass0 = 1.0/d_mass[d_type[i0]]; invmass1 = 1.0/d_mass[d_type[i1]]; invmass2 = 1.0/d_mass[d_type[i2]]; invmass3 = 1.0/d_mass[d_type[i3]]; } double a11 = 2.0 * (invmass0+invmass1) * (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); double a12 = 2.0 * invmass0 * (s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]); double a13 = 2.0 * invmass0 * (s01[0]*r03[0] + s01[1]*r03[1] + s01[2]*r03[2]); double a21 = 2.0 * invmass0 * (s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]); double a22 = 2.0 * (invmass0+invmass2) * (s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]); double a23 = 2.0 * invmass0 * (s02[0]*r03[0] + s02[1]*r03[1] + s02[2]*r03[2]); double a31 = 2.0 * invmass0 * (s03[0]*r01[0] + s03[1]*r01[1] + s03[2]*r01[2]); double a32 = 2.0 * invmass0 * (s03[0]*r02[0] + s03[1]*r02[1] + s03[2]*r02[2]); double a33 = 2.0 * (invmass0+invmass3) * (s03[0]*r03[0] + s03[1]*r03[1] + s03[2]*r03[2]); // inverse of matrix; double determ = a11*a22*a33 + a12*a23*a31 + a13*a21*a32 - a11*a23*a32 - a12*a21*a33 - a13*a22*a31; if (determ == 0.0) d_error_flag() = 3; //error->one(FLERR,"Shake determinant = 0.0"); double determinv = 1.0/determ; double a11inv = determinv * (a22*a33 - a23*a32); double a12inv = -determinv * (a12*a33 - a13*a32); double a13inv = determinv * (a12*a23 - a13*a22); double a21inv = -determinv * (a21*a33 - a23*a31); double a22inv = determinv * (a11*a33 - a13*a31); double a23inv = -determinv * (a11*a23 - a13*a21); double a31inv = determinv * (a21*a32 - a22*a31); double a32inv = -determinv * (a11*a32 - a12*a31); double a33inv = determinv * (a11*a22 - a12*a21); // quadratic correction coeffs double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]); double r0103 = (r01[0]*r03[0] + r01[1]*r03[1] + r01[2]*r03[2]); double r0203 = (r02[0]*r03[0] + r02[1]*r03[1] + r02[2]*r03[2]); double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; double quad1_0202 = invmass0*invmass0 * r02sq; double quad1_0303 = invmass0*invmass0 * r03sq; double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102; double quad1_0103 = 2.0 * (invmass0+invmass1)*invmass0 * r0103; double quad1_0203 = 2.0 * invmass0*invmass0 * r0203; double quad2_0101 = invmass0*invmass0 * r01sq; double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq; double quad2_0303 = invmass0*invmass0 * r03sq; double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102; double quad2_0103 = 2.0 * invmass0*invmass0 * r0103; double quad2_0203 = 2.0 * (invmass0+invmass2)*invmass0 * r0203; double quad3_0101 = invmass0*invmass0 * r01sq; double quad3_0202 = invmass0*invmass0 * r02sq; double quad3_0303 = (invmass0+invmass3)*(invmass0+invmass3) * r03sq; double quad3_0102 = 2.0 * invmass0*invmass0 * r0102; double quad3_0103 = 2.0 * (invmass0+invmass3)*invmass0 * r0103; double quad3_0203 = 2.0 * (invmass0+invmass3)*invmass0 * r0203; // iterate until converged double lamda01 = 0.0; double lamda02 = 0.0; double lamda03 = 0.0; int niter = 0; int done = 0; double quad1,quad2,quad3,b1,b2,b3,lamda01_new,lamda02_new,lamda03_new; while (!done && niter < max_iter) { quad1 = quad1_0101 * lamda01*lamda01 + quad1_0202 * lamda02*lamda02 + quad1_0303 * lamda03*lamda03 + quad1_0102 * lamda01*lamda02 + quad1_0103 * lamda01*lamda03 + quad1_0203 * lamda02*lamda03; quad2 = quad2_0101 * lamda01*lamda01 + quad2_0202 * lamda02*lamda02 + quad2_0303 * lamda03*lamda03 + quad2_0102 * lamda01*lamda02 + quad2_0103 * lamda01*lamda03 + quad2_0203 * lamda02*lamda03; quad3 = quad3_0101 * lamda01*lamda01 + quad3_0202 * lamda02*lamda02 + quad3_0303 * lamda03*lamda03 + quad3_0102 * lamda01*lamda02 + quad3_0103 * lamda01*lamda03 + quad3_0203 * lamda02*lamda03; b1 = bond1*bond1 - s01sq - quad1; b2 = bond2*bond2 - s02sq - quad2; b3 = bond3*bond3 - s03sq - quad3; lamda01_new = a11inv*b1 + a12inv*b2 + a13inv*b3; lamda02_new = a21inv*b1 + a22inv*b2 + a23inv*b3; lamda03_new = a31inv*b1 + a32inv*b2 + a33inv*b3; done = 1; if (fabs(lamda01_new-lamda01) > tolerance) done = 0; if (fabs(lamda02_new-lamda02) > tolerance) done = 0; if (fabs(lamda03_new-lamda03) > tolerance) done = 0; lamda01 = lamda01_new; lamda02 = lamda02_new; lamda03 = lamda03_new; // stop iterations before we have a floating point overflow // max double is < 1.0e308, so 1e150 is a reasonable cutoff if (fabs(lamda01) > 1e150 || fabs(lamda02) > 1e150 || fabs(lamda03) > 1e150) done = 1; niter++; } // update forces if atom is owned by this processor lamda01 = lamda01/dtfsq; lamda02 = lamda02/dtfsq; lamda03 = lamda03/dtfsq; if (i0 < nlocal) { a_f(i0,0) += lamda01*r01[0] + lamda02*r02[0] + lamda03*r03[0]; a_f(i0,1) += lamda01*r01[1] + lamda02*r02[1] + lamda03*r03[1]; a_f(i0,2) += lamda01*r01[2] + lamda02*r02[2] + lamda03*r03[2]; } if (i1 < nlocal) { a_f(i1,0) -= lamda01*r01[0]; a_f(i1,1) -= lamda01*r01[1]; a_f(i1,2) -= lamda01*r01[2]; } if (i2 < nlocal) { a_f(i2,0) -= lamda02*r02[0]; a_f(i2,1) -= lamda02*r02[1]; a_f(i2,2) -= lamda02*r02[2]; } if (i3 < nlocal) { a_f(i3,0) -= lamda03*r03[0]; a_f(i3,1) -= lamda03*r03[1]; a_f(i3,2) -= lamda03*r03[2]; } if (EVFLAG) { nlist = 0; if (i0 < nlocal) list[nlist++] = i0; if (i1 < nlocal) list[nlist++] = i1; if (i2 < nlocal) list[nlist++] = i2; if (i3 < nlocal) list[nlist++] = i3; v[0] = lamda01*r01[0]*r01[0]+lamda02*r02[0]*r02[0]+lamda03*r03[0]*r03[0]; v[1] = lamda01*r01[1]*r01[1]+lamda02*r02[1]*r02[1]+lamda03*r03[1]*r03[1]; v[2] = lamda01*r01[2]*r01[2]+lamda02*r02[2]*r02[2]+lamda03*r03[2]*r03[2]; v[3] = lamda01*r01[0]*r01[1]+lamda02*r02[0]*r02[1]+lamda03*r03[0]*r03[1]; v[4] = lamda01*r01[0]*r01[2]+lamda02*r02[0]*r02[2]+lamda03*r03[0]*r03[2]; v[5] = lamda01*r01[1]*r01[2]+lamda02*r02[1]*r02[2]+lamda03*r03[1]*r03[2]; v_tally(ev,nlist,list,4.0,v); } } /* ---------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::shake3angle(int m, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial auto v_f = ScatterViewHelper::value,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); auto a_f = v_f.template access::value>(); int nlist,list[3]; double v[6]; double invmass0,invmass1,invmass2; // local atom IDs and constraint distances int i0 = AtomKokkos::map_kokkos(d_shake_atom(m,0),map_style,k_map_array,k_map_hash); int i1 = AtomKokkos::map_kokkos(d_shake_atom(m,1),map_style,k_map_array,k_map_hash); int i2 = AtomKokkos::map_kokkos(d_shake_atom(m,2),map_style,k_map_array,k_map_hash); double bond1 = d_bond_distance[d_shake_type(m,0)]; double bond2 = d_bond_distance[d_shake_type(m,1)]; double bond12 = d_angle_distance[d_shake_type(m,2)]; // r01,r02,r12 = distance vec between atoms, with PBC double r01[3]; r01[0] = d_x(i0,0) - d_x(i1,0); r01[1] = d_x(i0,1) - d_x(i1,1); r01[2] = d_x(i0,2) - d_x(i1,2); minimum_image(r01); double r02[3]; r02[0] = d_x(i0,0) - d_x(i2,0); r02[1] = d_x(i0,1) - d_x(i2,1); r02[2] = d_x(i0,2) - d_x(i2,2); minimum_image(r02); double r12[3]; r12[0] = d_x(i1,0) - d_x(i2,0); r12[1] = d_x(i1,1) - d_x(i2,1); r12[2] = d_x(i1,2) - d_x(i2,2); minimum_image(r12); // s01,s02,s12 = distance vec after unconstrained update, with PBC // use Domain::minimum_image_once(), not minimum_image() // b/c xshake values might be huge, due to e.g. fix gcmc double s01[3]; s01[0] = d_xshake(i0,0) - d_xshake(i1,0); s01[1] = d_xshake(i0,1) - d_xshake(i1,1); s01[2] = d_xshake(i0,2) - d_xshake(i1,2); minimum_image_once(s01); double s02[3]; s02[0] = d_xshake(i0,0) - d_xshake(i2,0); s02[1] = d_xshake(i0,1) - d_xshake(i2,1); s02[2] = d_xshake(i0,2) - d_xshake(i2,2); minimum_image_once(s02); double s12[3]; s12[0] = d_xshake(i1,0) - d_xshake(i2,0); s12[1] = d_xshake(i1,1) - d_xshake(i2,1); s12[2] = d_xshake(i1,2) - d_xshake(i2,2); minimum_image_once(s12); // scalar distances between atoms double r01sq = r01[0]*r01[0] + r01[1]*r01[1] + r01[2]*r01[2]; double r02sq = r02[0]*r02[0] + r02[1]*r02[1] + r02[2]*r02[2]; double r12sq = r12[0]*r12[0] + r12[1]*r12[1] + r12[2]*r12[2]; double s01sq = s01[0]*s01[0] + s01[1]*s01[1] + s01[2]*s01[2]; double s02sq = s02[0]*s02[0] + s02[1]*s02[1] + s02[2]*s02[2]; double s12sq = s12[0]*s12[0] + s12[1]*s12[1] + s12[2]*s12[2]; // matrix coeffs and rhs for lamda equations if (d_rmass.data()) { invmass0 = 1.0/d_rmass[i0]; invmass1 = 1.0/d_rmass[i1]; invmass2 = 1.0/d_rmass[i2]; } else { invmass0 = 1.0/d_mass[d_type[i0]]; invmass1 = 1.0/d_mass[d_type[i1]]; invmass2 = 1.0/d_mass[d_type[i2]]; } double a11 = 2.0 * (invmass0+invmass1) * (s01[0]*r01[0] + s01[1]*r01[1] + s01[2]*r01[2]); double a12 = 2.0 * invmass0 * (s01[0]*r02[0] + s01[1]*r02[1] + s01[2]*r02[2]); double a13 = - 2.0 * invmass1 * (s01[0]*r12[0] + s01[1]*r12[1] + s01[2]*r12[2]); double a21 = 2.0 * invmass0 * (s02[0]*r01[0] + s02[1]*r01[1] + s02[2]*r01[2]); double a22 = 2.0 * (invmass0+invmass2) * (s02[0]*r02[0] + s02[1]*r02[1] + s02[2]*r02[2]); double a23 = 2.0 * invmass2 * (s02[0]*r12[0] + s02[1]*r12[1] + s02[2]*r12[2]); double a31 = - 2.0 * invmass1 * (s12[0]*r01[0] + s12[1]*r01[1] + s12[2]*r01[2]); double a32 = 2.0 * invmass2 * (s12[0]*r02[0] + s12[1]*r02[1] + s12[2]*r02[2]); double a33 = 2.0 * (invmass1+invmass2) * (s12[0]*r12[0] + s12[1]*r12[1] + s12[2]*r12[2]); // inverse of matrix double determ = a11*a22*a33 + a12*a23*a31 + a13*a21*a32 - a11*a23*a32 - a12*a21*a33 - a13*a22*a31; if (determ == 0.0) d_error_flag() = 3; //error->one(FLERR,"Shake determinant = 0.0"); double determinv = 1.0/determ; double a11inv = determinv * (a22*a33 - a23*a32); double a12inv = -determinv * (a12*a33 - a13*a32); double a13inv = determinv * (a12*a23 - a13*a22); double a21inv = -determinv * (a21*a33 - a23*a31); double a22inv = determinv * (a11*a33 - a13*a31); double a23inv = -determinv * (a11*a23 - a13*a21); double a31inv = determinv * (a21*a32 - a22*a31); double a32inv = -determinv * (a11*a32 - a12*a31); double a33inv = determinv * (a11*a22 - a12*a21); // quadratic correction coeffs double r0102 = (r01[0]*r02[0] + r01[1]*r02[1] + r01[2]*r02[2]); double r0112 = (r01[0]*r12[0] + r01[1]*r12[1] + r01[2]*r12[2]); double r0212 = (r02[0]*r12[0] + r02[1]*r12[1] + r02[2]*r12[2]); double quad1_0101 = (invmass0+invmass1)*(invmass0+invmass1) * r01sq; double quad1_0202 = invmass0*invmass0 * r02sq; double quad1_1212 = invmass1*invmass1 * r12sq; double quad1_0102 = 2.0 * (invmass0+invmass1)*invmass0 * r0102; double quad1_0112 = - 2.0 * (invmass0+invmass1)*invmass1 * r0112; double quad1_0212 = - 2.0 * invmass0*invmass1 * r0212; double quad2_0101 = invmass0*invmass0 * r01sq; double quad2_0202 = (invmass0+invmass2)*(invmass0+invmass2) * r02sq; double quad2_1212 = invmass2*invmass2 * r12sq; double quad2_0102 = 2.0 * (invmass0+invmass2)*invmass0 * r0102; double quad2_0112 = 2.0 * invmass0*invmass2 * r0112; double quad2_0212 = 2.0 * (invmass0+invmass2)*invmass2 * r0212; double quad3_0101 = invmass1*invmass1 * r01sq; double quad3_0202 = invmass2*invmass2 * r02sq; double quad3_1212 = (invmass1+invmass2)*(invmass1+invmass2) * r12sq; double quad3_0102 = - 2.0 * invmass1*invmass2 * r0102; double quad3_0112 = - 2.0 * (invmass1+invmass2)*invmass1 * r0112; double quad3_0212 = 2.0 * (invmass1+invmass2)*invmass2 * r0212; // iterate until converged double lamda01 = 0.0; double lamda02 = 0.0; double lamda12 = 0.0; int niter = 0; int done = 0; double quad1,quad2,quad3,b1,b2,b3,lamda01_new,lamda02_new,lamda12_new; while (!done && niter < max_iter) { quad1 = quad1_0101 * lamda01*lamda01 + quad1_0202 * lamda02*lamda02 + quad1_1212 * lamda12*lamda12 + quad1_0102 * lamda01*lamda02 + quad1_0112 * lamda01*lamda12 + quad1_0212 * lamda02*lamda12; quad2 = quad2_0101 * lamda01*lamda01 + quad2_0202 * lamda02*lamda02 + quad2_1212 * lamda12*lamda12 + quad2_0102 * lamda01*lamda02 + quad2_0112 * lamda01*lamda12 + quad2_0212 * lamda02*lamda12; quad3 = quad3_0101 * lamda01*lamda01 + quad3_0202 * lamda02*lamda02 + quad3_1212 * lamda12*lamda12 + quad3_0102 * lamda01*lamda02 + quad3_0112 * lamda01*lamda12 + quad3_0212 * lamda02*lamda12; b1 = bond1*bond1 - s01sq - quad1; b2 = bond2*bond2 - s02sq - quad2; b3 = bond12*bond12 - s12sq - quad3; lamda01_new = a11inv*b1 + a12inv*b2 + a13inv*b3; lamda02_new = a21inv*b1 + a22inv*b2 + a23inv*b3; lamda12_new = a31inv*b1 + a32inv*b2 + a33inv*b3; done = 1; if (fabs(lamda01_new-lamda01) > tolerance) done = 0; if (fabs(lamda02_new-lamda02) > tolerance) done = 0; if (fabs(lamda12_new-lamda12) > tolerance) done = 0; lamda01 = lamda01_new; lamda02 = lamda02_new; lamda12 = lamda12_new; // stop iterations before we have a floating point overflow // max double is < 1.0e308, so 1e150 is a reasonable cutoff if (fabs(lamda01) > 1e150 || fabs(lamda02) > 1e150 || fabs(lamda12) > 1e150) done = 1; niter++; } // update forces if atom is owned by this processor lamda01 = lamda01/dtfsq; lamda02 = lamda02/dtfsq; lamda12 = lamda12/dtfsq; if (i0 < nlocal) { a_f(i0,0) += lamda01*r01[0] + lamda02*r02[0]; a_f(i0,1) += lamda01*r01[1] + lamda02*r02[1]; a_f(i0,2) += lamda01*r01[2] + lamda02*r02[2]; } if (i1 < nlocal) { a_f(i1,0) -= lamda01*r01[0] - lamda12*r12[0]; a_f(i1,1) -= lamda01*r01[1] - lamda12*r12[1]; a_f(i1,2) -= lamda01*r01[2] - lamda12*r12[2]; } if (i2 < nlocal) { a_f(i2,0) -= lamda02*r02[0] + lamda12*r12[0]; a_f(i2,1) -= lamda02*r02[1] + lamda12*r12[1]; a_f(i2,2) -= lamda02*r02[2] + lamda12*r12[2]; } if (EVFLAG) { nlist = 0; if (i0 < nlocal) list[nlist++] = i0; if (i1 < nlocal) list[nlist++] = i1; if (i2 < nlocal) list[nlist++] = i2; v[0] = lamda01*r01[0]*r01[0]+lamda02*r02[0]*r02[0]+lamda12*r12[0]*r12[0]; v[1] = lamda01*r01[1]*r01[1]+lamda02*r02[1]*r02[1]+lamda12*r12[1]*r12[1]; v[2] = lamda01*r01[2]*r01[2]+lamda02*r02[2]*r02[2]+lamda12*r12[2]*r12[2]; v[3] = lamda01*r01[0]*r01[1]+lamda02*r02[0]*r02[1]+lamda12*r12[0]*r12[1]; v[4] = lamda01*r01[0]*r01[2]+lamda02*r02[0]*r02[2]+lamda12*r12[0]*r12[2]; v[5] = lamda01*r01[1]*r01[2]+lamda02*r02[1]*r02[2]+lamda12*r12[1]*r12[2]; v_tally(ev,nlist,list,3.0,v); } } /* ---------------------------------------------------------------------- allocate local atom-based arrays ------------------------------------------------------------------------- */ template void FixShakeKokkos::grow_arrays(int nmax) { memoryKK->grow_kokkos(k_shake_flag,shake_flag,nmax,"shake:shake_flag"); memoryKK->grow_kokkos(k_shake_atom,shake_atom,nmax,4,"shake:shake_atom"); memoryKK->grow_kokkos(k_shake_type,shake_type,nmax,3,"shake:shake_type"); memoryKK->destroy_kokkos(k_xshake,xshake); memoryKK->create_kokkos(k_xshake,xshake,nmax,"shake:xshake"); d_shake_flag = k_shake_flag.view(); d_shake_atom = k_shake_atom.view(); d_shake_type = k_shake_type.view(); d_xshake = k_xshake.view(); memory->destroy(ftmp); memory->create(ftmp,nmax,3,"shake:ftmp"); memory->destroy(vtmp); memory->create(vtmp,nmax,3,"shake:vtmp"); } /* ---------------------------------------------------------------------- copy values within local atom-based arrays ------------------------------------------------------------------------- */ template void FixShakeKokkos::copy_arrays(int i, int j, int delflag) { k_shake_flag.sync_host(); k_shake_atom.sync_host(); k_shake_type.sync_host(); FixShake::copy_arrays(i,j,delflag); k_shake_flag.modify_host(); k_shake_atom.modify_host(); k_shake_type.modify_host(); } /* ---------------------------------------------------------------------- initialize one atom's array values, called when atom is created ------------------------------------------------------------------------- */ template void FixShakeKokkos::set_arrays(int i) { k_shake_flag.sync_host(); shake_flag[i] = 0; k_shake_flag.modify_host(); } /* ---------------------------------------------------------------------- update one atom's array values called when molecule is created from fix gcmc ------------------------------------------------------------------------- */ template void FixShakeKokkos::update_arrays(int i, int atom_offset) { k_shake_flag.sync_host(); k_shake_atom.sync_host(); FixShake::update_arrays(i,atom_offset); k_shake_flag.modify_host(); k_shake_atom.modify_host(); } /* ---------------------------------------------------------------------- initialize a molecule inserted by another fix, e.g. deposit or pour called when molecule is created nlocalprev = # of atoms on this proc before molecule inserted tagprev = atom ID previous to new atoms in the molecule xgeom,vcm,quat ignored ------------------------------------------------------------------------- */ template void FixShakeKokkos::set_molecule(int nlocalprev, tagint tagprev, int imol, double * xgeom, double * vcm, double * quat) { atomKK->sync(Host,TAG_MASK); k_shake_flag.sync_host(); k_shake_atom.sync_host(); k_shake_type.sync_host(); FixShake::set_molecule(nlocalprev,tagprev,imol,xgeom,vcm,quat); k_shake_atom.modify_host(); k_shake_type.modify_host(); } /* ---------------------------------------------------------------------- pack values in local atom-based arrays for exchange with another proc ------------------------------------------------------------------------- */ template int FixShakeKokkos::pack_exchange(int i, double *buf) { k_shake_flag.sync_host(); k_shake_atom.sync_host(); k_shake_type.sync_host(); int m = FixShake::pack_exchange(i,buf); k_shake_flag.modify_host(); k_shake_atom.modify_host(); k_shake_type.modify_host(); return m; } /* ---------------------------------------------------------------------- unpack values in local atom-based arrays from exchange with another proc ------------------------------------------------------------------------- */ template int FixShakeKokkos::unpack_exchange(int nlocal, double *buf) { k_shake_flag.sync_host(); k_shake_atom.sync_host(); k_shake_type.sync_host(); int m = FixShake::unpack_exchange(nlocal,buf); k_shake_flag.modify_host(); k_shake_atom.modify_host(); k_shake_type.modify_host(); return m; } /* ---------------------------------------------------------------------- */ template int FixShakeKokkos::pack_forward_comm_fix_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &k_buf, int pbc_flag, int* pbc) { d_sendlist = k_sendlist.view(); iswap = iswap_in; d_buf = k_buf.view(); if (domain->triclinic == 0) { dx = pbc[0]*domain->xprd; dy = pbc[1]*domain->yprd; dz = pbc[2]*domain->zprd; } else { dx = pbc[0]*domain->xprd + pbc[5]*domain->xy + pbc[4]*domain->xz; dy = pbc[1]*domain->yprd + pbc[3]*domain->yz; dz = pbc[2]*domain->zprd; } if (pbc_flag) Kokkos::parallel_for(Kokkos::RangePolicy >(0,n),*this); else Kokkos::parallel_for(Kokkos::RangePolicy >(0,n),*this); return n*3; } template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::operator()(TagFixShakePackForwardComm, const int &i) const { const int j = d_sendlist(iswap, i); if (PBC_FLAG == 0) { d_buf[3*i] = d_xshake(j,0); d_buf[3*i+1] = d_xshake(j,1); d_buf[3*i+2] = d_xshake(j,2); } else { d_buf[3*i] = d_xshake(j,0) + dx; d_buf[3*i+1] = d_xshake(j,1) + dy; d_buf[3*i+2] = d_xshake(j,2) + dz; } } /* ---------------------------------------------------------------------- */ template int FixShakeKokkos::pack_forward_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) { k_xshake.sync_host(); int m = FixShake::pack_forward_comm(n,list,buf,pbc_flag,pbc); k_xshake.modify_host(); return m; } /* ---------------------------------------------------------------------- */ template void FixShakeKokkos::unpack_forward_comm_fix_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf) { first = first_in; d_buf = buf.view(); Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); } template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::operator()(TagFixShakeUnpackForwardComm, const int &i) const { d_xshake(i + first,0) = d_buf[3*i]; d_xshake(i + first,1) = d_buf[3*i+1]; d_xshake(i + first,2) = d_buf[3*i+2]; } /* ---------------------------------------------------------------------- */ template void FixShakeKokkos::unpack_forward_comm(int n, int first, double *buf) { k_xshake.sync_host(); FixShake::unpack_forward_comm(n,first,buf); k_xshake.modify_host(); } /* ---------------------------------------------------------------------- add coordinate constraining forces this method is called at the end of a timestep ------------------------------------------------------------------------- */ template void FixShakeKokkos::shake_end_of_step(int vflag) { dtv = update->dt; dtfsq = 0.5 * update->dt * update->dt * force->ftm2v; FixShakeKokkos::post_force(vflag); if (!rattle) dtfsq = update->dt * update->dt * force->ftm2v; } /* ---------------------------------------------------------------------- calculate constraining forces based on the current configuration change coordinates ------------------------------------------------------------------------- */ template void FixShakeKokkos::correct_coordinates(int vflag) { atomKK->sync(Host,X_MASK|V_MASK|F_MASK); // save current forces and velocities so that you // initialize them to zero such that FixShake::unconstrained_coordinate_update has no effect for (int j=0; jmodified(Host,V_MASK|F_MASK); // call SHAKE to correct the coordinates which were updated without constraints // IMPORTANT: use 1 as argument and thereby enforce velocity Verlet dtfsq = 0.5 * update->dt * update->dt * force->ftm2v; FixShakeKokkos::post_force(vflag); atomKK->sync(Host,X_MASK|F_MASK); // integrate coordinates: x' = xnp1 + dt^2/2m_i * f, where f is the constraining force // NOTE: After this command, the coordinates geometry of the molecules will be correct! double dtfmsq; if (rmass) { for (int i = 0; i < nlocal; i++) { dtfmsq = dtfsq/ rmass[i]; x[i][0] = x[i][0] + dtfmsq*f[i][0]; x[i][1] = x[i][1] + dtfmsq*f[i][1]; x[i][2] = x[i][2] + dtfmsq*f[i][2]; } } else { for (int i = 0; i < nlocal; i++) { dtfmsq = dtfsq / mass[type[i]]; x[i][0] = x[i][0] + dtfmsq*f[i][0]; x[i][1] = x[i][1] + dtfmsq*f[i][1]; x[i][2] = x[i][2] + dtfmsq*f[i][2]; } } // copy forces and velocities back for (int j=0; jdt * update->dt * force->ftm2v; // communicate changes // NOTE: for compatibility xshake is temporarily set to x, such that pack/unpack_forward // can be used for communicating the coordinates. double **xtmp = xshake; xshake = x; if (nprocs > 1) { forward_comm_device = 0; comm->forward_comm_fix(this); forward_comm_device = 1; } xshake = xtmp; atomKK->modified(Host,X_MASK|V_MASK|F_MASK); } /* ---------------------------------------------------------------------- tally virial into global and per-atom accumulators n = # of local owned atoms involved, with local indices in list v = total virial for the interaction involving total atoms increment global virial by n/total fraction increment per-atom virial of each atom in list by 1/total fraction this method can be used when fix computes forces in post_force() e.g. fix shake, fix rigid: compute virial only on owned atoms whether newton_bond is on or off other procs will tally left-over fractions for atoms they own ------------------------------------------------------------------------- */ template template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::v_tally(EV_FLOAT &ev, int n, int *list, double total, double *v) const { int m; if (vflag_global) { double fraction = n/total; ev.v[0] += fraction*v[0]; ev.v[1] += fraction*v[1]; ev.v[2] += fraction*v[2]; ev.v[3] += fraction*v[3]; ev.v[4] += fraction*v[4]; ev.v[5] += fraction*v[5]; } if (vflag_atom) { double fraction = 1.0/total; for (int i = 0; i < n; i++) { auto v_vatom = ScatterViewHelper::value,decltype(dup_vatom),decltype(ndup_vatom)>::get(dup_vatom,ndup_vatom); auto a_vatom = v_vatom.template access::value>(); m = list[i]; a_vatom(m,0) += fraction*v[0]; a_vatom(m,1) += fraction*v[1]; a_vatom(m,2) += fraction*v[2]; a_vatom(m,3) += fraction*v[3]; a_vatom(m,4) += fraction*v[4]; a_vatom(m,5) += fraction*v[5]; } } } /* ---------------------------------------------------------------------- */ template void FixShakeKokkos::update_domain_variables() { triclinic = domain->triclinic; xperiodic = domain->xperiodic; xprd_half = domain->xprd_half; xprd = domain->xprd; yperiodic = domain->yperiodic; yprd_half = domain->yprd_half; yprd = domain->yprd; zperiodic = domain->zperiodic; zprd_half = domain->zprd_half; zprd = domain->zprd; xy = domain->xy; xz = domain->xz; yz = domain->yz; } /* ---------------------------------------------------------------------- minimum image convention in periodic dimensions use 1/2 of box size as test for triclinic, also add/subtract tilt factors in other dims as needed changed "if" to "while" to enable distance to far-away ghost atom returned by atom->map() to be wrapped back into box could be problem for looking up atom IDs when cutoff > boxsize this should not be used if atom has moved infinitely far outside box b/c while could iterate forever e.g. fix shake prediction of new position with highly overlapped atoms use minimum_image_once() instead copied from domain.cpp ------------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::minimum_image(double *delta) const { if (triclinic == 0) { if (xperiodic) { while (fabs(delta[0]) > xprd_half) { if (delta[0] < 0.0) delta[0] += xprd; else delta[0] -= xprd; } } if (yperiodic) { while (fabs(delta[1]) > yprd_half) { if (delta[1] < 0.0) delta[1] += yprd; else delta[1] -= yprd; } } if (zperiodic) { while (fabs(delta[2]) > zprd_half) { if (delta[2] < 0.0) delta[2] += zprd; else delta[2] -= zprd; } } } else { if (zperiodic) { while (fabs(delta[2]) > zprd_half) { if (delta[2] < 0.0) { delta[2] += zprd; delta[1] += yz; delta[0] += xz; } else { delta[2] -= zprd; delta[1] -= yz; delta[0] -= xz; } } } if (yperiodic) { while (fabs(delta[1]) > yprd_half) { if (delta[1] < 0.0) { delta[1] += yprd; delta[0] += xy; } else { delta[1] -= yprd; delta[0] -= xy; } } } if (xperiodic) { while (fabs(delta[0]) > xprd_half) { if (delta[0] < 0.0) delta[0] += xprd; else delta[0] -= xprd; } } } } /* ---------------------------------------------------------------------- minimum image convention in periodic dimensions use 1/2 of box size as test for triclinic, also add/subtract tilt factors in other dims as needed only shift by one box length in each direction this should not be used if multiple box shifts are required copied from domain.cpp ------------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION void FixShakeKokkos::minimum_image_once(double *delta) const { if (triclinic == 0) { if (xperiodic) { if (fabs(delta[0]) > xprd_half) { if (delta[0] < 0.0) delta[0] += xprd; else delta[0] -= xprd; } } if (yperiodic) { if (fabs(delta[1]) > yprd_half) { if (delta[1] < 0.0) delta[1] += yprd; else delta[1] -= yprd; } } if (zperiodic) { if (fabs(delta[2]) > zprd_half) { if (delta[2] < 0.0) delta[2] += zprd; else delta[2] -= zprd; } } } else { if (zperiodic) { if (fabs(delta[2]) > zprd_half) { if (delta[2] < 0.0) { delta[2] += zprd; delta[1] += yz; delta[0] += xz; } else { delta[2] -= zprd; delta[1] -= yz; delta[0] -= xz; } } } if (yperiodic) { if (fabs(delta[1]) > yprd_half) { if (delta[1] < 0.0) { delta[1] += yprd; delta[0] += xy; } else { delta[1] -= yprd; delta[0] -= xy; } } } if (xperiodic) { if (fabs(delta[0]) > xprd_half) { if (delta[0] < 0.0) delta[0] += xprd; else delta[0] -= xprd; } } } } /* ---------------------------------------------------------------------- */ namespace LAMMPS_NS { template class FixShakeKokkos; #ifdef LMP_KOKKOS_GPU template class FixShakeKokkos; #endif }