diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp new file mode 100755 index 0000000000..2e56307779 --- /dev/null +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -0,0 +1,1177 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "kokkos.h" +#include "pair_kokkos.h" +#include "pair_eam_alloy_kokkos.h" +#include "atom_kokkos.h" +#include "force.h" +#include "comm.h" +#include "neighbor.h" +#include "neigh_list_kokkos.h" +#include "neigh_request.h" +#include "memory.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; + +#define MAXLINE 1024 + +// Cannot use virtual inheritance on the GPU, so must duplicate code + +/* ---------------------------------------------------------------------- */ + +template +PairEAMAlloyKokkos::PairEAMAlloyKokkos(LAMMPS *lmp) : PairEAM(lmp) +{ + respa_enable = 0; + one_coeff = 1; + manybody_flag = 1; + + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* 
---------------------------------------------------------------------- */ + +template +PairEAMAlloyKokkos::~PairEAMAlloyKokkos() +{ + if (!copymode) { + memory->destroy_kokkos(k_eatom,eatom); + memory->destroy_kokkos(k_vatom,vatom); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairEAMAlloyKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.d_view; + } + if (vflag_atom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.d_view; + } + + atomKK->sync(execution_space,datamask_read); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + // grow energy and fp arrays if necessary + // need to be atom->nmax in length + + if (atom->nmax > nmax) { + nmax = atom->nmax; + k_rho = DAT::tdual_ffloat_1d("pair:rho",nmax); + k_fp = DAT::tdual_ffloat_1d("pair:fp",nmax); + d_rho = k_rho.d_view; + d_fp = k_fp.d_view; + h_rho = k_rho.h_view; + h_fp = k_fp.h_view; + } + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + v_rho = k_rho.view(); + type = atomKK->k_type.view(); + tag = atomKK->k_tag.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + int inum = list->inum; + + // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle + + k_list->clean_copy(); + copymode = 1; + + // zero out 
density + + if (newton_pair) + Kokkos::parallel_for(Kokkos::RangePolicy(0,nall),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + DeviceType::fence(); + + // loop over neighbors of my atoms + + EV_FLOAT ev; + + // compute kernel A + + if (neighflag == HALF || neighflag == HALFTHREAD) { + + if (neighflag == HALF) { + if (newton_pair) { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } + DeviceType::fence(); + + // communicate and sum densities (on the host) + + if (newton_pair) { + k_rho.template modify(); + k_rho.template sync(); + comm->reverse_comm_pair(this); + k_rho.template modify(); + k_rho.template sync(); + } + + // compute kernel B + + if (eflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + + } else if (neighflag == FULL) { + + // compute kernel AB + + if (eflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + } + + if (eflag) { + eng_vdwl += ev.evdwl; + ev.evdwl = 0.0; + } + + // communicate derivative of embedding function (on the device) + + comm->forward_comm_pair(this); + + // compute kernel C + + if (evflag) { + if (neighflag == HALF) { + if (newton_pair) { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } + } 
else if (neighflag == FULL) { + if (newton_pair) { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } + } + } else { + if (neighflag == HALF) { + if (newton_pair) { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == FULL) { + if (newton_pair) { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } + } + DeviceType::fence(); + + if (eflag_global) eng_vdwl += ev.evdwl; + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + copymode = 0; +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairEAMAlloyKokkos::init_style() +{ + // convert read-in file(s) to arrays and spline them + + PairEAM::init_style(); + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same::value && + !Kokkos::Impl::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same::value; + + if (neighflag == FULL) { + neighbor->requests[irequest]->full = 
1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full_cluster = 0; + } else if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + neighbor->requests[irequest]->full_cluster = 0; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/alloy"); + } + +} + +template +void PairEAMAlloyKokkos::file2array() +{ + file2array_alloy(); + + int i,j; + int n = atom->ntypes; + + DAT::tdual_int_1d k_type2frho = DAT::tdual_int_1d("pair:type2frho",n+1); + DAT::tdual_int_2d k_type2rhor = DAT::tdual_int_2d("pair:type2rhor",n+1,n+1); + DAT::tdual_int_2d k_type2z2r = DAT::tdual_int_2d("pair:type2z2r",n+1,n+1); + + HAT::t_int_1d h_type2frho = k_type2frho.h_view; + HAT::t_int_2d h_type2rhor = k_type2rhor.h_view; + HAT::t_int_2d h_type2z2r = k_type2z2r.h_view; + + for (i = 1; i <= n; i++) { + h_type2frho[i] = type2frho[i]; + for (j = 1; j <= n; j++) { + h_type2rhor(i,j) = type2rhor[i][j]; + h_type2z2r(i,j) = type2z2r[i][j]; + } + } + k_type2frho.template modify(); + k_type2frho.template sync(); + k_type2rhor.template modify(); + k_type2rhor.template sync(); + k_type2z2r.template modify(); + k_type2z2r.template sync(); + + d_type2frho = k_type2frho.d_view; + d_type2rhor = k_type2rhor.d_view; + d_type2z2r = k_type2z2r.d_view; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairEAMAlloyKokkos::array2spline() +{ + rdr = 1.0/dr; + rdrho = 1.0/drho; + + tdual_ffloat_2d_n7 k_frho_spline = tdual_ffloat_2d_n7("pair:frho",nfrho,nrho+1); + tdual_ffloat_2d_n7 k_rhor_spline = tdual_ffloat_2d_n7("pair:rhor",nrhor,nr+1); + tdual_ffloat_2d_n7 k_z2r_spline = tdual_ffloat_2d_n7("pair:z2r",nz2r,nr+1); + + t_host_ffloat_2d_n7 h_frho_spline = k_frho_spline.h_view; + t_host_ffloat_2d_n7 h_rhor_spline = k_rhor_spline.h_view; + t_host_ffloat_2d_n7 h_z2r_spline = k_z2r_spline.h_view; + + for (int i = 0; i < nfrho; i++) + 
interpolate(nrho,drho,frho[i],h_frho_spline,i); + k_frho_spline.template modify(); + k_frho_spline.template sync(); + + for (int i = 0; i < nrhor; i++) + interpolate(nr,dr,rhor[i],h_rhor_spline,i); + k_rhor_spline.template modify(); + k_rhor_spline.template sync(); + + for (int i = 0; i < nz2r; i++) + interpolate(nr,dr,z2r[i],h_z2r_spline,i); + k_z2r_spline.template modify(); + k_z2r_spline.template sync(); + + d_frho_spline = k_frho_spline.d_view; + d_rhor_spline = k_rhor_spline.d_view; + d_z2r_spline = k_z2r_spline.d_view; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairEAMAlloyKokkos::interpolate(int n, double delta, double *f, t_host_ffloat_2d_n7 h_spline, int i) +{ + for (int m = 1; m <= n; m++) h_spline(i,m,6) = f[m]; + + h_spline(i,1,5) = h_spline(i,2,6) - h_spline(i,1,6); + h_spline(i,2,5) = 0.5 * (h_spline(i,3,6)-h_spline(i,1,6)); + h_spline(i,n-1,5) = 0.5 * (h_spline(i,n,6)-h_spline(i,n-2,6)); + h_spline(i,n,5) = h_spline(i,n,6) - h_spline(i,n-1,6); + + for (int m = 3; m <= n-2; m++) + h_spline(i,m,5) = ((h_spline(i,m-2,6)-h_spline(i,m+2,6)) + + 8.0*(h_spline(i,m+1,6)-h_spline(i,m-1,6))) / 12.0; + + for (int m = 1; m <= n-1; m++) { + h_spline(i,m,4) = 3.0*(h_spline(i,m+1,6)-h_spline(i,m,6)) - + 2.0*h_spline(i,m,5) - h_spline(i,m+1,5); + h_spline(i,m,3) = h_spline(i,m,5) + h_spline(i,m+1,5) - + 2.0*(h_spline(i,m+1,6)-h_spline(i,m,6)); + } + + h_spline(i,n,4) = 0.0; + h_spline(i,n,3) = 0.0; + + for (int m = 1; m <= n; m++) { + h_spline(i,m,2) = h_spline(i,m,5)/delta; + h_spline(i,m,1) = 2.0*h_spline(i,m,4)/delta; + h_spline(i,m,0) = 3.0*h_spline(i,m,3)/delta; + } +} + +/* ---------------------------------------------------------------------- */ + +template +int PairEAMAlloyKokkos::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf, + int pbc_flag, int *pbc) +{ + d_sendlist = k_sendlist.view(); + iswap = iswap_in; + v_buf = buf.view(); + 
Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); + DeviceType::fence(); + return n; +} + +template +KOKKOS_INLINE_FUNCTION +void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyPackForwardComm, const int &i) const { + int j = d_sendlist(iswap, i); + v_buf[i] = d_fp[j]; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairEAMAlloyKokkos::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf) +{ + first = first_in; + v_buf = buf.view(); + Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); + DeviceType::fence(); +} + +template +KOKKOS_INLINE_FUNCTION +void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyUnpackForwardComm, const int &i) const { + d_fp[i + first] = v_buf[i]; +} + +/* ---------------------------------------------------------------------- */ + +template +int PairEAMAlloyKokkos::pack_forward_comm(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j; + + for (i = 0; i < n; i++) { + j = list[i]; + buf[i] = h_fp[j]; + } + return n; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairEAMAlloyKokkos::unpack_forward_comm(int n, int first, double *buf) +{ + for (int i = 0; i < n; i++) { + h_fp[i + first] = buf[i]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +int PairEAMAlloyKokkos::pack_reverse_comm(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) buf[m++] = h_rho[i]; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairEAMAlloyKokkos::unpack_reverse_comm(int n, int *list, double *buf) +{ + int i,j,m; + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + h_rho[j] += buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void 
PairEAMAlloyKokkos::operator()(TagPairEAMAlloyInitialize, const int &i) const { + d_rho[i] = 0.0; +} + +/* ---------------------------------------------------------------------- */ + +////Specialisation for Neighborlist types Half, HalfThread, Full +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelA, const int &ii) const { + + // rho = density at each atom + // loop over neighbors of my atoms + + // The rho array is atomic for Half/Thread neighbor style + Kokkos::View::value> > rho = v_rho; + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); + const int jnum = d_numneigh[i]; + + F_FLOAT rhotmp = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + //int j = d_neighbors_i[jj]; + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const int jtype = type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + F_FLOAT p = sqrt(rsq)*rdr + 1.0; + int m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + const int d_type2rhor_ji = d_type2rhor(jtype,itype); + rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + + d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); + if (NEWTON_PAIR || j < nlocal) { + const int d_type2rhor_ij = d_type2rhor(itype,jtype); + rho[j] += ((d_rhor_spline(d_type2rhor_ij,m,3)*p + d_rhor_spline(d_type2rhor_ij,m,4))*p + + d_rhor_spline(d_type2rhor_ij,m,5))*p + d_rhor_spline(d_type2rhor_ij,m,6); + } + } + + } + rho[i] += rhotmp; +} + +/* ---------------------------------------------------------------------- */ + +////Specialisation for Neighborlist types Half, HalfThread, Full +template +template 
+KOKKOS_INLINE_FUNCTION +void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelB, const int &ii, EV_FLOAT& ev) const { + + // fp = derivative of embedding energy at each atom + // phi = embedding energy at each atom + // if rho > rhomax (e.g. due to close approach of two atoms), + // will exceed table, so add linear term to conserve energy + + const int i = d_ilist[ii]; + const int itype = type(i); + + F_FLOAT p = d_rho[i]*rdrho + 1.0; + int m = static_cast (p); + m = MAX(1,MIN(m,nrho-1)); + p -= m; + p = MIN(p,1.0); + const int d_type2frho_i = d_type2frho[itype]; + d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2); + if (EFLAG) { + F_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p + + d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6); + if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax); + if (eflag_global) ev.evdwl += phi; + if (eflag_atom) d_eatom[i] += phi; + } + +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelB, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairEAMAlloyKernelB(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +////Specialisation for Neighborlist types Half, HalfThread, Full +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelAB, const int &ii, EV_FLOAT& ev) const { + + // rho = density at each atom + // loop over neighbors of my atoms + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); + const int jnum = d_numneigh[i]; + + F_FLOAT rhotmp = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + //int j = d_neighbors_i[jj]; + int j = d_neighbors(i,jj); + j &= 
NEIGHMASK; + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const int jtype = type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + F_FLOAT p = sqrt(rsq)*rdr + 1.0; + int m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + const int d_type2rhor_ji = d_type2rhor(jtype,itype); + rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + + d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); + } + + } + d_rho[i] += rhotmp; + + // fp = derivative of embedding energy at each atom + // phi = embedding energy at each atom + // if rho > rhomax (e.g. due to close approach of two atoms), + // will exceed table, so add linear term to conserve energy + + F_FLOAT p = d_rho[i]*rdrho + 1.0; + int m = static_cast (p); + m = MAX(1,MIN(m,nrho-1)); + p -= m; + p = MIN(p,1.0); + const int d_type2frho_i = d_type2frho[itype]; + d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2); + if (EFLAG) { + F_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p + + d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6); + if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax); + if (eflag_global) ev.evdwl += phi; + if (eflag_atom) d_eatom[i] += phi; + } + +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelAB, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairEAMAlloyKernelAB(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +////Specialisation for Neighborlist types Half, HalfThread, Full +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelC, const int &ii, EV_FLOAT& ev) const { + + // The f array is atomic for Half/Thread neighbor 
style + Kokkos::View::value> > a_f = f; + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); + const int jnum = d_numneigh[i]; + + F_FLOAT fxtmp = 0.0; + F_FLOAT fytmp = 0.0; + F_FLOAT fztmp = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + //int j = d_neighbors_i[jj]; + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const int jtype = type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < cutforcesq) { + const F_FLOAT r = sqrt(rsq); + F_FLOAT p = r*rdr + 1.0; + int m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + + // rhoip = derivative of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // phi = pair potential energy + // phip = phi' + // z2 = phi * r + // z2p = (phi * r)' = (phi' r) + phi + // psip needs both fp[i] and fp[j] terms since r_ij appears in two + // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) + // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip + + const int d_type2rhor_ij = d_type2rhor(itype,jtype); + const F_FLOAT rhoip = (d_rhor_spline(d_type2rhor_ij,m,0)*p + d_rhor_spline(d_type2rhor_ij,m,1))*p + + d_rhor_spline(d_type2rhor_ij,m,2); + const int d_type2rhor_ji = d_type2rhor(jtype,itype); + const F_FLOAT rhojp = (d_rhor_spline(d_type2rhor_ji,m,0)*p + d_rhor_spline(d_type2rhor_ji,m,1))*p + + d_rhor_spline(d_type2rhor_ji,m,2); + const int d_type2z2r_ij = d_type2z2r(itype,jtype); + const F_FLOAT z2p = (d_z2r_spline(d_type2z2r_ij,m,0)*p + d_z2r_spline(d_type2z2r_ij,m,1))*p + + d_z2r_spline(d_type2z2r_ij,m,2); + const F_FLOAT z2 = ((d_z2r_spline(d_type2z2r_ij,m,3)*p + d_z2r_spline(d_type2z2r_ij,m,4))*p + + d_z2r_spline(d_type2z2r_ij,m,5))*p + 
d_z2r_spline(d_type2z2r_ij,m,6); + + const F_FLOAT recip = 1.0/r; + const F_FLOAT phi = z2*recip; + const F_FLOAT phip = z2p*recip - phi*recip; + const F_FLOAT psip = d_fp[i]*rhojp + d_fp[j]*rhoip + phip; + const F_FLOAT fpair = -psip*recip; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp += delz*fpair; + + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { + a_f(j,0) -= delx*fpair; + a_f(j,1) -= dely*fpair; + a_f(j,2) -= delz*fpair; + } + + if (EVFLAG) { + if (eflag) { + ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(jtemplate ev_tally(ev,i,j,phi,fpair,delx,dely,delz); + } + + } + } + + a_f(i,0) += fxtmp; + a_f(i,1) += fytmp; + a_f(i,2) += fztmp; +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelC, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairEAMAlloyKernelC(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMAlloyKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const +{ + const int EFLAG = eflag; + const int VFLAG = vflag_either; + + // The eatom and vatom arrays are atomic for Half/Thread neighbor style + Kokkos::View::value> > v_eatom = k_eatom.view(); + Kokkos::View::value> > v_vatom = k_vatom.view(); + + if (EFLAG) { + if (eflag_atom) { + const E_FLOAT epairhalf = 0.5 * epair; + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf; + if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf; + } else { + v_eatom[i] += epairhalf; + } + } + } + + if (VFLAG) { + const E_FLOAT v0 = delx*delx*fpair; + const E_FLOAT v1 = dely*dely*fpair; + const E_FLOAT v2 = delz*delz*fpair; + const E_FLOAT v3 = delx*dely*fpair; + const E_FLOAT v4 = delx*delz*fpair; + const E_FLOAT v5 = 
dely*delz*fpair; + + if (vflag_global) { + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + if (NEWTON_PAIR || j < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } else { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + + if (vflag_atom) { + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + } + if (NEWTON_PAIR || j < nlocal) { + v_vatom(j,0) += 0.5*v0; + v_vatom(j,1) += 0.5*v1; + v_vatom(j,2) += 0.5*v2; + v_vatom(j,3) += 0.5*v3; + v_vatom(j,4) += 0.5*v4; + v_vatom(j,5) += 0.5*v5; + } + } else { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +// Duplicate PairEAMAlloy functions + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs + read DYNAMO setfl file +------------------------------------------------------------------------- */ + +template +void PairEAMAlloyKokkos::coeff(int narg, char **arg) +{ + int i,j; + + if (!allocated) allocate(); + + if (narg != 3 + atom->ntypes) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // insure I,J args are * * + + if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // read EAM setfl file + + if (setfl) { + for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i]; + delete [] setfl->elements; 
+ delete [] setfl->mass; + memory->destroy(setfl->frho); + memory->destroy(setfl->rhor); + memory->destroy(setfl->z2r); + delete setfl; + } + setfl = new Setfl(); + read_file(arg[2]); + + // read args that map atom types to elements in potential file + // map[i] = which element the Ith atom type is, -1 if NULL + + for (i = 3; i < narg; i++) { + if (strcmp(arg[i],"NULL") == 0) { + map[i-2] = -1; + continue; + } + for (j = 0; j < setfl->nelements; j++) + if (strcmp(arg[i],setfl->elements[j]) == 0) break; + if (j < setfl->nelements) map[i-2] = j; + else error->all(FLERR,"No matching element in EAM potential file"); + } + + // clear setflag since coeff() called once with I,J = * * + + int n = atom->ntypes; + for (i = 1; i <= n; i++) + for (j = i; j <= n; j++) + setflag[i][j] = 0; + + // set setflag i,j for type pairs where both are mapped to elements + // set mass of atom type if i = j + + int count = 0; + for (i = 1; i <= n; i++) { + for (j = i; j <= n; j++) { + if (map[i] >= 0 && map[j] >= 0) { + setflag[i][j] = 1; + if (i == j) atom->set_mass(i,setfl->mass[map[i]]); + count++; + } + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + read a multi-element DYNAMO setfl file +------------------------------------------------------------------------- */ + +template +void PairEAMAlloyKokkos::read_file(char *filename) +{ + Setfl *file = setfl; + + // open potential file + + int me = comm->me; + FILE *fptr; + char line[MAXLINE]; + + if (me == 0) { + fptr = force->open_potential(filename); + if (fptr == NULL) { + char str[128]; + sprintf(str,"Cannot open EAM potential file %s",filename); + error->one(FLERR,str); + } + } + + // read and broadcast header + // extract element names from nelements line + + int n; + if (me == 0) { + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + n = strlen(line) + 1; + } 
+ MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + sscanf(line,"%d",&file->nelements); + int nwords = atom->count_words(line); + if (nwords != file->nelements + 1) + error->all(FLERR,"Incorrect element names in EAM potential file"); + + char **words = new char*[file->nelements+1]; + nwords = 0; + strtok(line," \t\n\r\f"); + while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; + + file->elements = new char*[file->nelements]; + for (int i = 0; i < file->nelements; i++) { + n = strlen(words[i]) + 1; + file->elements[i] = new char[n]; + strcpy(file->elements[i],words[i]); + } + delete [] words; + + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg %d %lg %lg", + &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut); + } + + MPI_Bcast(&file->nrho,1,MPI_INT,0,world); + MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->nr,1,MPI_INT,0,world); + MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world); + + file->mass = new double[file->nelements]; + memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho"); + memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor"); + memory->create(file->z2r,file->nelements,file->nelements,file->nr+1, + "pair:z2r"); + + int i,j,tmp; + for (i = 0; i < file->nelements; i++) { + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg",&tmp,&file->mass[i]); + } + MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world); + + if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]); + MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world); + if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]); + MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world); + } + + for (i = 0; i < file->nelements; i++) + for (j = 0; j <= i; j++) { + if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]); + MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world); + } + + // close the potential file + + if (me == 0) fclose(fptr); +} + +/* 
---------------------------------------------------------------------- + copy read-in setfl potential to standard array format +------------------------------------------------------------------------- */ + +template +void PairEAMAlloyKokkos::file2array_alloy() +{ + int i,j,m,n; + int ntypes = atom->ntypes; + + // set function params directly from setfl file + + nrho = setfl->nrho; + nr = setfl->nr; + drho = setfl->drho; + dr = setfl->dr; + rhomax = (nrho-1) * drho; + + // ------------------------------------------------------------------ + // setup frho arrays + // ------------------------------------------------------------------ + + // allocate frho arrays + // nfrho = # of setfl elements + 1 for zero array + + nfrho = setfl->nelements + 1; + memory->destroy(frho); + memory->create(frho,nfrho,nrho+1,"pair:frho"); + + // copy each element's frho to global frho + + for (i = 0; i < setfl->nelements; i++) + for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m]; + + // add extra frho of zeroes for non-EAM types to point to (pair hybrid) + // this is necessary b/c fp is still computed for non-EAM atoms + + for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0; + + // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to + // if atom type doesn't point to element (non-EAM atom in pair hybrid) + // then map it to last frho array of zeroes + + for (i = 1; i <= ntypes; i++) + if (map[i] >= 0) type2frho[i] = map[i]; + else type2frho[i] = nfrho-1; + + // ------------------------------------------------------------------ + // setup rhor arrays + // ------------------------------------------------------------------ + + // allocate rhor arrays + // nrhor = # of setfl elements + + nrhor = setfl->nelements; + memory->destroy(rhor); + memory->create(rhor,nrhor,nr+1,"pair:rhor"); + + // copy each element's rhor to global rhor + + for (i = 0; i < setfl->nelements; i++) + for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m]; + + // type2rhor[i][j] = which rhor 
array (0 to nrhor-1) each type pair maps to + // for setfl files, I,J mapping only depends on I + // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used + + for (i = 1; i <= ntypes; i++) + for (j = 1; j <= ntypes; j++) + type2rhor[i][j] = map[i]; + + // ------------------------------------------------------------------ + // setup z2r arrays + // ------------------------------------------------------------------ + + // allocate z2r arrays + // nz2r = N*(N+1)/2 where N = # of setfl elements + + nz2r = setfl->nelements * (setfl->nelements+1) / 2; + memory->destroy(z2r); + memory->create(z2r,nz2r,nr+1,"pair:z2r"); + + // copy each element pair z2r to global z2r, only for I >= J + + n = 0; + for (i = 0; i < setfl->nelements; i++) + for (j = 0; j <= i; j++) { + for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m]; + n++; + } + + // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to + // set of z2r arrays only fill lower triangular Nelement matrix + // value = n = sum over rows of lower-triangular matrix until reach irow,icol + // swap indices when irow < icol to stay lower triangular + // if map = -1 (non-EAM atom in pair hybrid): + // type2z2r is not used by non-opt + // but set type2z2r to 0 since accessed by opt + + int irow,icol; + for (i = 1; i <= ntypes; i++) { + for (j = 1; j <= ntypes; j++) { + irow = map[i]; + icol = map[j]; + if (irow == -1 || icol == -1) { + type2z2r[i][j] = 0; + continue; + } + if (irow < icol) { + irow = map[j]; + icol = map[i]; + } + n = 0; + for (m = 0; m < irow; m++) n += m + 1; + n += icol; + type2z2r[i][j] = n; + } + } +} + +/* ---------------------------------------------------------------------- */ + +template class PairEAMAlloyKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class PairEAMAlloyKokkos; +#endif \ No newline at end of file diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.h b/src/KOKKOS/pair_eam_alloy_kokkos.h new file mode 100755 index 0000000000..2d48f0fde5 --- /dev/null +++ 
b/src/KOKKOS/pair_eam_alloy_kokkos.h @@ -0,0 +1,183 @@ +/* -*- c++ -*- ---------------------------------------------------------- + + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(eam/alloy/kk,PairEAMAlloyKokkos) +PairStyle(eam/alloy/kk/device,PairEAMAlloyKokkos) +PairStyle(eam/alloy/kk/host,PairEAMAlloyKokkos) + +#else + +#ifndef LMP_PAIR_EAM_ALLOY_KOKKOS_H +#define LMP_PAIR_EAM_ALLOY_KOKKOS_H + +#include "stdio.h" +#include "pair_kokkos.h" +#include "pair_eam.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +struct TagPairEAMAlloyPackForwardComm{}; +struct TagPairEAMAlloyUnpackForwardComm{}; +struct TagPairEAMAlloyInitialize{}; + +template +struct TagPairEAMAlloyKernelA{}; + +template +struct TagPairEAMAlloyKernelB{}; + +template +struct TagPairEAMAlloyKernelAB{}; + +template +struct TagPairEAMAlloyKernelC{}; + +// Cannot use virtual inheritance on the GPU + +template +class PairEAMAlloyKokkos : public PairEAM { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {COUL_FLAG=0}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef EV_FLOAT value_type; + + PairEAMAlloyKokkos(class LAMMPS *); + virtual ~PairEAMAlloyKokkos(); + virtual void compute(int, int); + void init_style(); + void coeff(int, char **); + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMAlloyPackForwardComm, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMAlloyUnpackForwardComm, const int&) const; + + 
KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMAlloyInitialize, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMAlloyKernelA, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMAlloyKernelB, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMAlloyKernelB, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMAlloyKernelAB, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMAlloyKernelAB, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMAlloyKernelC, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMAlloyKernelC, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const; + + virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&, + int, int *); + virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&); + virtual int pack_forward_comm(int, int *, double *, int, int *); + virtual void unpack_forward_comm(int, int, double *); + int pack_reverse_comm(int, int, double *); + void unpack_reverse_comm(int, int *, double *); + + protected: + void cleanup_copy(); + + typename AT::t_x_array_randomread x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + typename AT::t_tagint_1d tag; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + DAT::t_efloat_1d d_eatom; + DAT::t_virial_array d_vatom; + + DAT::tdual_ffloat_1d k_rho; + DAT::tdual_ffloat_1d k_fp; + DAT::t_ffloat_1d d_rho; + typename AT::t_ffloat_1d v_rho; + DAT::t_ffloat_1d d_fp; + HAT::t_ffloat_1d h_rho; + HAT::t_ffloat_1d h_fp; + + DAT::t_int_1d_randomread d_type2frho; + 
DAT::t_int_2d_randomread d_type2rhor; + DAT::t_int_2d_randomread d_type2z2r; + + typedef Kokkos::DualView tdual_ffloat_2d_n7; + typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread; + typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7; + + t_ffloat_2d_n7_randomread d_frho_spline; + t_ffloat_2d_n7_randomread d_rhor_spline; + t_ffloat_2d_n7_randomread d_z2r_spline; + + virtual void file2array(); + void file2array_alloy(); + void array2spline(); + void interpolate(int, double, double *, t_host_ffloat_2d_n7, int); + void read_file(char *); + + typename ArrayTypes::t_neighbors_2d d_neighbors; + typename ArrayTypes::t_int_1d_randomread d_ilist; + typename ArrayTypes::t_int_1d_randomread d_numneigh; + //NeighListKokkos k_list; + + int iswap; + int first; + typename AT::t_int_2d d_sendlist; + typename AT::t_xfloat_1d_um v_buf; + + int neighflag,newton_pair; + int nlocal,nall,eflag,vflag; + + friend void pair_virial_fdotr_compute(PairEAMAlloyKokkos*); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Cannot use chosen neighbor list style with pair eam/kk/alloy + +That style is not supported by Kokkos. + +*/ diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp new file mode 100755 index 0000000000..a95deb8542 --- /dev/null +++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp @@ -0,0 +1,1186 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "kokkos.h" +#include "pair_kokkos.h" +#include "pair_eam_fs_kokkos.h" +#include "atom_kokkos.h" +#include "force.h" +#include "comm.h" +#include "neighbor.h" +#include "neigh_list_kokkos.h" +#include "neigh_request.h" +#include "memory.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; + +#define MAXLINE 1024 + +// Cannot use virtual inheritance on the GPU, so must duplicate code + +/* ---------------------------------------------------------------------- */ + +template +PairEAMFSKokkos::PairEAMFSKokkos(LAMMPS *lmp) : PairEAM(lmp) +{ + one_coeff = 1; + manybody_flag = 1; + respa_enable = 0; + + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +PairEAMFSKokkos::~PairEAMFSKokkos() +{ + if (!copymode) { + memory->destroy_kokkos(k_eatom,eatom); + memory->destroy_kokkos(k_vatom,vatom); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairEAMFSKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.d_view; + } + if 
(vflag_atom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.d_view; + } + + atomKK->sync(execution_space,datamask_read); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + // grow energy and fp arrays if necessary + // need to be atom->nmax in length + + if (atom->nmax > nmax) { + nmax = atom->nmax; + k_rho = DAT::tdual_ffloat_1d("pair:rho",nmax); + k_fp = DAT::tdual_ffloat_1d("pair:fp",nmax); + d_rho = k_rho.d_view; + d_fp = k_fp.d_view; + h_rho = k_rho.h_view; + h_fp = k_fp.h_view; + } + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + v_rho = k_rho.view(); + type = atomKK->k_type.view(); + tag = atomKK->k_tag.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + int inum = list->inum; + + // Call cleanup_copy which sets allocations NULL which are destructed by the PairStyle + + k_list->clean_copy(); + copymode = 1; + + // zero out density + + if (newton_pair) + Kokkos::parallel_for(Kokkos::RangePolicy(0,nall),*this); + else + Kokkos::parallel_for(Kokkos::RangePolicy(0,nlocal),*this); + DeviceType::fence(); + + // loop over neighbors of my atoms + + EV_FLOAT ev; + + // compute kernel A + + if (neighflag == HALF || neighflag == HALFTHREAD) { + + if (neighflag == HALF) { + if (newton_pair) { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } + DeviceType::fence(); + + // communicate and sum densities (on the host) + + if 
(newton_pair) { + k_rho.template modify(); + k_rho.template sync(); + comm->reverse_comm_pair(this); + k_rho.template modify(); + k_rho.template sync(); + } + + // compute kernel B + + if (eflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + + } else if (neighflag == FULL) { + + // compute kernel AB + + if (eflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + } + + if (eflag) { + eng_vdwl += ev.evdwl; + ev.evdwl = 0.0; + } + + // communicate derivative of embedding function (on the device) + + comm->forward_comm_pair(this); + + // compute kernel C + + if (evflag) { + if (neighflag == HALF) { + if (newton_pair) { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } + } else if (neighflag == FULL) { + if (newton_pair) { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } else { + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + } + } + } else { + if (neighflag == HALF) { + if (newton_pair) { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == HALFTHREAD) { + if (newton_pair) { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } + } else if (neighflag == FULL) { + if (newton_pair) { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + } else { + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + 
} + } + } + DeviceType::fence(); + + if (eflag_global) eng_vdwl += ev.evdwl; + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + copymode = 0; +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairEAMFSKokkos::init_style() +{ + // convert read-in file(s) to arrays and spline them + + PairEAM::init_style(); + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same::value && + !Kokkos::Impl::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same::value; + + if (neighflag == FULL) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full_cluster = 0; + } else if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + neighbor->requests[irequest]->full_cluster = 0; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/fs"); + } + +} + +template +void PairEAMFSKokkos::file2array() +{ + file2array_fs(); + + int i,j; + int n = atom->ntypes; + + DAT::tdual_int_1d k_type2frho = DAT::tdual_int_1d("pair:type2frho",n+1); + DAT::tdual_int_2d k_type2rhor = DAT::tdual_int_2d("pair:type2rhor",n+1,n+1); + DAT::tdual_int_2d k_type2z2r = DAT::tdual_int_2d("pair:type2z2r",n+1,n+1); + + HAT::t_int_1d h_type2frho = k_type2frho.h_view; + 
HAT::t_int_2d h_type2rhor = k_type2rhor.h_view; + HAT::t_int_2d h_type2z2r = k_type2z2r.h_view; + + for (i = 1; i <= n; i++) { + h_type2frho[i] = type2frho[i]; + for (j = 1; j <= n; j++) { + h_type2rhor(i,j) = type2rhor[i][j]; + h_type2z2r(i,j) = type2z2r[i][j]; + } + } + k_type2frho.template modify(); + k_type2frho.template sync(); + k_type2rhor.template modify(); + k_type2rhor.template sync(); + k_type2z2r.template modify(); + k_type2z2r.template sync(); + + d_type2frho = k_type2frho.d_view; + d_type2rhor = k_type2rhor.d_view; + d_type2z2r = k_type2z2r.d_view; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairEAMFSKokkos::array2spline() +{ + rdr = 1.0/dr; + rdrho = 1.0/drho; + + tdual_ffloat_2d_n7 k_frho_spline = tdual_ffloat_2d_n7("pair:frho",nfrho,nrho+1); + tdual_ffloat_2d_n7 k_rhor_spline = tdual_ffloat_2d_n7("pair:rhor",nrhor,nr+1); + tdual_ffloat_2d_n7 k_z2r_spline = tdual_ffloat_2d_n7("pair:z2r",nz2r,nr+1); + + t_host_ffloat_2d_n7 h_frho_spline = k_frho_spline.h_view; + t_host_ffloat_2d_n7 h_rhor_spline = k_rhor_spline.h_view; + t_host_ffloat_2d_n7 h_z2r_spline = k_z2r_spline.h_view; + + for (int i = 0; i < nfrho; i++) + interpolate(nrho,drho,frho[i],h_frho_spline,i); + k_frho_spline.template modify(); + k_frho_spline.template sync(); + + for (int i = 0; i < nrhor; i++) + interpolate(nr,dr,rhor[i],h_rhor_spline,i); + k_rhor_spline.template modify(); + k_rhor_spline.template sync(); + + for (int i = 0; i < nz2r; i++) + interpolate(nr,dr,z2r[i],h_z2r_spline,i); + k_z2r_spline.template modify(); + k_z2r_spline.template sync(); + + d_frho_spline = k_frho_spline.d_view; + d_rhor_spline = k_rhor_spline.d_view; + d_z2r_spline = k_z2r_spline.d_view; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairEAMFSKokkos::interpolate(int n, double delta, double *f, t_host_ffloat_2d_n7 h_spline, int i) +{ + for (int m = 1; m <= n; m++) h_spline(i,m,6) = f[m]; + + 
h_spline(i,1,5) = h_spline(i,2,6) - h_spline(i,1,6); + h_spline(i,2,5) = 0.5 * (h_spline(i,3,6)-h_spline(i,1,6)); + h_spline(i,n-1,5) = 0.5 * (h_spline(i,n,6)-h_spline(i,n-2,6)); + h_spline(i,n,5) = h_spline(i,n,6) - h_spline(i,n-1,6); + + for (int m = 3; m <= n-2; m++) + h_spline(i,m,5) = ((h_spline(i,m-2,6)-h_spline(i,m+2,6)) + + 8.0*(h_spline(i,m+1,6)-h_spline(i,m-1,6))) / 12.0; + + for (int m = 1; m <= n-1; m++) { + h_spline(i,m,4) = 3.0*(h_spline(i,m+1,6)-h_spline(i,m,6)) - + 2.0*h_spline(i,m,5) - h_spline(i,m+1,5); + h_spline(i,m,3) = h_spline(i,m,5) + h_spline(i,m+1,5) - + 2.0*(h_spline(i,m+1,6)-h_spline(i,m,6)); + } + + h_spline(i,n,4) = 0.0; + h_spline(i,n,3) = 0.0; + + for (int m = 1; m <= n; m++) { + h_spline(i,m,2) = h_spline(i,m,5)/delta; + h_spline(i,m,1) = 2.0*h_spline(i,m,4)/delta; + h_spline(i,m,0) = 3.0*h_spline(i,m,3)/delta; + } +} + +/* ---------------------------------------------------------------------- */ + +template +int PairEAMFSKokkos::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d k_sendlist, int iswap_in, DAT::tdual_xfloat_1d &buf, + int pbc_flag, int *pbc) +{ + d_sendlist = k_sendlist.view(); + iswap = iswap_in; + v_buf = buf.view(); + Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); + DeviceType::fence(); + return n; +} + +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::operator()(TagPairEAMFSPackForwardComm, const int &i) const { + int j = d_sendlist(iswap, i); + v_buf[i] = d_fp[j]; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairEAMFSKokkos::unpack_forward_comm_kokkos(int n, int first_in, DAT::tdual_xfloat_1d &buf) +{ + first = first_in; + v_buf = buf.view(); + Kokkos::parallel_for(Kokkos::RangePolicy(0,n),*this); + DeviceType::fence(); +} + +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::operator()(TagPairEAMFSUnpackForwardComm, const int &i) const { + d_fp[i + first] = v_buf[i]; +} + +/* 
---------------------------------------------------------------------- */ + +template +int PairEAMFSKokkos::pack_forward_comm(int n, int *list, double *buf, + int pbc_flag, int *pbc) +{ + int i,j; + + for (i = 0; i < n; i++) { + j = list[i]; + buf[i] = h_fp[j]; + } + return n; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairEAMFSKokkos::unpack_forward_comm(int n, int first, double *buf) +{ + for (int i = 0; i < n; i++) { + h_fp[i + first] = buf[i]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +int PairEAMFSKokkos::pack_reverse_comm(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + for (i = first; i < last; i++) buf[m++] = h_rho[i]; + return m; +} + +/* ---------------------------------------------------------------------- */ + +template +void PairEAMFSKokkos::unpack_reverse_comm(int n, int *list, double *buf) +{ + int i,j,m; + + m = 0; + for (i = 0; i < n; i++) { + j = list[i]; + h_rho[j] += buf[m++]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::operator()(TagPairEAMFSInitialize, const int &i) const { + d_rho[i] = 0.0; +} + +/* ---------------------------------------------------------------------- */ + +////Specialisation for Neighborlist types Half, HalfThread, Full +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::operator()(TagPairEAMFSKernelA, const int &ii) const { + + // rho = density at each atom + // loop over neighbors of my atoms + + // The rho array is atomic for Half/Thread neighbor style + Kokkos::View::value> > rho = v_rho; + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); + const int jnum = d_numneigh[i]; + + 
F_FLOAT rhotmp = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + //int j = d_neighbors_i[jj]; + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const int jtype = type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + F_FLOAT p = sqrt(rsq)*rdr + 1.0; + int m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + const int d_type2rhor_ji = d_type2rhor(jtype,itype); + rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + + d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); + if (NEWTON_PAIR || j < nlocal) { + const int d_type2rhor_ij = d_type2rhor(itype,jtype); + rho[j] += ((d_rhor_spline(d_type2rhor_ij,m,3)*p + d_rhor_spline(d_type2rhor_ij,m,4))*p + + d_rhor_spline(d_type2rhor_ij,m,5))*p + d_rhor_spline(d_type2rhor_ij,m,6); + } + } + + } + rho[i] += rhotmp; +} + +/* ---------------------------------------------------------------------- */ + +////Specialisation for Neighborlist types Half, HalfThread, Full +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::operator()(TagPairEAMFSKernelB, const int &ii, EV_FLOAT& ev) const { + + // fp = derivative of embedding energy at each atom + // phi = embedding energy at each atom + // if rho > rhomax (e.g. 
due to close approach of two atoms), + // will exceed table, so add linear term to conserve energy + + const int i = d_ilist[ii]; + const int itype = type(i); + + F_FLOAT p = d_rho[i]*rdrho + 1.0; + int m = static_cast (p); + m = MAX(1,MIN(m,nrho-1)); + p -= m; + p = MIN(p,1.0); + const int d_type2frho_i = d_type2frho[itype]; + d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2); + if (EFLAG) { + F_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p + + d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6); + if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax); + if (eflag_global) ev.evdwl += phi; + if (eflag_atom) d_eatom[i] += phi; + } + +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::operator()(TagPairEAMFSKernelB, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairEAMFSKernelB(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +////Specialisation for Neighborlist types Half, HalfThread, Full +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::operator()(TagPairEAMFSKernelAB, const int &ii, EV_FLOAT& ev) const { + + // rho = density at each atom + // loop over neighbors of my atoms + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); + const int jnum = d_numneigh[i]; + + F_FLOAT rhotmp = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + //int j = d_neighbors_i[jj]; + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const int jtype = type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < cutforcesq) { + F_FLOAT p = sqrt(rsq)*rdr 
+ 1.0; + int m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + const int d_type2rhor_ji = d_type2rhor(jtype,itype); + rhotmp += ((d_rhor_spline(d_type2rhor_ji,m,3)*p + d_rhor_spline(d_type2rhor_ji,m,4))*p + + d_rhor_spline(d_type2rhor_ji,m,5))*p + d_rhor_spline(d_type2rhor_ji,m,6); + } + + } + d_rho[i] += rhotmp; + + // fp = derivative of embedding energy at each atom + // phi = embedding energy at each atom + // if rho > rhomax (e.g. due to close approach of two atoms), + // will exceed table, so add linear term to conserve energy + + F_FLOAT p = d_rho[i]*rdrho + 1.0; + int m = static_cast (p); + m = MAX(1,MIN(m,nrho-1)); + p -= m; + p = MIN(p,1.0); + const int d_type2frho_i = d_type2frho[itype]; + d_fp[i] = (d_frho_spline(d_type2frho_i,m,0)*p + d_frho_spline(d_type2frho_i,m,1))*p + d_frho_spline(d_type2frho_i,m,2); + if (EFLAG) { + F_FLOAT phi = ((d_frho_spline(d_type2frho_i,m,3)*p + d_frho_spline(d_type2frho_i,m,4))*p + + d_frho_spline(d_type2frho_i,m,5))*p + d_frho_spline(d_type2frho_i,m,6); + if (d_rho[i] > rhomax) phi += d_fp[i] * (d_rho[i]-rhomax); + if (eflag_global) ev.evdwl += phi; + if (eflag_atom) d_eatom[i] += phi; + } + +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::operator()(TagPairEAMFSKernelAB, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairEAMFSKernelAB(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +////Specialisation for Neighborlist types Half, HalfThread, Full +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::operator()(TagPairEAMFSKernelC, const int &ii, EV_FLOAT& ev) const { + + // The f array is atomic for Half/Thread neighbor style + Kokkos::View::value> > a_f = f; + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); + const int 
jnum = d_numneigh[i]; + + F_FLOAT fxtmp = 0.0; + F_FLOAT fytmp = 0.0; + F_FLOAT fztmp = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + //int j = d_neighbors_i[jj]; + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const int jtype = type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < cutforcesq) { + const F_FLOAT r = sqrt(rsq); + F_FLOAT p = r*rdr + 1.0; + int m = static_cast (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,1.0); + + // rhoip = derivative of (density at atom j due to atom i) + // rhojp = derivative of (density at atom i due to atom j) + // phi = pair potential energy + // phip = phi' + // z2 = phi * r + // z2p = (phi * r)' = (phi' r) + phi + // psip needs both fp[i] and fp[j] terms since r_ij appears in two + // terms of embed eng: Fi(sum rho_ij) and Fj(sum rho_ji) + // hence embed' = Fi(sum rho_ij) rhojp + Fj(sum rho_ji) rhoip + + const int d_type2rhor_ij = d_type2rhor(itype,jtype); + const F_FLOAT rhoip = (d_rhor_spline(d_type2rhor_ij,m,0)*p + d_rhor_spline(d_type2rhor_ij,m,1))*p + + d_rhor_spline(d_type2rhor_ij,m,2); + const int d_type2rhor_ji = d_type2rhor(jtype,itype); + const F_FLOAT rhojp = (d_rhor_spline(d_type2rhor_ji,m,0)*p + d_rhor_spline(d_type2rhor_ji,m,1))*p + + d_rhor_spline(d_type2rhor_ji,m,2); + const int d_type2z2r_ij = d_type2z2r(itype,jtype); + const F_FLOAT z2p = (d_z2r_spline(d_type2z2r_ij,m,0)*p + d_z2r_spline(d_type2z2r_ij,m,1))*p + + d_z2r_spline(d_type2z2r_ij,m,2); + const F_FLOAT z2 = ((d_z2r_spline(d_type2z2r_ij,m,3)*p + d_z2r_spline(d_type2z2r_ij,m,4))*p + + d_z2r_spline(d_type2z2r_ij,m,5))*p + d_z2r_spline(d_type2z2r_ij,m,6); + + const F_FLOAT recip = 1.0/r; + const F_FLOAT phi = z2*recip; + const F_FLOAT phip = z2p*recip - phi*recip; + const F_FLOAT psip = d_fp[i]*rhojp + d_fp[j]*rhoip + phip; + const F_FLOAT fpair = -psip*recip; + + fxtmp += delx*fpair; + fytmp += dely*fpair; + fztmp 
+= delz*fpair; + + if ((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) && (NEWTON_PAIR || j < nlocal)) { + a_f(j,0) -= delx*fpair; + a_f(j,1) -= dely*fpair; + a_f(j,2) -= delz*fpair; + } + + if (EVFLAG) { + if (eflag) { + ev.evdwl += (((NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD)&&(NEWTON_PAIR||(jtemplate ev_tally(ev,i,j,phi,fpair,delx,dely,delz); + } + + } + } + + a_f(i,0) += fxtmp; + a_f(i,1) += fytmp; + a_f(i,2) += fztmp; +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::operator()(TagPairEAMFSKernelC, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairEAMFSKernelC(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairEAMFSKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const +{ + const int EFLAG = eflag; + const int VFLAG = vflag_either; + + // The eatom and vatom arrays are atomic for Half/Thread neighbor style + Kokkos::View::value> > v_eatom = k_eatom.view(); + Kokkos::View::value> > v_vatom = k_vatom.view(); + + if (EFLAG) { + if (eflag_atom) { + const E_FLOAT epairhalf = 0.5 * epair; + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) v_eatom[i] += epairhalf; + if (NEWTON_PAIR || j < nlocal) v_eatom[j] += epairhalf; + } else { + v_eatom[i] += epairhalf; + } + } + } + + if (VFLAG) { + const E_FLOAT v0 = delx*delx*fpair; + const E_FLOAT v1 = dely*dely*fpair; + const E_FLOAT v2 = delz*delz*fpair; + const E_FLOAT v3 = delx*dely*fpair; + const E_FLOAT v4 = delx*delz*fpair; + const E_FLOAT v5 = dely*delz*fpair; + + if (vflag_global) { + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + if (NEWTON_PAIR || j < nlocal) { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + 
ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } else { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + + if (vflag_atom) { + if (NEIGHFLAG!=FULL) { + if (NEWTON_PAIR || i < nlocal) { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + } + if (NEWTON_PAIR || j < nlocal) { + v_vatom(j,0) += 0.5*v0; + v_vatom(j,1) += 0.5*v1; + v_vatom(j,2) += 0.5*v2; + v_vatom(j,3) += 0.5*v3; + v_vatom(j,4) += 0.5*v4; + v_vatom(j,5) += 0.5*v5; + } + } else { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +// Duplicate PairEAMFS functions + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs + read EAM Finnis-Sinclair file +------------------------------------------------------------------------- */ + +template +void PairEAMFSKokkos::coeff(int narg, char **arg) +{ + int i,j; + + if (!allocated) allocate(); + + if (narg != 3 + atom->ntypes) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // insure I,J args are * * + + if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // read EAM Finnis-Sinclair file + + if (fs) { + for (i = 0; i < fs->nelements; i++) delete [] fs->elements[i]; + delete [] fs->elements; + delete [] fs->mass; + memory->destroy(fs->frho); + memory->destroy(fs->rhor); + memory->destroy(fs->z2r); + delete fs; + } + fs = new Fs(); + read_file(arg[2]); + + // read args that map atom types to elements in potential file + // map[i] = which element the Ith atom type is, -1 if NULL + + for (i = 3; i < 
narg; i++) { + if (strcmp(arg[i],"NULL") == 0) { + map[i-2] = -1; + continue; + } + for (j = 0; j < fs->nelements; j++) + if (strcmp(arg[i],fs->elements[j]) == 0) break; + if (j < fs->nelements) map[i-2] = j; + else error->all(FLERR,"No matching element in EAM potential file"); + } + + // clear setflag since coeff() called once with I,J = * * + + int n = atom->ntypes; + for (i = 1; i <= n; i++) + for (j = i; j <= n; j++) + setflag[i][j] = 0; + + // set setflag i,j for type pairs where both are mapped to elements + // set mass of atom type if i = j + + int count = 0; + for (i = 1; i <= n; i++) { + for (j = i; j <= n; j++) { + if (map[i] >= 0 && map[j] >= 0) { + setflag[i][j] = 1; + if (i == j) atom->set_mass(i,fs->mass[map[i]]); + count++; + } + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + read a multi-element DYNAMO setfl file +------------------------------------------------------------------------- */ + +template +void PairEAMFSKokkos::read_file(char *filename) +{ + Fs *file = fs; + + // open potential file + + int me = comm->me; + FILE *fptr; + char line[MAXLINE]; + + if (me == 0) { + fptr = force->open_potential(filename); + if (fptr == NULL) { + char str[128]; + sprintf(str,"Cannot open EAM potential file %s",filename); + error->one(FLERR,str); + } + } + + // read and broadcast header + // extract element names from nelements line + + int n; + if (me == 0) { + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + n = strlen(line) + 1; + } + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + sscanf(line,"%d",&file->nelements); + int nwords = atom->count_words(line); + if (nwords != file->nelements + 1) + error->all(FLERR,"Incorrect element names in EAM potential file"); + + char **words = new char*[file->nelements+1]; + nwords = 0; + strtok(line," \t\n\r\f"); + 
while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; + + file->elements = new char*[file->nelements]; + for (int i = 0; i < file->nelements; i++) { + n = strlen(words[i]) + 1; + file->elements[i] = new char[n]; + strcpy(file->elements[i],words[i]); + } + delete [] words; + + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg %d %lg %lg", + &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut); + } + + MPI_Bcast(&file->nrho,1,MPI_INT,0,world); + MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->nr,1,MPI_INT,0,world); + MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world); + + file->mass = new double[file->nelements]; + memory->create(file->frho,file->nelements,file->nrho+1, + "pair:frho"); + memory->create(file->rhor,file->nelements,file->nelements, + file->nr+1,"pair:rhor"); + memory->create(file->z2r,file->nelements,file->nelements, + file->nr+1,"pair:z2r"); + + int i,j,tmp; + for (i = 0; i < file->nelements; i++) { + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg",&tmp,&file->mass[i]); + } + MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world); + + if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]); + MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world); + + for (j = 0; j < file->nelements; j++) { + if (me == 0) grab(fptr,file->nr,&file->rhor[i][j][1]); + MPI_Bcast(&file->rhor[i][j][1],file->nr,MPI_DOUBLE,0,world); + } + } + + for (i = 0; i < file->nelements; i++) + for (j = 0; j <= i; j++) { + if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]); + MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world); + } + + // close the potential file + + if (me == 0) fclose(fptr); +} + +/* ---------------------------------------------------------------------- + copy read-in setfl potential to standard array format +------------------------------------------------------------------------- */ + +template +void PairEAMFSKokkos::file2array_fs() +{ + int i,j,m,n; + int ntypes = 
atom->ntypes; + + // set function params directly from fs file + + nrho = fs->nrho; + nr = fs->nr; + drho = fs->drho; + dr = fs->dr; + rhomax = (nrho-1) * drho; + + // ------------------------------------------------------------------ + // setup frho arrays + // ------------------------------------------------------------------ + + // allocate frho arrays + // nfrho = # of fs elements + 1 for zero array + + nfrho = fs->nelements + 1; + memory->destroy(frho); + memory->create(frho,nfrho,nrho+1,"pair:frho"); + + // copy each element's frho to global frho + + for (i = 0; i < fs->nelements; i++) + for (m = 1; m <= nrho; m++) frho[i][m] = fs->frho[i][m]; + + // add extra frho of zeroes for non-EAM types to point to (pair hybrid) + // this is necessary b/c fp is still computed for non-EAM atoms + + for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0; + + // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to + // if atom type doesn't point to element (non-EAM atom in pair hybrid) + // then map it to last frho array of zeroes + + for (i = 1; i <= ntypes; i++) + if (map[i] >= 0) type2frho[i] = map[i]; + else type2frho[i] = nfrho-1; + + // ------------------------------------------------------------------ + // setup rhor arrays + // ------------------------------------------------------------------ + + // allocate rhor arrays + // nrhor = square of # of fs elements + + nrhor = fs->nelements * fs->nelements; + memory->destroy(rhor); + memory->create(rhor,nrhor,nr+1,"pair:rhor"); + + // copy each element pair rhor to global rhor + + n = 0; + for (i = 0; i < fs->nelements; i++) + for (j = 0; j < fs->nelements; j++) { + for (m = 1; m <= nr; m++) rhor[n][m] = fs->rhor[i][j][m]; + n++; + } + + // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to + // for fs files, there is a full NxN set of rhor arrays + // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used + + for (i = 1; i <= ntypes; i++) + for (j = 1; j <= ntypes; j++) + 
type2rhor[i][j] = map[i] * fs->nelements + map[j]; + + // ------------------------------------------------------------------ + // setup z2r arrays + // ------------------------------------------------------------------ + + // allocate z2r arrays + // nz2r = N*(N+1)/2 where N = # of fs elements + + nz2r = fs->nelements * (fs->nelements+1) / 2; + memory->destroy(z2r); + memory->create(z2r,nz2r,nr+1,"pair:z2r"); + + // copy each element pair z2r to global z2r, only for I >= J + + n = 0; + for (i = 0; i < fs->nelements; i++) + for (j = 0; j <= i; j++) { + for (m = 1; m <= nr; m++) z2r[n][m] = fs->z2r[i][j][m]; + n++; + } + + // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to + // set of z2r arrays only fill lower triangular Nelement matrix + // value = n = sum over rows of lower-triangular matrix until reach irow,icol + // swap indices when irow < icol to stay lower triangular + // if map = -1 (non-EAM atom in pair hybrid): + // type2z2r is not used by non-opt + // but set type2z2r to 0 since accessed by opt + + int irow,icol; + for (i = 1; i <= ntypes; i++) { + for (j = 1; j <= ntypes; j++) { + irow = map[i]; + icol = map[j]; + if (irow == -1 || icol == -1) { + type2z2r[i][j] = 0; + continue; + } + if (irow < icol) { + irow = map[j]; + icol = map[i]; + } + n = 0; + for (m = 0; m < irow; m++) n += m + 1; + n += icol; + type2z2r[i][j] = n; + } + } +} + +/* ---------------------------------------------------------------------- */ + +template class PairEAMFSKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class PairEAMFSKokkos; +#endif \ No newline at end of file diff --git a/src/KOKKOS/pair_eam_fs_kokkos.h b/src/KOKKOS/pair_eam_fs_kokkos.h new file mode 100755 index 0000000000..9ffa121467 --- /dev/null +++ b/src/KOKKOS/pair_eam_fs_kokkos.h @@ -0,0 +1,183 @@ +/* -*- c++ -*- ---------------------------------------------------------- + + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National 
Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(eam/fs/kk,PairEAMFSKokkos) +PairStyle(eam/fs/kk/device,PairEAMFSKokkos) +PairStyle(eam/fs/kk/host,PairEAMFSKokkos) + +#else + +#ifndef LMP_PAIR_EAM_FS_KOKKOS_H +#define LMP_PAIR_EAM_FS_KOKKOS_H + +#include "stdio.h" +#include "pair_kokkos.h" +#include "pair_eam.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +struct TagPairEAMFSPackForwardComm{}; +struct TagPairEAMFSUnpackForwardComm{}; +struct TagPairEAMFSInitialize{}; + +template +struct TagPairEAMFSKernelA{}; + +template +struct TagPairEAMFSKernelB{}; + +template +struct TagPairEAMFSKernelAB{}; + +template +struct TagPairEAMFSKernelC{}; + +// Cannot use virtual inheritance on the GPU + +template +class PairEAMFSKokkos : public PairEAM { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {COUL_FLAG=0}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef EV_FLOAT value_type; + + PairEAMFSKokkos(class LAMMPS *); + virtual ~PairEAMFSKokkos(); + virtual void compute(int, int); + void init_style(); + void coeff(int, char **); + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMFSPackForwardComm, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMFSUnpackForwardComm, const int&) const; + + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMFSInitialize, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMFSKernelA, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMFSKernelB, const int&, EV_FLOAT&) const; + + template + 
KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMFSKernelB, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMFSKernelAB, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMFSKernelAB, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMFSKernelC, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairEAMFSKernelC, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const; + + virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&, + int, int *); + virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&); + virtual int pack_forward_comm(int, int *, double *, int, int *); + virtual void unpack_forward_comm(int, int, double *); + int pack_reverse_comm(int, int, double *); + void unpack_reverse_comm(int, int *, double *); + + protected: + void cleanup_copy(); + + typename AT::t_x_array_randomread x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + typename AT::t_tagint_1d tag; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + DAT::t_efloat_1d d_eatom; + DAT::t_virial_array d_vatom; + + DAT::tdual_ffloat_1d k_rho; + DAT::tdual_ffloat_1d k_fp; + DAT::t_ffloat_1d d_rho; + typename AT::t_ffloat_1d v_rho; + DAT::t_ffloat_1d d_fp; + HAT::t_ffloat_1d h_rho; + HAT::t_ffloat_1d h_fp; + + DAT::t_int_1d_randomread d_type2frho; + DAT::t_int_2d_randomread d_type2rhor; + DAT::t_int_2d_randomread d_type2z2r; + + typedef Kokkos::DualView tdual_ffloat_2d_n7; + typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread; + typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7; + + t_ffloat_2d_n7_randomread d_frho_spline; + 
t_ffloat_2d_n7_randomread d_rhor_spline; + t_ffloat_2d_n7_randomread d_z2r_spline; + + virtual void file2array(); + void file2array_fs(); + void array2spline(); + void interpolate(int, double, double *, t_host_ffloat_2d_n7, int); + void read_file(char *); + + typename ArrayTypes::t_neighbors_2d d_neighbors; + typename ArrayTypes::t_int_1d_randomread d_ilist; + typename ArrayTypes::t_int_1d_randomread d_numneigh; + //NeighListKokkos k_list; + + int iswap; + int first; + typename AT::t_int_2d d_sendlist; + typename AT::t_xfloat_1d_um v_buf; + + int neighflag,newton_pair; + int nlocal,nall,eflag,vflag; + + friend void pair_virial_fdotr_compute(PairEAMFSKokkos*); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Cannot use chosen neighbor list style with pair eam/kk/fs + +That style is not supported by Kokkos. + +*/ diff --git a/src/KOKKOS/pair_sw_kokkos.cpp b/src/KOKKOS/pair_sw_kokkos.cpp new file mode 100755 index 0000000000..2e72bdaa0f --- /dev/null +++ b/src/KOKKOS/pair_sw_kokkos.cpp @@ -0,0 +1,908 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Stan Moore (SNL) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_sw_kokkos.h" +#include "kokkos.h" +#include "pair_kokkos.h" +#include "atom_kokkos.h" +#include "neighbor.h" +#include "neigh_request.h" +#include "force.h" +#include "comm.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list_kokkos.h" +#include "memory.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; + +#define MAXLINE 1024 +#define DELTA 4 + +/* ---------------------------------------------------------------------- */ + +template +PairSWKokkos::PairSWKokkos(LAMMPS *lmp) : PairSW(lmp) +{ + THIRD = 1.0/3.0; + + respa_enable = 0; + + + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TAG_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- + check if allocated, since class can be destructed when incomplete +------------------------------------------------------------------------- */ + +template +PairSWKokkos::~PairSWKokkos() +{ + if (!copymode) { + memory->destroy_kokkos(k_eatom,eatom); + memory->destroy_kokkos(k_vatom,vatom); + eatom = NULL; + vatom = NULL; + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairSWKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + 
memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.d_view; + } + if (vflag_atom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.d_view; + } + + atomKK->sync(execution_space,datamask_read); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + tag = atomKK->k_tag.view(); + type = atomKK->k_type.view(); + nlocal = atom->nlocal; + newton_pair = force->newton_pair; + nall = atom->nlocal + atom->nghost; + + const int inum = list->inum; + const int ignum = inum + list->gnum; + NeighListKokkos* k_list = static_cast*>(list); + d_ilist = k_list->d_ilist; + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + + k_list->clean_copy(); + copymode = 1; + + EV_FLOAT ev; + EV_FLOAT ev_all; + + // loop over neighbor list of my atoms + + if (neighflag == HALF) { + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + ev_all += ev; + } else if (neighflag == HALFTHREAD) { + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + ev_all += ev; + } else if (neighflag == FULL) { + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + ev_all += ev; + + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ignum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ignum),*this); + DeviceType::fence(); + ev_all += ev; + } + + if (eflag_global) eng_vdwl += ev_all.evdwl; + if (vflag_global) { + virial[0] += ev_all.v[0]; + virial[1] += 
ev_all.v[1]; + virial[2] += ev_all.v[2]; + virial[3] += ev_all.v[3]; + virial[4] += ev_all.v[4]; + virial[5] += ev_all.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + copymode = 0; +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairSWKokkos::operator()(TagPairSWComputeHalf, const int &ii, EV_FLOAT& ev) const { + + // The f array is atomic + + Kokkos::View::value> > a_f = f; + + F_FLOAT delr1[3],delr2[3],fj[3],fk[3]; + F_FLOAT evdwl = 0.0; + F_FLOAT fpair = 0.0; + + const int i = d_ilist[ii]; + const tagint itag = tag[i]; + const int itype = d_map[type[i]]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + + // two-body interactions, skip half of them + + const int jnum = d_numneigh[i]; + + F_FLOAT fxtmpi = 0.0; + F_FLOAT fytmpi = 0.0; + F_FLOAT fztmpi = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + const tagint jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp && x(j,1) < ytmp) continue; + if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue; + } + + const int jtype = d_map[type[j]]; + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + const int ijparam = d_elem2param(itype,jtype,jtype); + if (rsq > d_params[ijparam].cutsq) continue; + + twobody(d_params[ijparam],rsq,fpair,eflag,evdwl); + + fxtmpi += delx*fpair; + fytmpi += dely*fpair; + fztmpi += delz*fpair; + a_f(j,0) -= delx*fpair; + a_f(j,1) -= dely*fpair; + a_f(j,2) -= delz*fpair; + + if (EVFLAG) { + if (eflag) ev.evdwl += evdwl; + if 
(vflag_either || eflag_atom) this->template ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); + } + } + + const int jnumm1 = jnum - 1; + + for (int jj = 0; jj < jnumm1; jj++) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + const int jtype = d_map[type[j]]; + const int ijparam = d_elem2param(itype,jtype,jtype); + delr1[0] = x(j,0) - xtmp; + delr1[1] = x(j,1) - ytmp; + delr1[2] = x(j,2) - ztmp; + const F_FLOAT rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2]; + if (rsq1 > d_params[ijparam].cutsq) continue; + + F_FLOAT fxtmpj = 0.0; + F_FLOAT fytmpj = 0.0; + F_FLOAT fztmpj = 0.0; + + for (int kk = jj+1; kk < jnum; kk++) { + int k = d_neighbors(i,kk); + k &= NEIGHMASK; + const int ktype = d_map[type[k]]; + const int ikparam = d_elem2param(itype,ktype,ktype); + const int ijkparam = d_elem2param(itype,jtype,ktype); + + delr2[0] = x(k,0) - xtmp; + delr2[1] = x(k,1) - ytmp; + delr2[2] = x(k,2) - ztmp; + const F_FLOAT rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + + if (rsq2 > d_params[ikparam].cutsq) continue; + + threebody(d_params[ijparam],d_params[ikparam],d_params[ijkparam], + rsq1,rsq2,delr1,delr2,fj,fk,eflag,evdwl); + + fxtmpi -= fj[0] + fk[0]; + fytmpi -= fj[1] + fk[1]; + fztmpi -= fj[2] + fk[2]; + fxtmpj += fj[0]; + fytmpj += fj[1]; + fztmpj += fj[2]; + a_f(k,0) += fk[0]; + a_f(k,1) += fk[1]; + a_f(k,2) += fk[2]; + + if (EVFLAG) { + if (eflag) ev.evdwl += evdwl; + if (vflag_either || eflag_atom) this->template ev_tally3(ev,i,j,k,evdwl,0.0,fj,fk,delr1,delr2); + } + } + + a_f(j,0) += fxtmpj; + a_f(j,1) += fytmpj; + a_f(j,2) += fztmpj; + } + + a_f(i,0) += fxtmpi; + a_f(i,1) += fytmpi; + a_f(i,2) += fztmpi; +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairSWKokkos::operator()(TagPairSWComputeHalf, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairSWComputeHalf(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION 
+void PairSWKokkos::operator()(TagPairSWComputeFullA, const int &ii, EV_FLOAT& ev) const { + + /* full-list kernel: no tag-based pair skipping is done here (so atom tags are not read); every listed pair contributes to atom i and the pair energy is tallied at half weight */ + + F_FLOAT delr1[3],delr2[3],fj[3],fk[3]; + F_FLOAT evdwl = 0.0; + F_FLOAT fpair = 0.0; + + const int i = d_ilist[ii]; + + const int itype = d_map[type[i]]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + + // two-body interactions + + const int jnum = d_numneigh[i]; + + F_FLOAT fxtmpi = 0.0; + F_FLOAT fytmpi = 0.0; + F_FLOAT fztmpi = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + + const int jtype = d_map[type[j]]; + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + const int ijparam = d_elem2param(itype,jtype,jtype); + + if (rsq > d_params[ijparam].cutsq) continue; + + twobody(d_params[ijparam],rsq,fpair,eflag,evdwl); + + fxtmpi += delx*fpair; + fytmpi += dely*fpair; + fztmpi += delz*fpair; + + if (EVFLAG) { + if (eflag) ev.evdwl += 0.5*evdwl; + if (vflag_either || eflag_atom) this->template ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz); + } + } + + const int jnumm1 = jnum - 1; + + for (int jj = 0; jj < jnumm1; jj++) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + const int jtype = d_map[type[j]]; + const int ijparam = d_elem2param(itype,jtype,jtype); + delr1[0] = x(j,0) - xtmp; + delr1[1] = x(j,1) - ytmp; + delr1[2] = x(j,2) - ztmp; + const F_FLOAT rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2]; + + if (rsq1 > d_params[ijparam].cutsq) continue; + + for (int kk = jj+1; kk < jnum; kk++) { + int k = d_neighbors(i,kk); + k &= NEIGHMASK; + const int ktype = d_map[type[k]]; + const int ikparam = d_elem2param(itype,ktype,ktype); + const int ijkparam = d_elem2param(itype,jtype,ktype); + + delr2[0] = x(k,0) - xtmp; + delr2[1] = x(k,1) - ytmp; + delr2[2] = x(k,2) - ztmp; + const F_FLOAT 
rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + + if (rsq2 > d_params[ikparam].cutsq) continue; + + threebody(d_params[ijparam],d_params[ikparam],d_params[ijkparam], + rsq1,rsq2,delr1,delr2,fj,fk,eflag,evdwl); + + fxtmpi -= fj[0] + fk[0]; + fytmpi -= fj[1] + fk[1]; + fztmpi -= fj[2] + fk[2]; + + if (EVFLAG) { + if (eflag) ev.evdwl += evdwl; + if (vflag_either || eflag_atom) this->template ev_tally3(ev,i,j,k,evdwl,0.0,fj,fk,delr1,delr2); + } + } + } + + f(i,0) += fxtmpi; + f(i,1) += fytmpi; + f(i,2) += fztmpi; +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairSWKokkos::operator()(TagPairSWComputeFullA, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairSWComputeFullA(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairSWKokkos::operator()(TagPairSWComputeFullB, const int &ii, EV_FLOAT& ev) const { + + F_FLOAT delr1[3],delr2[3],fj[3],fk[3]; + F_FLOAT evdwl = 0.0; + F_FLOAT fpair = 0.0; + + const int i = d_ilist[ii]; + + const int itype = d_map[type[i]]; + const X_FLOAT xtmpi = x(i,0); + const X_FLOAT ytmpi = x(i,1); + const X_FLOAT ztmpi = x(i,2); + + const int jnum = d_numneigh[i]; + + F_FLOAT fxtmpi = 0.0; + F_FLOAT fytmpi = 0.0; + F_FLOAT fztmpi = 0.0; + + for (int jj = 0; jj < jnum; jj++) { + int j = d_neighbors(i,jj); + j &= NEIGHMASK; + if (j >= nlocal) continue; + const int jtype = d_map[type[j]]; + const int jiparam = d_elem2param(jtype,itype,itype); + const X_FLOAT xtmpj = x(j,0); + const X_FLOAT ytmpj = x(j,1); + const X_FLOAT ztmpj = x(j,2); + + delr1[0] = xtmpi - xtmpj; + delr1[1] = ytmpi - ytmpj; + delr1[2] = ztmpi - ztmpj; + const F_FLOAT rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2]; + + if (rsq1 > d_params[jiparam].cutsq) continue; + + const int j_jnum = d_numneigh[j]; + + for (int kk = 0; kk < j_jnum; kk++) { + int k = d_neighbors(j,kk); + k &= NEIGHMASK; + if (k == i) continue; + 
const int ktype = d_map[type[k]]; + const int jkparam = d_elem2param(jtype,ktype,ktype); + const int jikparam = d_elem2param(jtype,itype,ktype); + + delr2[0] = x(k,0) - xtmpj; + delr2[1] = x(k,1) - ytmpj; + delr2[2] = x(k,2) - ztmpj; + const F_FLOAT rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + + if (rsq2 > d_params[jkparam].cutsq) continue; + + /* evdwl and fk are only produced by threebody(); both are consumed by ev_tally3_atom() below, so the full routine is needed whenever that tally runs (per-atom energy as well as per-atom virial), otherwise a zero energy and an unset fk would be tallied */ + if (vflag_atom || eflag_atom) + threebody(d_params[jiparam],d_params[jkparam],d_params[jikparam], + rsq1,rsq2,delr1,delr2,fj,fk,eflag,evdwl); + else + threebodyj(d_params[jiparam],d_params[jkparam],d_params[jikparam], + rsq1,rsq2,delr1,delr2,fj); + + fxtmpi += fj[0]; + fytmpi += fj[1]; + fztmpi += fj[2]; + + if (EVFLAG) + if (vflag_atom || eflag_atom) ev_tally3_atom(ev,i,evdwl,0.0,fj,fk,delr1,delr2); + } + } + + f(i,0) += fxtmpi; + f(i,1) += fytmpi; + f(i,2) += fztmpi; +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairSWKokkos::operator()(TagPairSWComputeFullB, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairSWComputeFullB(), ii, ev); +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +template +void PairSWKokkos::coeff(int narg, char **arg) +{ + PairSW::coeff(narg,arg); + + // sync map + + int n = atom->ntypes; + + DAT::tdual_int_1d k_map = DAT::tdual_int_1d("pair:map",n+1); + HAT::t_int_1d h_map = k_map.h_view; + + for (int i = 1; i <= n; i++) + h_map[i] = map[i]; + + k_map.template modify(); + k_map.template sync(); + + d_map = k_map.d_view; +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairSWKokkos::init_style() +{ + PairSW::init_style(); + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; 
+ + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same::value && + !Kokkos::Impl::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same::value; + + // always request a full neighbor list + + if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full_cluster = 0; + if (neighflag == FULL) + neighbor->requests[irequest]->ghost = 1; + else + neighbor->requests[irequest]->ghost = 0; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with pair sw/kk"); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairSWKokkos::setup() +{ + PairSW::setup(); + + // sync elem2param and params + + tdual_int_3d k_elem2param = tdual_int_3d("pair:elem2param",nelements,nelements,nelements); + t_host_int_3d h_elem2param = k_elem2param.h_view; + + tdual_param_1d k_params = tdual_param_1d("pair:params",nparams); + t_host_param_1d h_params = k_params.h_view; + + for (int i = 0; i < nelements; i++) + for (int j = 0; j < nelements; j++) + for (int k = 0; k < nelements; k++) + h_elem2param(i,j,k) = elem2param[i][j][k]; + + for (int m = 0; m < nparams; m++) + h_params[m] = params[m]; + + k_elem2param.template modify(); + k_elem2param.template sync(); + k_params.template modify(); + k_params.template sync(); + + d_elem2param = k_elem2param.d_view; + d_params = k_params.d_view; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairSWKokkos::twobody(const Param& param, const F_FLOAT& rsq, F_FLOAT& fforce, + const int& eflag, F_FLOAT& eng) const +{ + F_FLOAT r,rinvsq,rp,rq,rainv,rainvsq,expsrainv; + + r = sqrt(rsq); + rinvsq = 1.0/rsq; + rp = pow(r,-param.powerp); + rq = pow(r,-param.powerq); + rainv = 1.0 / (r - param.cut); + rainvsq = rainv*rainv*r; + expsrainv = 
exp(param.sigma * rainv); + fforce = (param.c1*rp - param.c2*rq + + (param.c3*rp -param.c4*rq) * rainvsq) * expsrainv * rinvsq; + if (eflag) eng = (param.c5*rp - param.c6*rq) * expsrainv; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairSWKokkos::threebody(const Param& paramij, const Param& paramik, const Param& paramijk, + const F_FLOAT& rsq1, const F_FLOAT& rsq2, + F_FLOAT *delr1, F_FLOAT *delr2, + F_FLOAT *fj, F_FLOAT *fk, const int& eflag, F_FLOAT& eng) const +{ + F_FLOAT r1,rinvsq1,rainv1,gsrainv1,gsrainvsq1,expgsrainv1; + F_FLOAT r2,rinvsq2,rainv2,gsrainv2,gsrainvsq2,expgsrainv2; + F_FLOAT rinv12,cs,delcs,delcssq,facexp,facrad,frad1,frad2; + F_FLOAT facang,facang12,csfacang,csfac1,csfac2; + + r1 = sqrt(rsq1); + rinvsq1 = 1.0/rsq1; + rainv1 = 1.0/(r1 - paramij.cut); + gsrainv1 = paramij.sigma_gamma * rainv1; + gsrainvsq1 = gsrainv1*rainv1/r1; + expgsrainv1 = exp(gsrainv1); + + r2 = sqrt(rsq2); + rinvsq2 = 1.0/rsq2; + rainv2 = 1.0/(r2 - paramik.cut); + gsrainv2 = paramik.sigma_gamma * rainv2; + gsrainvsq2 = gsrainv2*rainv2/r2; + expgsrainv2 = exp(gsrainv2); + + rinv12 = 1.0/(r1*r2); + cs = (delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2]) * rinv12; + delcs = cs - paramijk.costheta; + delcssq = delcs*delcs; + + facexp = expgsrainv1*expgsrainv2; + + // facrad = sqrt(paramij.lambda_epsilon*paramik.lambda_epsilon) * + // facexp*delcssq; + + facrad = paramijk.lambda_epsilon * facexp*delcssq; + frad1 = facrad*gsrainvsq1; + frad2 = facrad*gsrainvsq2; + facang = paramijk.lambda_epsilon2 * facexp*delcs; + facang12 = rinv12*facang; + csfacang = cs*facang; + csfac1 = rinvsq1*csfacang; + + fj[0] = delr1[0]*(frad1+csfac1)-delr2[0]*facang12; + fj[1] = delr1[1]*(frad1+csfac1)-delr2[1]*facang12; + fj[2] = delr1[2]*(frad1+csfac1)-delr2[2]*facang12; + + csfac2 = rinvsq2*csfacang; + + fk[0] = delr2[0]*(frad2+csfac2)-delr1[0]*facang12; + fk[1] = delr2[1]*(frad2+csfac2)-delr1[1]*facang12; + 
fk[2] = delr2[2]*(frad2+csfac2)-delr1[2]*facang12; + + if (eflag) eng = facrad; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairSWKokkos::threebodyj(const Param& paramij, const Param& paramik, const Param& paramijk, + const F_FLOAT& rsq1, const F_FLOAT& rsq2, F_FLOAT *delr1, F_FLOAT *delr2, F_FLOAT *fj) const +{ + F_FLOAT r1,rinvsq1,rainv1,gsrainv1,gsrainvsq1,expgsrainv1; + F_FLOAT r2, rainv2, gsrainv2, expgsrainv2; + F_FLOAT rinv12,cs,delcs,delcssq,facexp,facrad,frad1; + F_FLOAT facang,facang12,csfacang,csfac1; + + r1 = sqrt(rsq1); + rinvsq1 = 1.0/rsq1; + rainv1 = 1.0/(r1 - paramij.cut); + gsrainv1 = paramij.sigma_gamma * rainv1; + gsrainvsq1 = gsrainv1*rainv1/r1; + expgsrainv1 = exp(gsrainv1); + + r2 = sqrt(rsq2); + rainv2 = 1.0/(r2 - paramik.cut); + gsrainv2 = paramik.sigma_gamma * rainv2; + expgsrainv2 = exp(gsrainv2); + + rinv12 = 1.0/(r1*r2); + cs = (delr1[0]*delr2[0] + delr1[1]*delr2[1] + delr1[2]*delr2[2]) * rinv12; + delcs = cs - paramijk.costheta; + delcssq = delcs*delcs; + + facexp = expgsrainv1*expgsrainv2; + + // facrad = sqrt(paramij.lambda_epsilon*paramik.lambda_epsilon) * + // facexp*delcssq; + + facrad = paramijk.lambda_epsilon * facexp*delcssq; + frad1 = facrad*gsrainvsq1; + facang = paramijk.lambda_epsilon2 * facexp*delcs; + facang12 = rinv12*facang; + csfacang = cs*facang; + csfac1 = rinvsq1*csfacang; + + fj[0] = delr1[0]*(frad1+csfac1)-delr2[0]*facang12; + fj[1] = delr1[1]*(frad1+csfac1)-delr2[1]*facang12; + fj[2] = delr1[2]*(frad1+csfac1)-delr2[2]*facang12; +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairSWKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const +{ + const int VFLAG = vflag_either; + + // The eatom and vatom arrays are atomic for half/thread 
neighbor list + + Kokkos::View::value> > v_eatom = k_eatom.view(); + Kokkos::View::value> > v_vatom = k_vatom.view(); + + + if (eflag_atom) { + const E_FLOAT epairhalf = 0.5 * epair; + v_eatom[i] += epairhalf; + if (NEIGHFLAG != FULL) + v_eatom[j] += epairhalf; + } + + if (VFLAG) { + const E_FLOAT v0 = delx*delx*fpair; + const E_FLOAT v1 = dely*dely*fpair; + const E_FLOAT v2 = delz*delz*fpair; + const E_FLOAT v3 = delx*dely*fpair; + const E_FLOAT v4 = delx*delz*fpair; + const E_FLOAT v5 = dely*delz*fpair; + + if (vflag_global) { + if (NEIGHFLAG != FULL) { + ev.v[0] += v0; + ev.v[1] += v1; + ev.v[2] += v2; + ev.v[3] += v3; + ev.v[4] += v4; + ev.v[5] += v5; + } else { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + + if (vflag_atom) { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + + if (NEIGHFLAG != FULL) { + v_vatom(j,0) += 0.5*v0; + v_vatom(j,1) += 0.5*v1; + v_vatom(j,2) += 0.5*v2; + v_vatom(j,3) += 0.5*v3; + v_vatom(j,4) += 0.5*v4; + v_vatom(j,5) += 0.5*v5; + } + } + } +} + +/* ---------------------------------------------------------------------- + tally eng_vdwl and virial into global and per-atom accumulators + called by SW and hbond potentials, newton_pair is always on + virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk + ------------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairSWKokkos::ev_tally3(EV_FLOAT &ev, const int &i, const int &j, int &k, + const F_FLOAT &evdwl, const F_FLOAT &ecoul, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drki) const +{ + F_FLOAT epairthird,v[6]; + + const int VFLAG = vflag_either; + +// The eatom and vatom arrays are atomic for half/thread neighbor list + + Kokkos::View::value> > v_eatom = k_eatom.view(); + Kokkos::View::value> 
> v_vatom = k_vatom.view(); + + if (eflag_atom) { + epairthird = THIRD * (evdwl + ecoul); + v_eatom[i] += epairthird; + if (NEIGHFLAG != FULL) { + v_eatom[j] += epairthird; + v_eatom[k] += epairthird; + } + } + + if (VFLAG) { + v[0] = drji[0]*fj[0] + drki[0]*fk[0]; + v[1] = drji[1]*fj[1] + drki[1]*fk[1]; + v[2] = drji[2]*fj[2] + drki[2]*fk[2]; + v[3] = drji[0]*fj[1] + drki[0]*fk[1]; + v[4] = drji[0]*fj[2] + drki[0]*fk[2]; + v[5] = drji[1]*fj[2] + drki[1]*fk[2]; + + if (vflag_global) { + ev.v[0] += v[0]; + ev.v[1] += v[1]; + ev.v[2] += v[2]; + ev.v[3] += v[3]; + ev.v[4] += v[4]; + ev.v[5] += v[5]; + } + + if (vflag_atom) { + v_vatom(i,0) += THIRD*v[0]; v_vatom(i,1) += THIRD*v[1]; + v_vatom(i,2) += THIRD*v[2]; v_vatom(i,3) += THIRD*v[3]; + v_vatom(i,4) += THIRD*v[4]; v_vatom(i,5) += THIRD*v[5]; + + if (NEIGHFLAG != FULL) { + v_vatom(j,0) += THIRD*v[0]; v_vatom(j,1) += THIRD*v[1]; + v_vatom(j,2) += THIRD*v[2]; v_vatom(j,3) += THIRD*v[3]; + v_vatom(j,4) += THIRD*v[4]; v_vatom(j,5) += THIRD*v[5]; + + v_vatom(k,0) += THIRD*v[0]; v_vatom(k,1) += THIRD*v[1]; + v_vatom(k,2) += THIRD*v[2]; v_vatom(k,3) += THIRD*v[3]; + v_vatom(k,4) += THIRD*v[4]; v_vatom(k,5) += THIRD*v[5]; + } + } + } +} + +/* ---------------------------------------------------------------------- + tally the three-body energy and virial of one triplet into the per-atom + accumulators of atom i only: a third of (evdwl + ecoul) goes to d_eatom[i] + and a third of each virial component goes to d_vatom(i,*); + the global accumulator ev is not modified by this function + virial = riFi + rjFj + rkFk = (rj-ri) Fj + (rk-ri) Fk = drji*fj + drki*fk + ------------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairSWKokkos::ev_tally3_atom(EV_FLOAT &ev, const int &i, + const F_FLOAT &evdwl, const F_FLOAT &ecoul, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drki) const +{ + F_FLOAT epairthird,v[6]; + + const int VFLAG = vflag_either; + + if (eflag_atom) { + epairthird = THIRD * (evdwl + ecoul); + d_eatom[i] += epairthird; + } + + if (VFLAG) { + v[0] = drji[0]*fj[0] + drki[0]*fk[0]; + v[1] = 
drji[1]*fj[1] + drki[1]*fk[1]; + v[2] = drji[2]*fj[2] + drki[2]*fk[2]; + v[3] = drji[0]*fj[1] + drki[0]*fk[1]; + v[4] = drji[0]*fj[2] + drki[0]*fk[2]; + v[5] = drji[1]*fj[2] + drki[1]*fk[2]; + + if (vflag_atom) { + d_vatom(i,0) += THIRD*v[0]; d_vatom(i,1) += THIRD*v[1]; + d_vatom(i,2) += THIRD*v[2]; d_vatom(i,3) += THIRD*v[3]; + d_vatom(i,4) += THIRD*v[4]; d_vatom(i,5) += THIRD*v[5]; + } + } +} + +template class PairSWKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class PairSWKokkos; +#endif \ No newline at end of file diff --git a/src/KOKKOS/pair_sw_kokkos.h b/src/KOKKOS/pair_sw_kokkos.h new file mode 100755 index 0000000000..a73008ee1c --- /dev/null +++ b/src/KOKKOS/pair_sw_kokkos.h @@ -0,0 +1,150 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(sw/kk,PairSWKokkos) +PairStyle(sw/kk/device,PairSWKokkos) +PairStyle(sw/kk/host,PairSWKokkos) + +#else + +#ifndef LMP_PAIR_SW_KOKKOS_H +#define LMP_PAIR_SW_KOKKOS_H + +#include "pair_sw.h" +#include "pair_kokkos.h" + +template +struct TagPairSWComputeHalf{}; + +template +struct TagPairSWComputeFullA{}; + +template +struct TagPairSWComputeFullB{}; + +namespace LAMMPS_NS { + +template +class PairSWKokkos : public PairSW { + public: + enum {EnabledNeighFlags=FULL}; + enum {COUL_FLAG=0}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef EV_FLOAT value_type; + + PairSWKokkos(class LAMMPS *); + virtual ~PairSWKokkos(); + virtual void compute(int, int); + virtual void coeff(int, char **); + virtual void init_style(); + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairSWComputeHalf, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairSWComputeHalf, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairSWComputeFullA, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairSWComputeFullA, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairSWComputeFullB, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairSWComputeFullB, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const; + + template + KOKKOS_INLINE_FUNCTION + void ev_tally3(EV_FLOAT &ev, const int &i, const int &j, int &k, + const F_FLOAT &evdwl, const F_FLOAT &ecoul, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drki) const; + + KOKKOS_INLINE_FUNCTION + void ev_tally3_atom(EV_FLOAT &ev, const int &i, + const F_FLOAT 
&evdwl, const F_FLOAT &ecoul, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drki) const; + + protected: + typedef Kokkos::DualView tdual_int_3d; + typedef typename tdual_int_3d::t_dev_const_randomread t_int_3d_randomread; + typedef typename tdual_int_3d::t_host t_host_int_3d; + + t_int_3d_randomread d_elem2param; + DAT::t_int_1d_randomread d_map; + + typedef Kokkos::DualView tdual_param_1d; + typedef typename tdual_param_1d::t_dev t_param_1d; + typedef typename tdual_param_1d::t_host t_host_param_1d; + + t_param_1d d_params; + + virtual void setup(); + void twobody(const Param&, const F_FLOAT&, F_FLOAT&, const int&, F_FLOAT&) const; + void threebody(const Param&, const Param&, const Param&, const F_FLOAT&, const F_FLOAT&, F_FLOAT *, F_FLOAT *, + F_FLOAT *, F_FLOAT *, const int&, F_FLOAT&) const; + void threebodyj(const Param&, const Param&, const Param&, const F_FLOAT&, const F_FLOAT&, F_FLOAT *, F_FLOAT *, + F_FLOAT *) const; + + typename ArrayTypes::t_x_array_randomread x; + typename ArrayTypes::t_f_array f; + typename ArrayTypes::t_tagint_1d tag; + typename ArrayTypes::t_int_1d_randomread type; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + DAT::t_efloat_1d d_eatom; + DAT::t_virial_array d_vatom; + + DAT::t_int_1d_randomread d_type2frho; + DAT::t_int_2d_randomread d_type2rhor; + DAT::t_int_2d_randomread d_type2z2r; + + typename ArrayTypes::t_neighbors_2d d_neighbors; + typename ArrayTypes::t_int_1d_randomread d_ilist; + typename ArrayTypes::t_int_1d_randomread d_numneigh; + //NeighListKokkos k_list; + + int neighflag,newton_pair; + int nlocal,nall,eflag,vflag; + + int inum; + + friend void pair_virial_fdotr_compute(PairSWKokkos*); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp new file mode 100755 index 0000000000..d32e18e66f --- /dev/null +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -0,0 +1,1202 @@ +/* 
---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Ray Shan (SNL) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_tersoff_kokkos.h" +#include "kokkos.h" +#include "atom_kokkos.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_request.h" +#include "neigh_list_kokkos.h" +#include "update.h" +#include "integrate.h" +#include "respa.h" +#include "math_const.h" +#include "memory.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define KOKKOS_CUDA_MAX_THREADS 256 +#define KOKKOS_CUDA_MIN_BLOCKS 8 + +/* ---------------------------------------------------------------------- */ + +template +PairTersoffKokkos::PairTersoffKokkos(LAMMPS *lmp) : PairTersoff(lmp) +{ + THIRD = 1.0/3.0; + + respa_enable = 0; + + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +PairTersoffKokkos::~PairTersoffKokkos() +{ + if (!copymode) { + 
memory->destroy_kokkos(k_eatom,eatom); + memory->destroy_kokkos(k_vatom,vatom); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairTersoffKokkos::allocate() +{ + PairTersoff::allocate(); + + int n = atom->ntypes; + + k_params = Kokkos::DualView + ("PairTersoff::paramskk",n+1,n+1,n+1); + paramskk = k_params.d_view; +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairTersoffKokkos::init_style() +{ + PairTersoff::init_style(); + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same::value && + !Kokkos::Impl::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same::value; + + if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) { + //if (neighflag == FULL || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full_cluster = 0; + if (neighflag == FULL) + neighbor->requests[irequest]->ghost = 1; + else + neighbor->requests[irequest]->ghost = 0; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with tersoff/kk"); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairTersoffKokkos::setup() +{ + PairTersoff::setup(); + + int i,j,k,m; + int n = atom->ntypes; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + for (k = 1; k <= n; k++) { + m = elem2param[i-1][j-1][k-1]; + k_params.h_view(i,j,k).powerm = params[m].powerm; + k_params.h_view(i,j,k).gamma = params[m].gamma; + k_params.h_view(i,j,k).lam3 = params[m].lam3; + k_params.h_view(i,j,k).c = params[m].c; + k_params.h_view(i,j,k).d = 
params[m].d; + k_params.h_view(i,j,k).h = params[m].h; + k_params.h_view(i,j,k).powern = params[m].powern; + k_params.h_view(i,j,k).beta = params[m].beta; + k_params.h_view(i,j,k).lam2 = params[m].lam2; + k_params.h_view(i,j,k).bigb = params[m].bigb; + k_params.h_view(i,j,k).bigr = params[m].bigr; + k_params.h_view(i,j,k).bigd = params[m].bigd; + k_params.h_view(i,j,k).lam1 = params[m].lam1; + k_params.h_view(i,j,k).biga = params[m].biga; + k_params.h_view(i,j,k).cutsq = params[m].cutsq; + k_params.h_view(i,j,k).c1 = params[m].c1; + k_params.h_view(i,j,k).c2 = params[m].c2; + k_params.h_view(i,j,k).c3 = params[m].c3; + k_params.h_view(i,j,k).c4 = params[m].c4; + } + + k_params.template modify(); + +} + +/* ---------------------------------------------------------------------- */ + +template +void PairTersoffKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.d_view; + } + if (vflag_atom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.d_view; + } + + atomKK->sync(execution_space,datamask_read); + k_params.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + type = atomKK->k_type.view(); + tag = atomKK->k_tag.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + + const int inum = list->inum; + const int ignum = inum + list->gnum; + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = 
k_list->d_neighbors; + d_ilist = k_list->d_ilist; + + k_list->clean_copy(); + copymode = 1; + + EV_FLOAT ev; + EV_FLOAT ev_all; + + if (neighflag == HALF) { + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + ev_all += ev; + } else if (neighflag == HALFTHREAD) { + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + ev_all += ev; + } else if (neighflag == FULL) { + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + ev_all += ev; + + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ignum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ignum),*this); + DeviceType::fence(); + ev_all += ev; + } + + if (eflag_global) eng_vdwl += ev_all.evdwl; + if (vflag_global) { + virial[0] += ev_all.v[0]; + virial[1] += ev_all.v[1]; + virial[2] += ev_all.v[2]; + virial[3] += ev_all.v[3]; + virial[4] += ev_all.v[4]; + virial[5] += ev_all.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + copymode = 0; +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::operator()(TagPairTersoffComputeHalf, const int &ii, EV_FLOAT& ev) const { + + // The f array is atomic for Half/Thread neighbor style + Kokkos::View::value> > a_f = f; + + const int i = d_ilist[ii]; + if (i >= nlocal) return; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + const int itag = tag(i); 
+ + int j,k,jj,kk,jtag,jtype,ktype; + F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij; + F_FLOAT fi[3], fj[3], fk[3]; + X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2; + + //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); + const int jnum = d_numneigh[i]; + + // repulsive + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + jtype = type(j); + jtag = tag(j); + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp && x(j,1) < ytmp) continue; + if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue; + } + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq; + + if (rsq > cutsq) continue; + + const F_FLOAT r = sqrt(rsq); + const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r); + const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r); + const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r); + const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp * + (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r; + const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp; + + a_f(i,0) += delx*frep; + a_f(i,1) += dely*frep; + a_f(i,2) += delz*frep; + a_f(j,0) -= delx*frep; + a_f(j,1) -= dely*frep; + a_f(j,2) -= delz*frep; + + if (EVFLAG) { + if (eflag) ev.evdwl += eng; + if (vflag_either || eflag_atom) this->template ev_tally(ev,i,j,eng,frep,delx,dely,delz); + } + } + + // attractive: bond order + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + jtype = type(j); + + delx1 = xtmp - x(j,0); + dely1 = ytmp - x(j,1); + delz1 = ztmp - x(j,2); + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + cutsq1 = paramskk(itype,jtype,jtype).cutsq; + + 
bo_ij = 0.0; + if (rsq1 > cutsq1) continue; + rij = sqrt(rsq1); + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = d_neighbors(i,kk); + k &= NEIGHMASK; + ktype = type(k); + + delx2 = xtmp - x(k,0); + dely2 = ytmp - x(k,1); + delz2 = ztmp - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(itype,jtype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); + } + + // attractive: pairwise potential and force + + const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij); + const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij); + const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij); + const F_FLOAT fatt = -0.5*bij * dfa / rij; + const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij); + const F_FLOAT eng = 0.5*bij * fa; + + a_f(i,0) += delx1*fatt; + a_f(i,1) += dely1*fatt; + a_f(i,2) += delz1*fatt; + a_f(j,0) -= delx1*fatt; + a_f(j,1) -= dely1*fatt; + a_f(j,2) -= delz1*fatt; + + if (EVFLAG) { + if (eflag) ev.evdwl += eng; + if (vflag_either || eflag_atom) + this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); + } + + // attractive: three-body force + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = d_neighbors(i,kk); + k &= NEIGHMASK; + ktype = type(k); + + delx2 = xtmp - x(k,0); + dely2 = ytmp - x(k,1); + delz2 = ztmp - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(itype,jtype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1, + rik,delx2,dely2,delz2,fi,fj,fk); + + a_f(i,0) += fi[0]; + a_f(i,1) += fi[1]; + a_f(i,2) += fi[2]; + a_f(j,0) += fj[0]; + a_f(j,1) += fj[1]; + a_f(j,2) += fj[2]; + a_f(k,0) += fk[0]; + a_f(k,1) += fk[1]; + a_f(k,2) += fk[2]; + + if (vflag_atom) { + F_FLOAT delrij[3], delrik[3]; + delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1; + delrik[0] = -delx2; 
delrik[1] = -dely2; delrik[2] = -delz2; + if (vflag_either) this->template v_tally3(ev,i,j,k,fj,fk,delrij,delrik); + } + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::operator()(TagPairTersoffComputeHalf, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairTersoffComputeHalf(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::operator()(TagPairTersoffComputeFullA, const int &ii, EV_FLOAT& ev) const { + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + int j,k,jj,kk,jtype,ktype; + F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij; + F_FLOAT fi[3], fj[3], fk[3]; + X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2; + + //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); + const int jnum = d_numneigh[i]; + + // repulsive + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + const int jtype = type(j); + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq; + + if (rsq > cutsq) continue; + + const F_FLOAT r = sqrt(rsq); + const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r); + const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r); + const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r); + const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp * + (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r; + const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp; + + f(i,0) += delx*frep; + f(i,1) += dely*frep; + f(i,2) += delz*frep; + + if (EVFLAG) { + if (eflag) + ev.evdwl += 0.5*eng; + if (vflag_either || eflag_atom) + this->template 
ev_tally(ev,i,j,eng,frep,delx,dely,delz); + } + } + + // attractive: bond order + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + jtype = type(j); + + delx1 = xtmp - x(j,0); + dely1 = ytmp - x(j,1); + delz1 = ztmp - x(j,2); + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + cutsq1 = paramskk(itype,jtype,jtype).cutsq; + + bo_ij = 0.0; + if (rsq1 > cutsq1) continue; + rij = sqrt(rsq1); + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = d_neighbors(i,kk); + k &= NEIGHMASK; + ktype = type(k); + + delx2 = xtmp - x(k,0); + dely2 = ytmp - x(k,1); + delz2 = ztmp - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(itype,jtype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); + } + + // attractive: pairwise potential and force + + const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij); + const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij); + const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij); + const F_FLOAT fatt = -0.5*bij * dfa / rij; + const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij); + const F_FLOAT eng = 0.5*bij * fa; + + f(i,0) += delx1*fatt; + f(i,1) += dely1*fatt; + f(i,2) += delz1*fatt; + + if (EVFLAG) { + if (eflag) ev.evdwl += 0.5*eng; + if (vflag_either || eflag_atom) + this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); + } + + // attractive: three-body force + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = d_neighbors(i,kk); + k &= NEIGHMASK; + ktype = type(k); + + delx2 = xtmp - x(k,0); + dely2 = ytmp - x(k,1); + delz2 = ztmp - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(itype,jtype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1, + rik,delx2,dely2,delz2,fi,fj,fk); + + f(i,0) += fi[0]; + f(i,1) += fi[1]; + f(i,2) += fi[2]; 
+ + // Tally whenever any virial is requested: compute() sets + // no_virial_fdotr_compute for neighflag == FULL, so the global virial + // cannot be recovered later and must not depend on vflag_atom. + // NOTE(review): assumes v_tally3 checks vflag_global / vflag_atom itself -- confirm + if (vflag_either) { + F_FLOAT delrij[3], delrik[3]; + delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1; + delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2; + this->template v_tally3(ev,i,j,k,fj,fk,delrij,delrik); + } + } + } +} + +// force-only variant of the FullA kernel: the local ev accumulator is discarded + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::operator()(TagPairTersoffComputeFullA, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairTersoffComputeFullA(), ii, ev); +} + +/* ---------------------------------------------------------------------- + FullB kernel: for atom i = d_ilist[ii], visit every neighbor j < nlocal + that lies within the cutoff and add to f(i) only the forces atom i + receives from terms centered on j: the j-i pair attraction and the + three-body contributions fj and fk taken over j's neighbors k != i. + No other atom's force is written here. + ------------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::operator()(TagPairTersoffComputeFullB, const int &ii, EV_FLOAT& ev) const { + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + int j,k,jj,kk,jtype,ktype,j_jnum; + F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij; + F_FLOAT fj[3], fk[3]; + X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2; + + const int jnum = d_numneigh[i]; + + // attractive: bond order + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + if (j >= nlocal) continue; + jtype = type(j); + + delx1 = x(j,0) - xtmp; + dely1 = x(j,1) - ytmp; + delz1 = x(j,2) - ztmp; + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + cutsq1 = paramskk(jtype,itype,itype).cutsq; + + bo_ij = 0.0; + if (rsq1 > cutsq1) continue; + rij = sqrt(rsq1); + + j_jnum = d_numneigh[j]; + + for (kk = 0; kk < j_jnum; kk++) { + k = d_neighbors(j,kk); + k &= NEIGHMASK; // mask first so the plain atom index is compared with i + if (k == i) continue; + ktype = type(k); + + delx2 = x(j,0) - x(k,0); + dely2 = x(j,1) - x(k,1); + delz2 = x(j,2) - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(jtype,itype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + bo_ij += bondorder(jtype,itype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); + + } + + // attractive: pairwise potential and force + + 
const F_FLOAT fa = ters_fa_k(jtype,itype,itype,rij); + const F_FLOAT dfa = ters_dfa(jtype,itype,itype,rij); + const F_FLOAT bij = ters_bij_k(jtype,itype,itype,bo_ij); + const F_FLOAT fatt = -0.5*bij * dfa / rij; + const F_FLOAT prefactor = 0.5*fa * ters_dbij(jtype,itype,itype,bo_ij); + const F_FLOAT eng = 0.5*bij * fa; + + f(i,0) -= delx1*fatt; + f(i,1) -= dely1*fatt; + f(i,2) -= delz1*fatt; + + if (EVFLAG) { + if (eflag) + ev.evdwl += 0.5 * eng; + if (vflag_either || eflag_atom) + this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); + } + + // attractive: three-body force + + for (kk = 0; kk < j_jnum; kk++) { + k = d_neighbors(j,kk); + k &= NEIGHMASK; // mask before comparing, as in the bond-order loop + if (k == i) continue; + ktype = type(k); + + delx2 = x(j,0) - x(k,0); + dely2 = x(j,1) - x(k,1); + delz2 = x(j,2) - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(jtype,itype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + ters_dthbj(jtype,itype,ktype,prefactor,rij,delx1,dely1,delz1, + rik,delx2,dely2,delz2,fj,fk); + f(i,0) += fj[0]; + f(i,1) += fj[1]; + f(i,2) += fj[2]; + + // tally whenever any virial is requested, not only the per-atom one + // NOTE(review): assumes v_tally3_atom checks vflag_global / vflag_atom itself -- confirm + if (vflag_either) { + F_FLOAT delrji[3], delrjk[3]; + delrji[0] = -delx1; delrji[1] = -dely1; delrji[2] = -delz1; + delrjk[0] = -delx2; delrjk[1] = -dely2; delrjk[2] = -delz2; + v_tally3_atom(ev,i,j,k,fj,fk,delrji,delrjk); + } + + const F_FLOAT fa_jk = ters_fa_k(jtype,ktype,itype,rik); + const F_FLOAT prefactor_jk = 0.5*fa_jk * ters_dbij(jtype,ktype,itype,bo_ij); + ters_dthbk(jtype,ktype,itype,prefactor_jk,rik,delx2,dely2,delz2, + rij,delx1,dely1,delz1,fk); + f(i,0) += fk[0]; + f(i,1) += fk[1]; + f(i,2) += fk[2]; + } + } +} + +// force-only variant of the FullB kernel: the local ev accumulator is discarded + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::operator()(TagPairTersoffComputeFullB, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairTersoffComputeFullB(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double 
PairTersoffKokkos::ters_fc_k(const int &i, const int &j, + const int &k, const F_FLOAT &r) const +{ + const F_FLOAT ters_R = paramskk(i,j,k).bigr; + const F_FLOAT ters_D = paramskk(i,j,k).bigd; + + if (r < ters_R-ters_D) return 1.0; + if (r > ters_R+ters_D) return 0.0; + return 0.5*(1.0 - sin(MY_PI2*(r - ters_R)/ters_D)); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffKokkos::ters_dfc(const int &i, const int &j, + const int &k, const F_FLOAT &r) const +{ + const F_FLOAT ters_R = paramskk(i,j,k).bigr; + const F_FLOAT ters_D = paramskk(i,j,k).bigd; + + if (r < ters_R-ters_D) return 0.0; + if (r > ters_R+ters_D) return 0.0; + return -(MY_PI4/ters_D) * cos(MY_PI2*(r - ters_R)/ters_D); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffKokkos::bondorder(const int &i, const int &j, const int &k, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const +{ + F_FLOAT arg, ex_delr; + + const F_FLOAT costheta = (dx1*dx2 + dy1*dy2 + dz1*dz2)/(rij*rik); + + if (int(paramskk(i,j,k).powerm) == 3) arg = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); + else arg = paramskk(i,j,k).lam3 * (rij-rik); + + if (arg > 69.0776) ex_delr = 1.e30; + else if (arg < -69.0776) ex_delr = 0.0; + else ex_delr = exp(arg); + + return ters_fc_k(i,j,k,rik) * ters_gijk(i,j,k,costheta) * ex_delr; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffKokkos:: + ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const +{ + const F_FLOAT ters_c = paramskk(i,j,k).c * paramskk(i,j,k).c; + const F_FLOAT ters_d = paramskk(i,j,k).d * paramskk(i,j,k).d; + const F_FLOAT hcth = paramskk(i,j,k).h - cos; + + return 
paramskk(i,j,k).gamma*(1.0 + ters_c/ters_d - ters_c/(ters_d+hcth*hcth)); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffKokkos:: + ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const +{ + + const F_FLOAT ters_c = paramskk(i,j,k).c * paramskk(i,j,k).c; + const F_FLOAT ters_d = paramskk(i,j,k).d * paramskk(i,j,k).d; + const F_FLOAT hcth = paramskk(i,j,k).h - cos; + const F_FLOAT numerator = -2.0 * ters_c * hcth; + const F_FLOAT denominator = 1.0/(ters_d + hcth*hcth); + return paramskk(i,j,k).gamma * numerator * denominator * denominator; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffKokkos::ters_fa_k(const int &i, const int &j, + const int &k, const F_FLOAT &r) const +{ + if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0; + return -paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r) + * ters_fc_k(i,j,k,r); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffKokkos::ters_dfa(const int &i, const int &j, + const int &k, const F_FLOAT &r) const +{ + if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0; + return paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r) * + (paramskk(i,j,k).lam2 * ters_fc_k(i,j,k,r) - ters_dfc(i,j,k,r)); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffKokkos::ters_bij_k(const int &i, const int &j, + const int &k, const F_FLOAT &bo) const +{ + const F_FLOAT tmp = paramskk(i,j,k).beta * bo; + if (tmp > paramskk(i,j,k).c1) return 1.0/sqrt(tmp); + if (tmp > paramskk(i,j,k).c2) + return (1.0 - pow(tmp,-paramskk(i,j,k).powern) / (2.0*paramskk(i,j,k).powern))/sqrt(tmp); + if (tmp < paramskk(i,j,k).c4) return 1.0; + if (tmp < paramskk(i,j,k).c3) 
+ return 1.0 - pow(tmp,paramskk(i,j,k).powern)/(2.0*paramskk(i,j,k).powern); + return pow(1.0 + pow(tmp,paramskk(i,j,k).powern), -1.0/(2.0*paramskk(i,j,k).powern)); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffKokkos::ters_dbij(const int &i, const int &j, + const int &k, const F_FLOAT &bo) const +{ + const F_FLOAT tmp = paramskk(i,j,k).beta * bo; + if (tmp > paramskk(i,j,k).c1) return paramskk(i,j,k).beta * -0.5*pow(tmp,-1.5); + if (tmp > paramskk(i,j,k).c2) + return paramskk(i,j,k).beta * (-0.5*pow(tmp,-1.5) * + (1.0 - 0.5*(1.0 + 1.0/(2.0*paramskk(i,j,k).powern)) * + pow(tmp,-paramskk(i,j,k).powern))); + if (tmp < paramskk(i,j,k).c4) return 0.0; + if (tmp < paramskk(i,j,k).c3) + return -0.5*paramskk(i,j,k).beta * pow(tmp,paramskk(i,j,k).powern-1.0); + + const F_FLOAT tmp_n = pow(tmp,paramskk(i,j,k).powern); + return -0.5 * pow(1.0+tmp_n, -1.0-(1.0/(2.0*paramskk(i,j,k).powern)))*tmp_n / bo; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::ters_dthb( + const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const +{ + // from PairTersoff::attractive + F_FLOAT rij_hat[3],rik_hat[3]; + F_FLOAT rijinv,rikinv; + F_FLOAT delrij[3], delrik[3]; + + delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1; + delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2; + + //rij = sqrt(rsq1); + rijinv = 1.0/rij; + vec3_scale(rijinv,delrij,rij_hat); + + //rik = sqrt(rsq2); + rikinv = 1.0/rik; + vec3_scale(rikinv,delrik,rik_hat); + + // from PairTersoff::ters_zetaterm_d + F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; + F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; + + fc = ters_fc_k(i,j,k,rik); + 
dfc = ters_dfc(i,j,k,rik); + if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); + else tmp = paramskk(i,j,k).lam3 * (rij-rik); + + if (tmp > 69.0776) ex_delr = 1.e30; + else if (tmp < -69.0776) ex_delr = 0.0; + else ex_delr = exp(tmp); + + if (int(paramskk(i,j,k).powerm) == 3) + dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + else dex_delr = paramskk(i,j,k).lam3 * ex_delr; + + cos = vec3_dot(rij_hat,rik_hat); + gijk = ters_gijk(i,j,k,cos); + dgijk = ters_dgijk(i,j,k,cos); + + // from PairTersoff::costheta_d + vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); + vec3_scale(rijinv,dcosfj,dcosfj); + vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk); + vec3_scale(rikinv,dcosfk,dcosfk); + vec3_add(dcosfj,dcosfk,dcosfi); + vec3_scale(-1.0,dcosfi,dcosfi); + + vec3_scale(-dfc*gijk*ex_delr,rik_hat,fi); + vec3_scaleadd(fc*dgijk*ex_delr,dcosfi,fi,fi); + vec3_scaleadd(fc*gijk*dex_delr,rik_hat,fi,fi); + vec3_scaleadd(-fc*gijk*dex_delr,rij_hat,fi,fi); + vec3_scale(prefactor,fi,fi); + + vec3_scale(fc*dgijk*ex_delr,dcosfj,fj); + vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj); + vec3_scale(prefactor,fj,fj); + + vec3_scale(dfc*gijk*ex_delr,rik_hat,fk); + vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk); + vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk); + vec3_scale(prefactor,fk,fk); + +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::ters_dthbj( + const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fj, F_FLOAT *fk) const +{ + F_FLOAT rij_hat[3],rik_hat[3]; + F_FLOAT rijinv,rikinv; + F_FLOAT delrij[3], delrik[3]; + + delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1; + delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2; + + rijinv = 1.0/rij; + 
vec3_scale(rijinv,delrij,rij_hat); + + rikinv = 1.0/rik; + vec3_scale(rikinv,delrik,rik_hat); + + F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; + F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; + + fc = ters_fc_k(i,j,k,rik); + dfc = ters_dfc(i,j,k,rik); + if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); + else tmp = paramskk(i,j,k).lam3 * (rij-rik); + + if (tmp > 69.0776) ex_delr = 1.e30; + else if (tmp < -69.0776) ex_delr = 0.0; + else ex_delr = exp(tmp); + + if (int(paramskk(i,j,k).powerm) == 3) + dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + else dex_delr = paramskk(i,j,k).lam3 * ex_delr; + + cos = vec3_dot(rij_hat,rik_hat); + gijk = ters_gijk(i,j,k,cos); + dgijk = ters_dgijk(i,j,k,cos); + + vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); + vec3_scale(rijinv,dcosfj,dcosfj); + vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk); + vec3_scale(rikinv,dcosfk,dcosfk); + vec3_add(dcosfj,dcosfk,dcosfi); + vec3_scale(-1.0,dcosfi,dcosfi); + + vec3_scale(fc*dgijk*ex_delr,dcosfj,fj); + vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj); + vec3_scale(prefactor,fj,fj); + + vec3_scale(dfc*gijk*ex_delr,rik_hat,fk); + vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk); + vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk); + vec3_scale(prefactor,fk,fk); + +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::ters_dthbk( + const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fk) const +{ + F_FLOAT rij_hat[3],rik_hat[3]; + F_FLOAT rijinv,rikinv; + F_FLOAT delrij[3], delrik[3]; + + delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1; + delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2; + + rijinv = 1.0/rij; + vec3_scale(rijinv,delrij,rij_hat); + + rikinv = 1.0/rik; + 
vec3_scale(rikinv,delrik,rik_hat); + + F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; + F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; + + fc = ters_fc_k(i,j,k,rik); + dfc = ters_dfc(i,j,k,rik); + if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); + else tmp = paramskk(i,j,k).lam3 * (rij-rik); + + if (tmp > 69.0776) ex_delr = 1.e30; + else if (tmp < -69.0776) ex_delr = 0.0; + else ex_delr = exp(tmp); + + if (int(paramskk(i,j,k).powerm) == 3) + dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + else dex_delr = paramskk(i,j,k).lam3 * ex_delr; + + cos = vec3_dot(rij_hat,rik_hat); + gijk = ters_gijk(i,j,k,cos); + dgijk = ters_dgijk(i,j,k,cos); + + vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); + vec3_scale(rijinv,dcosfj,dcosfj); + vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk); + vec3_scale(rikinv,dcosfk,dcosfk); + vec3_add(dcosfj,dcosfk,dcosfi); + vec3_scale(-1.0,dcosfi,dcosfi); + + vec3_scale(dfc*gijk*ex_delr,rik_hat,fk); + vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk); + vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk); + vec3_scale(prefactor,fk,fk); + +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const +{ + const int VFLAG = vflag_either; + + // The eatom and vatom arrays are atomic for Half/Thread neighbor style + Kokkos::View::value> > v_eatom = k_eatom.view(); + Kokkos::View::value> > v_vatom = k_vatom.view(); + + if (eflag_atom) { + const E_FLOAT epairhalf = 0.5 * epair; + v_eatom[i] += epairhalf; + if (NEIGHFLAG != FULL) v_eatom[j] += epairhalf; + } + + if (VFLAG) { + const E_FLOAT v0 = delx*delx*fpair; + const E_FLOAT v1 = dely*dely*fpair; + const E_FLOAT v2 = delz*delz*fpair; + const E_FLOAT v3 = delx*dely*fpair; + const E_FLOAT v4 = 
delx*delz*fpair; + const E_FLOAT v5 = dely*delz*fpair; + + if (vflag_global) { + if (NEIGHFLAG != FULL) { + ev.v[0] += v0; + ev.v[1] += v1; + ev.v[2] += v2; + ev.v[3] += v3; + ev.v[4] += v4; + ev.v[5] += v5; + } else { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + + if (vflag_atom) { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + + if (NEIGHFLAG != FULL) { + v_vatom(j,0) += 0.5*v0; + v_vatom(j,1) += 0.5*v1; + v_vatom(j,2) += 0.5*v2; + v_vatom(j,3) += 0.5*v3; + v_vatom(j,4) += 0.5*v4; + v_vatom(j,5) += 0.5*v5; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const +{ + + // The eatom and vatom arrays are atomic for Half/Thread neighbor style + Kokkos::View::value> > v_vatom = k_vatom.view(); + + F_FLOAT v[6]; + + v[0] = THIRD * (drij[0]*fj[0] + drik[0]*fk[0]); + v[1] = THIRD * (drij[1]*fj[1] + drik[1]*fk[1]); + v[2] = THIRD * (drij[2]*fj[2] + drik[2]*fk[2]); + v[3] = THIRD * (drij[0]*fj[1] + drik[0]*fk[1]); + v[4] = THIRD * (drij[0]*fj[2] + drik[0]*fk[2]); + v[5] = THIRD * (drij[1]*fj[2] + drik[1]*fk[2]); + + if (vflag_global) { + ev.v[0] += v[0]; + ev.v[1] += v[1]; + ev.v[2] += v[2]; + ev.v[3] += v[3]; + ev.v[4] += v[4]; + ev.v[5] += v[5]; + } + + if (vflag_atom) { + v_vatom(i,0) += v[0]; v_vatom(i,1) += v[1]; v_vatom(i,2) += v[2]; + v_vatom(i,3) += v[3]; v_vatom(i,4) += v[4]; v_vatom(i,5) += v[5]; + if (NEIGHFLAG != FULL) { + v_vatom(j,0) += v[0]; v_vatom(j,1) += v[1]; v_vatom(j,2) += v[2]; + v_vatom(j,3) += v[3]; v_vatom(j,4) += v[4]; v_vatom(j,5) += v[5]; + v_vatom(k,0) += v[0]; v_vatom(k,1) += v[1]; v_vatom(k,2) += v[2]; + 
v_vatom(k,3) += v[3]; v_vatom(k,4) += v[4]; v_vatom(k,5) += v[5]; + } + } + +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairTersoffKokkos::v_tally3_atom(EV_FLOAT &ev, const int &i, const int &j, const int &k, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const +{ + F_FLOAT v[6]; + + v[0] = THIRD * (drji[0]*fj[0] + drjk[0]*fk[0]); + v[1] = THIRD * (drji[1]*fj[1] + drjk[1]*fk[1]); + v[2] = THIRD * (drji[2]*fj[2] + drjk[2]*fk[2]); + v[3] = THIRD * (drji[0]*fj[1] + drjk[0]*fk[1]); + v[4] = THIRD * (drji[0]*fj[2] + drjk[0]*fk[2]); + v[5] = THIRD * (drji[1]*fj[2] + drjk[1]*fk[2]); + + if (vflag_global) { + ev.v[0] += v[0]; + ev.v[1] += v[1]; + ev.v[2] += v[2]; + ev.v[3] += v[3]; + ev.v[4] += v[4]; + ev.v[5] += v[5]; + } + + if (vflag_atom) { + d_vatom(i,0) += v[0]; d_vatom(i,1) += v[1]; d_vatom(i,2) += v[2]; + d_vatom(i,3) += v[3]; d_vatom(i,4) += v[4]; d_vatom(i,5) += v[5]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int PairTersoffKokkos::sbmask(const int& j) const { + return j >> SBBITS & 3; +} + +template class PairTersoffKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class PairTersoffKokkos; +#endif diff --git a/src/KOKKOS/pair_tersoff_kokkos.h b/src/KOKKOS/pair_tersoff_kokkos.h new file mode 100755 index 0000000000..ea02e374be --- /dev/null +++ b/src/KOKKOS/pair_tersoff_kokkos.h @@ -0,0 +1,220 @@ +/* -*- c++ -*- ---------------------------------------------------------- + + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+ + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(tersoff/kk,PairTersoffKokkos) +PairStyle(tersoff/kk/device,PairTersoffKokkos) +PairStyle(tersoff/kk/host,PairTersoffKokkos) + +#else + +#ifndef LMP_PAIR_TERSOFF_KOKKOS_H +#define LMP_PAIR_TERSOFF_KOKKOS_H + +#include "stdio.h" +#include "pair_kokkos.h" +#include "pair_tersoff.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template +struct TagPairTersoffComputeHalf{}; + +template +struct TagPairTersoffComputeFullA{}; + +template +struct TagPairTersoffComputeFullB{}; + +template +class PairTersoffKokkos : public PairTersoff { + public: + enum {EnabledNeighFlags=FULL}; + enum {COUL_FLAG=0}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef EV_FLOAT value_type; + + PairTersoffKokkos(class LAMMPS *); + virtual ~PairTersoffKokkos(); + virtual void compute(int, int); + void init_style(); + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffComputeHalf, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffComputeHalf, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffComputeFullA, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffComputeFullA, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffComputeFullB, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffComputeFullB, const int&) const; + + KOKKOS_INLINE_FUNCTION + double ters_fc_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + + KOKKOS_INLINE_FUNCTION + double ters_dfc(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + + KOKKOS_INLINE_FUNCTION + double ters_fa_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + + 
KOKKOS_INLINE_FUNCTION + double ters_dfa(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + + KOKKOS_INLINE_FUNCTION + double ters_bij_k(const int &i, const int &j, const int &k, const F_FLOAT &bo) const; + + KOKKOS_INLINE_FUNCTION + double ters_dbij(const int &i, const int &j, const int &k, const F_FLOAT &bo) const; + + KOKKOS_INLINE_FUNCTION + double bondorder(const int &i, const int &j, const int &k, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const; + + KOKKOS_INLINE_FUNCTION + double ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const; + + KOKKOS_INLINE_FUNCTION + double ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const; + + KOKKOS_INLINE_FUNCTION + void ters_dthb(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const; + + KOKKOS_INLINE_FUNCTION + void ters_dthbj(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fj, F_FLOAT *fk) const; + + KOKKOS_INLINE_FUNCTION + void ters_dthbk(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fk) const; + + KOKKOS_INLINE_FUNCTION + double vec3_dot(const F_FLOAT x[3], const double y[3]) const { + return x[0]*y[0] + x[1]*y[1] + x[2]*y[2]; + } + + KOKKOS_INLINE_FUNCTION + void vec3_add(const F_FLOAT x[3], const double y[3], double * const 
z) const { + z[0] = x[0]+y[0]; z[1] = x[1]+y[1]; z[2] = x[2]+y[2]; + } + + KOKKOS_INLINE_FUNCTION + void vec3_scale(const F_FLOAT k, const double x[3], double y[3]) const { + y[0] = k*x[0]; y[1] = k*x[1]; y[2] = k*x[2]; + } + + KOKKOS_INLINE_FUNCTION + void vec3_scaleadd(const F_FLOAT k, const double x[3], const double y[3], double * const z) const { + z[0] = k*x[0]+y[0]; z[1] = k*x[1]+y[1]; z[2] = k*x[2]+y[2]; + } + + KOKKOS_INLINE_FUNCTION + int sbmask(const int& j) const; + + struct params_ters{ + params_ters(){powerm=0;gamma=0;lam3=0;c=0;d=0;h=0;powern=0;beta=0;lam2=0;bigb=0; + bigr=0;bigd=0;lam1=0;biga=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;}; + params_ters(int i){powerm=0;gamma=0;lam3=0;c=0;d=0;h=0;powern=0;beta=0;lam2=0;bigb=0; + bigr=0;bigd=0;lam1=0;biga=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;}; + F_FLOAT powerm, gamma, lam3, c, d, h, powern, beta, lam2, bigb, bigr, + bigd, lam1, biga, cutsq, c1, c2, c3, c4; + }; + + template + KOKKOS_INLINE_FUNCTION + void ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const; + + template + KOKKOS_INLINE_FUNCTION + void v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const; + + KOKKOS_INLINE_FUNCTION + void v_tally3_atom(EV_FLOAT &ev, const int &i, const int &j, const int &k, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const; + + void allocate(); + void setup(); + + protected: + void cleanup_copy(); + + typedef Kokkos::DualView tdual_int_3d; + Kokkos::DualView k_params; + typename Kokkos::DualView::t_dev_const paramskk; + // hardwired to space for 15 atom types + //params_ters m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + typename AT::t_x_array_randomread x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + typename AT::t_tagint_1d tag; + + DAT::tdual_efloat_1d k_eatom; + 
DAT::tdual_virial_array k_vatom; + DAT::t_efloat_1d d_eatom; + DAT::t_virial_array d_vatom; + + typedef Kokkos::DualView tdual_ffloat_2d_n7; + typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread; + typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7; + + typename ArrayTypes::t_neighbors_2d d_neighbors; + typename ArrayTypes::t_int_1d_randomread d_ilist; + typename ArrayTypes::t_int_1d_randomread d_numneigh; + //NeighListKokkos k_list; + + class AtomKokkos *atomKK; + int neighflag,newton_pair; + int nlocal,nall,eflag,vflag; + + friend void pair_virial_fdotr_compute(PairTersoffKokkos*); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp new file mode 100755 index 0000000000..ff84e2d392 --- /dev/null +++ b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp @@ -0,0 +1,1208 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Ray Shan (SNL) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_tersoff_mod_kokkos.h" +#include "kokkos.h" +#include "atom_kokkos.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_request.h" +#include "neigh_list_kokkos.h" +#include "update.h" +#include "integrate.h" +#include "respa.h" +#include "math_const.h" +#include "memory.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define KOKKOS_CUDA_MAX_THREADS 256 +#define KOKKOS_CUDA_MIN_BLOCKS 8 + +/* ---------------------------------------------------------------------- */ + +template +PairTersoffMODKokkos::PairTersoffMODKokkos(LAMMPS *lmp) : PairTersoffMOD(lmp) +{ + THIRD = 1.0/3.0; + + respa_enable = 0; + + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; +} + +/* ---------------------------------------------------------------------- */ + +template +PairTersoffMODKokkos::~PairTersoffMODKokkos() +{ + if (!copymode) { + memory->destroy_kokkos(k_eatom,eatom); + memory->destroy_kokkos(k_vatom,vatom); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairTersoffMODKokkos::allocate() +{ + PairTersoffMOD::allocate(); + + int n = atom->ntypes; + + k_params = Kokkos::DualView + ("PairTersoffMOD::paramskk",n+1,n+1,n+1); + paramskk = k_params.d_view; +} + +/* ---------------------------------------------------------------------- + init specific to this pair style 
+------------------------------------------------------------------------- */ + +template +void PairTersoffMODKokkos::init_style() +{ + PairTersoffMOD::init_style(); + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same::value && + !Kokkos::Impl::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same::value; + + if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full_cluster = 0; + if (neighflag == FULL) + neighbor->requests[irequest]->ghost = 1; + else + neighbor->requests[irequest]->ghost = 0; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with tersoff/kk"); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairTersoffMODKokkos::setup() +{ + PairTersoffMOD::setup(); + + int i,j,k,m; + int n = atom->ntypes; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + for (k = 1; k <= n; k++) { + m = elem2param[i-1][j-1][k-1]; + k_params.h_view(i,j,k).powerm = params[m].powerm; + k_params.h_view(i,j,k).lam3 = params[m].lam3; + k_params.h_view(i,j,k).h = params[m].h; + k_params.h_view(i,j,k).powern = params[m].powern; + k_params.h_view(i,j,k).beta = params[m].beta; + k_params.h_view(i,j,k).lam2 = params[m].lam2; + k_params.h_view(i,j,k).bigb = params[m].bigb; + k_params.h_view(i,j,k).bigr = params[m].bigr; + k_params.h_view(i,j,k).bigd = params[m].bigd; + k_params.h_view(i,j,k).lam1 = params[m].lam1; + k_params.h_view(i,j,k).biga = params[m].biga; + k_params.h_view(i,j,k).cutsq = params[m].cutsq; + k_params.h_view(i,j,k).c1 = params[m].c1; + k_params.h_view(i,j,k).c2 = params[m].c2; + k_params.h_view(i,j,k).c3 = params[m].c3; + k_params.h_view(i,j,k).c4 = params[m].c4; + 
k_params.h_view(i,j,k).c5 = params[m].c5; + k_params.h_view(i,j,k).ca1 = params[m].ca1; + k_params.h_view(i,j,k).ca4 = params[m].ca4; + k_params.h_view(i,j,k).powern_del = params[m].powern_del; + } + + k_params.template modify(); + +} + +/* ---------------------------------------------------------------------- */ + +template +void PairTersoffMODKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.d_view; + } + if (vflag_atom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.d_view; + } + + atomKK->sync(execution_space,datamask_read); + k_params.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + type = atomKK->k_type.view(); + tag = atomKK->k_tag.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + + const int inum = list->inum; + const int ignum = inum + list->gnum; + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = k_list->d_neighbors; + d_ilist = k_list->d_ilist; + + k_list->clean_copy(); + copymode = 1; + + EV_FLOAT ev; + EV_FLOAT ev_all; + + if (neighflag == HALF) { + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + ev_all += ev; + } else if (neighflag == HALFTHREAD) { + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + 
Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + ev_all += ev; + } else if (neighflag == FULL) { + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + ev_all += ev; + + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ignum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ignum),*this); + DeviceType::fence(); + ev_all += ev; + } + + if (eflag_global) eng_vdwl += ev_all.evdwl; + if (vflag_global) { + virial[0] += ev_all.v[0]; + virial[1] += ev_all.v[1]; + virial[2] += ev_all.v[2]; + virial[3] += ev_all.v[3]; + virial[4] += ev_all.v[4]; + virial[5] += ev_all.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + copymode = 0; +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeHalf, const int &ii, EV_FLOAT& ev) const { + + // The f array is atomic for Half/Thread neighbor style + Kokkos::View::value> > a_f = f; + + const int i = d_ilist[ii]; + if (i >= nlocal) return; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + const int itag = tag(i); + + int j,k,jj,kk,jtag,jtype,ktype; + F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij; + F_FLOAT fi[3], fj[3], fk[3]; + X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2; + + //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); + const int jnum = d_numneigh[i]; + + // repulsive + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + jtype = type(j); + jtag = tag(j); + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) 
continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp && x(j,1) < ytmp) continue; + if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue; + } + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq; + + if (rsq > cutsq) continue; + + const F_FLOAT r = sqrt(rsq); + const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r); + const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r); + const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r); + const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp * + (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r; + const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp; + + a_f(i,0) += delx*frep; + a_f(i,1) += dely*frep; + a_f(i,2) += delz*frep; + a_f(j,0) -= delx*frep; + a_f(j,1) -= dely*frep; + a_f(j,2) -= delz*frep; + + if (EVFLAG) { + if (eflag) ev.evdwl += eng; + if (vflag_either || eflag_atom) this->template ev_tally(ev,i,j,eng,frep,delx,dely,delz); + } + } + + // attractive: bond order + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + jtype = type(j); + + delx1 = xtmp - x(j,0); + dely1 = ytmp - x(j,1); + delz1 = ztmp - x(j,2); + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + cutsq1 = paramskk(itype,jtype,jtype).cutsq; + + bo_ij = 0.0; + if (rsq1 > cutsq1) continue; + rij = sqrt(rsq1); + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = d_neighbors(i,kk); + k &= NEIGHMASK; + ktype = type(k); + + delx2 = xtmp - x(k,0); + dely2 = ytmp - x(k,1); + delz2 = ztmp - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(itype,jtype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + bo_ij += 
bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); + } + + // attractive: pairwise potential and force + + const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij); + const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij); + const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij); + const F_FLOAT fatt = -0.5*bij * dfa / rij; + const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij); + const F_FLOAT eng = 0.5*bij * fa; + + a_f(i,0) += delx1*fatt; + a_f(i,1) += dely1*fatt; + a_f(i,2) += delz1*fatt; + a_f(j,0) -= delx1*fatt; + a_f(j,1) -= dely1*fatt; + a_f(j,2) -= delz1*fatt; + + if (EVFLAG) { + if (eflag) ev.evdwl += eng; + if (vflag_either || eflag_atom) + this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); + } + + // attractive: three-body force + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = d_neighbors(i,kk); + k &= NEIGHMASK; + ktype = type(k); + + delx2 = xtmp - x(k,0); + dely2 = ytmp - x(k,1); + delz2 = ztmp - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(itype,jtype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1, + rik,delx2,dely2,delz2,fi,fj,fk); + + a_f(i,0) += fi[0]; + a_f(i,1) += fi[1]; + a_f(i,2) += fi[2]; + a_f(j,0) += fj[0]; + a_f(j,1) += fj[1]; + a_f(j,2) += fj[2]; + a_f(k,0) += fk[0]; + a_f(k,1) += fk[1]; + a_f(k,2) += fk[2]; + + if (vflag_atom) { + F_FLOAT delrij[3], delrik[3]; + delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1; + delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2; + if (vflag_either) this->template v_tally3(ev,i,j,k,fj,fk,delrij,delrik); + } + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeHalf, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairTersoffMODComputeHalf(), ii, ev); +} + +/* 
---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeFullA, const int &ii, EV_FLOAT& ev) const { + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + int j,k,jj,kk,jtype,ktype; + F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij; + F_FLOAT fi[3], fj[3], fk[3]; + X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2; + + //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); + const int jnum = d_numneigh[i]; + + // repulsive + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + const int jtype = type(j); + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq; + + if (rsq > cutsq) continue; + + const F_FLOAT r = sqrt(rsq); + const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r); + const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r); + const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r); + const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp * + (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r; + const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp; + + f(i,0) += delx*frep; + f(i,1) += dely*frep; + f(i,2) += delz*frep; + + if (EVFLAG) { + if (eflag) + ev.evdwl += 0.5*eng; + if (vflag_either || eflag_atom) + this->template ev_tally(ev,i,j,eng,frep,delx,dely,delz); + } + } + + // attractive: bond order + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + jtype = type(j); + + delx1 = xtmp - x(j,0); + dely1 = ytmp - x(j,1); + delz1 = ztmp - x(j,2); + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + cutsq1 = paramskk(itype,jtype,jtype).cutsq; 
+ + bo_ij = 0.0; + if (rsq1 > cutsq1) continue; + rij = sqrt(rsq1); + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = d_neighbors(i,kk); + k &= NEIGHMASK; + ktype = type(k); + + delx2 = xtmp - x(k,0); + dely2 = ytmp - x(k,1); + delz2 = ztmp - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(itype,jtype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); + } + + // attractive: pairwise potential and force + + const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij); + const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij); + const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij); + const F_FLOAT fatt = -0.5*bij * dfa / rij; + const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij); + const F_FLOAT eng = 0.5*bij * fa; + + f(i,0) += delx1*fatt; + f(i,1) += dely1*fatt; + f(i,2) += delz1*fatt; + + if (EVFLAG) { + if (eflag) ev.evdwl += 0.5*eng; + if (vflag_either || eflag_atom) + this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); + } + + // attractive: three-body force + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = d_neighbors(i,kk); + k &= NEIGHMASK; + ktype = type(k); + + delx2 = xtmp - x(k,0); + dely2 = ytmp - x(k,1); + delz2 = ztmp - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(itype,jtype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1, + rik,delx2,dely2,delz2,fi,fj,fk); + + f(i,0) += fi[0]; + f(i,1) += fi[1]; + f(i,2) += fi[2]; + + if (vflag_atom) { + F_FLOAT delrij[3], delrik[3]; + delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1; + delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2; + if (vflag_either) this->template v_tally3(ev,i,j,k,fj,fk,delrij,delrik); + } + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void 
PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeFullA, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairTersoffMODComputeFullA(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeFullB, const int &ii, EV_FLOAT& ev) const { + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + int j,k,jj,kk,jtype,ktype,j_jnum; + F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij; + F_FLOAT fj[3], fk[3]; + X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2; + + const int jnum = d_numneigh[i]; + + // attractive: bond order + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + if (j >= nlocal) continue; + jtype = type(j); + + delx1 = x(j,0) - xtmp; + dely1 = x(j,1) - ytmp; + delz1 = x(j,2) - ztmp; + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + cutsq1 = paramskk(jtype,itype,itype).cutsq; + + bo_ij = 0.0; + if (rsq1 > cutsq1) continue; + rij = sqrt(rsq1); + + j_jnum = d_numneigh[j]; + + for (kk = 0; kk < j_jnum; kk++) { + k = d_neighbors(j,kk); + if (k == i) continue; + k &= NEIGHMASK; + ktype = type(k); + + delx2 = x(j,0) - x(k,0); + dely2 = x(j,1) - x(k,1); + delz2 = x(j,2) - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(jtype,itype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + bo_ij += bondorder(jtype,itype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); + + } + + // attractive: pairwise potential and force + + const F_FLOAT fa = ters_fa_k(jtype,itype,itype,rij); + const F_FLOAT dfa = ters_dfa(jtype,itype,itype,rij); + const F_FLOAT bij = ters_bij_k(jtype,itype,itype,bo_ij); + const F_FLOAT fatt = -0.5*bij * dfa / rij; + const F_FLOAT prefactor = 0.5*fa * ters_dbij(jtype,itype,itype,bo_ij); + const F_FLOAT eng = 
0.5*bij * fa; + + f(i,0) -= delx1*fatt; + f(i,1) -= dely1*fatt; + f(i,2) -= delz1*fatt; + + if (EVFLAG) { + if (eflag) + ev.evdwl += 0.5 * eng; + if (vflag_either || eflag_atom) + this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); + } + + // attractive: three-body force + + for (kk = 0; kk < j_jnum; kk++) { + k = d_neighbors(j,kk); + if (k == i) continue; + k &= NEIGHMASK; + ktype = type(k); + + delx2 = x(j,0) - x(k,0); + dely2 = x(j,1) - x(k,1); + delz2 = x(j,2) - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(jtype,itype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + ters_dthbj(jtype,itype,ktype,prefactor,rij,delx1,dely1,delz1, + rik,delx2,dely2,delz2,fj,fk); + f(i,0) += fj[0]; + f(i,1) += fj[1]; + f(i,2) += fj[2]; + + if (vflag_atom) { + F_FLOAT delrji[3], delrjk[3]; + delrji[0] = -delx1; delrji[1] = -dely1; delrji[2] = -delz1; + delrjk[0] = -delx2; delrjk[1] = -dely2; delrjk[2] = -delz2; + if (vflag_either) v_tally3_atom(ev,i,j,k,fj,fk,delrji,delrjk); + } + + const F_FLOAT fa_jk = ters_fa_k(jtype,ktype,itype,rik); + const F_FLOAT prefactor_jk = 0.5*fa_jk * ters_dbij(jtype,ktype,itype,bo_ij); + ters_dthbk(jtype,ktype,itype,prefactor_jk,rik,delx2,dely2,delz2, + rij,delx1,dely1,delz1,fk); + f(i,0) += fk[0]; + f(i,1) += fk[1]; + f(i,2) += fk[2]; + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeFullB, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairTersoffMODComputeFullB(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffMODKokkos::ters_fc_k(const int &i, const int &j, + const int &k, const F_FLOAT &r) const +{ + const F_FLOAT ters_R = paramskk(i,j,k).bigr; + const F_FLOAT ters_D = paramskk(i,j,k).bigd; + + if (r < ters_R-ters_D) return 1.0; + if (r > ters_R+ters_D) return 0.0; + return 0.5*(1.0 - 
1.125*sin(MY_PI2*(r - ters_R)/ters_D) - + 0.125*sin(3.0*MY_PI2*(r - ters_R)/ters_D)); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffMODKokkos::ters_dfc(const int &i, const int &j, + const int &k, const F_FLOAT &r) const +{ + const F_FLOAT ters_R = paramskk(i,j,k).bigr; + const F_FLOAT ters_D = paramskk(i,j,k).bigd; + + if (r < ters_R-ters_D) return 0.0; + if (r > ters_R+ters_D) return 0.0; + return -(0.375*MY_PI4/ters_D) * (3.0*cos(MY_PI2*(r - ters_R)/ters_D) + + cos(3.0*MY_PI2*(r - ters_R)/ters_D)); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffMODKokkos::bondorder(const int &i, const int &j, const int &k, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const +{ + F_FLOAT arg, ex_delr; + + const F_FLOAT costheta = (dx1*dx2 + dy1*dy2 + dz1*dz2)/(rij*rik); + + if (int(paramskk(i,j,k).powerm) == 3) arg = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); + else arg = paramskk(i,j,k).lam3 * (rij-rik); + + if (arg > 69.0776) ex_delr = 1.e30; + else if (arg < -69.0776) ex_delr = 0.0; + else ex_delr = exp(arg); + + return ters_fc_k(i,j,k,rik) * ters_gijk(i,j,k,costheta) * ex_delr; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffMODKokkos:: + ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const +{ + const F_FLOAT ters_c1 = paramskk(i,j,k).c1; + const F_FLOAT ters_c2 = paramskk(i,j,k).c2; + const F_FLOAT ters_c3 = paramskk(i,j,k).c3; + const F_FLOAT ters_c4 = paramskk(i,j,k).c4; + const F_FLOAT ters_c5 = paramskk(i,j,k).c5; + const F_FLOAT tmp_h = (paramskk(i,j,k).h - cos)*(paramskk(i,j,k).h - cos); + + return ters_c1 + (ters_c2*tmp_h/(ters_c3 + tmp_h)) * + (1.0 + 
ters_c4*exp(-ters_c5*tmp_h)); + +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffMODKokkos:: + ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const +{ + const F_FLOAT ters_c2 = paramskk(i,j,k).c2; + const F_FLOAT ters_c3 = paramskk(i,j,k).c3; + const F_FLOAT ters_c4 = paramskk(i,j,k).c4; + const F_FLOAT ters_c5 = paramskk(i,j,k).c5; + const F_FLOAT tmp_h = (paramskk(i,j,k).h - cos)*(paramskk(i,j,k).h - cos); + const F_FLOAT g1 = (paramskk(i,j,k).h - cos)/(ters_c3 + tmp_h); + const F_FLOAT g2 = exp(-ters_c5*tmp_h); + + return -2.0*ters_c2*g1*((1 + ters_c4*g2)*(1 + g1*(cos - paramskk(i,j,k).h)) - + tmp_h*ters_c4*ters_c5*g2); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffMODKokkos::ters_fa_k(const int &i, const int &j, + const int &k, const F_FLOAT &r) const +{ + if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0; + return -paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r) + * ters_fc_k(i,j,k,r); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffMODKokkos::ters_dfa(const int &i, const int &j, + const int &k, const F_FLOAT &r) const +{ + if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0; + return paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r) * + (paramskk(i,j,k).lam2 * ters_fc_k(i,j,k,r) - ters_dfc(i,j,k,r)); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffMODKokkos::ters_bij_k(const int &i, const int &j, + const int &k, const F_FLOAT &bo) const +{ + const F_FLOAT tmp = paramskk(i,j,k).beta * bo; + if (tmp > paramskk(i,j,k).ca1) + return pow(tmp, -paramskk(i,j,k).powern/(2.0*paramskk(i,j,k).powern_del)); + if (tmp < paramskk(i,j,k).ca4) + return 1.0; + 
return pow(1.0 + pow(tmp,paramskk(i,j,k).powern), -1.0/(2.0*paramskk(i,j,k).powern_del)); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffMODKokkos::ters_dbij(const int &i, const int &j, + const int &k, const F_FLOAT &bo) const +{ + const F_FLOAT tmp = paramskk(i,j,k).beta * bo; + if (tmp > paramskk(i,j,k).ca1) + return -0.5*(paramskk(i,j,k).powern/paramskk(i,j,k).powern_del)* + pow(tmp,-0.5*(paramskk(i,j,k).powern/paramskk(i,j,k).powern_del)) / bo; + if (tmp < paramskk(i,j,k).ca4) + return 0.0; + + const F_FLOAT tmp_n = pow(tmp,paramskk(i,j,k).powern); + return -0.5 *(paramskk(i,j,k).powern/paramskk(i,j,k).powern_del)* + pow(1.0+tmp_n, -1.0-(1.0/(2.0*paramskk(i,j,k).powern_del)))*tmp_n / bo; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairTersoffMODKokkos::ters_dthb( + const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const +{ + // from PairTersoffMOD::attractive + F_FLOAT rij_hat[3],rik_hat[3]; + F_FLOAT rijinv,rikinv; + F_FLOAT delrij[3], delrik[3]; + + delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1; + delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2; + + //rij = sqrt(rsq1); + rijinv = 1.0/rij; + vec3_scale(rijinv,delrij,rij_hat); + + //rik = sqrt(rsq2); + rikinv = 1.0/rik; + vec3_scale(rikinv,delrik,rik_hat); + + // from PairTersoffMOD::ters_zetaterm_d + F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; + F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; + + fc = ters_fc_k(i,j,k,rik); + dfc = ters_dfc(i,j,k,rik); + if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); + else tmp = paramskk(i,j,k).lam3 * (rij-rik); + + if (tmp > 69.0776) ex_delr = 
1.e30; + else if (tmp < -69.0776) ex_delr = 0.0; + else ex_delr = exp(tmp); + + if (int(paramskk(i,j,k).powerm) == 3) + dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + else dex_delr = paramskk(i,j,k).lam3 * ex_delr; + + cos = vec3_dot(rij_hat,rik_hat); + gijk = ters_gijk(i,j,k,cos); + dgijk = ters_dgijk(i,j,k,cos); + + // from PairTersoffMOD::costheta_d + vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); + vec3_scale(rijinv,dcosfj,dcosfj); + vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk); + vec3_scale(rikinv,dcosfk,dcosfk); + vec3_add(dcosfj,dcosfk,dcosfi); + vec3_scale(-1.0,dcosfi,dcosfi); + + vec3_scale(-dfc*gijk*ex_delr,rik_hat,fi); + vec3_scaleadd(fc*dgijk*ex_delr,dcosfi,fi,fi); + vec3_scaleadd(fc*gijk*dex_delr,rik_hat,fi,fi); + vec3_scaleadd(-fc*gijk*dex_delr,rij_hat,fi,fi); + vec3_scale(prefactor,fi,fi); + + vec3_scale(fc*dgijk*ex_delr,dcosfj,fj); + vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj); + vec3_scale(prefactor,fj,fj); + + vec3_scale(dfc*gijk*ex_delr,rik_hat,fk); + vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk); + vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk); + vec3_scale(prefactor,fk,fk); + +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairTersoffMODKokkos::ters_dthbj( + const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fj, F_FLOAT *fk) const +{ + F_FLOAT rij_hat[3],rik_hat[3]; + F_FLOAT rijinv,rikinv; + F_FLOAT delrij[3], delrik[3]; + + delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1; + delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2; + + rijinv = 1.0/rij; + vec3_scale(rijinv,delrij,rij_hat); + + rikinv = 1.0/rik; + vec3_scale(rikinv,delrik,rik_hat); + + F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; + F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; + + fc = 
ters_fc_k(i,j,k,rik); + dfc = ters_dfc(i,j,k,rik); + if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); + else tmp = paramskk(i,j,k).lam3 * (rij-rik); + + if (tmp > 69.0776) ex_delr = 1.e30; + else if (tmp < -69.0776) ex_delr = 0.0; + else ex_delr = exp(tmp); + + if (int(paramskk(i,j,k).powerm) == 3) + dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + else dex_delr = paramskk(i,j,k).lam3 * ex_delr; + + cos = vec3_dot(rij_hat,rik_hat); + gijk = ters_gijk(i,j,k,cos); + dgijk = ters_dgijk(i,j,k,cos); + + vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); + vec3_scale(rijinv,dcosfj,dcosfj); + vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk); + vec3_scale(rikinv,dcosfk,dcosfk); + vec3_add(dcosfj,dcosfk,dcosfi); + vec3_scale(-1.0,dcosfi,dcosfi); + + vec3_scale(fc*dgijk*ex_delr,dcosfj,fj); + vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj); + vec3_scale(prefactor,fj,fj); + + vec3_scale(dfc*gijk*ex_delr,rik_hat,fk); + vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk); + vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk); + vec3_scale(prefactor,fk,fk); + +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairTersoffMODKokkos::ters_dthbk( + const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fk) const +{ + F_FLOAT rij_hat[3],rik_hat[3]; + F_FLOAT rijinv,rikinv; + F_FLOAT delrij[3], delrik[3]; + + delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1; + delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2; + + rijinv = 1.0/rij; + vec3_scale(rijinv,delrij,rij_hat); + + rikinv = 1.0/rik; + vec3_scale(rikinv,delrik,rik_hat); + + F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; + F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; + + fc = ters_fc_k(i,j,k,rik); + dfc = ters_dfc(i,j,k,rik); + if 
(int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); + else tmp = paramskk(i,j,k).lam3 * (rij-rik); + + if (tmp > 69.0776) ex_delr = 1.e30; + else if (tmp < -69.0776) ex_delr = 0.0; + else ex_delr = exp(tmp); + + if (int(paramskk(i,j,k).powerm) == 3) + dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + else dex_delr = paramskk(i,j,k).lam3 * ex_delr; + + cos = vec3_dot(rij_hat,rik_hat); + gijk = ters_gijk(i,j,k,cos); + dgijk = ters_dgijk(i,j,k,cos); + + vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); + vec3_scale(rijinv,dcosfj,dcosfj); + vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk); + vec3_scale(rikinv,dcosfk,dcosfk); + vec3_add(dcosfj,dcosfk,dcosfi); + vec3_scale(-1.0,dcosfi,dcosfi); + + vec3_scale(dfc*gijk*ex_delr,rik_hat,fk); + vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk); + vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk); + vec3_scale(prefactor,fk,fk); + +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffMODKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const +{ + const int VFLAG = vflag_either; + + // The eatom and vatom arrays are atomic for Half/Thread neighbor style + Kokkos::View::value> > v_eatom = k_eatom.view(); + Kokkos::View::value> > v_vatom = k_vatom.view(); + + if (eflag_atom) { + const E_FLOAT epairhalf = 0.5 * epair; + v_eatom[i] += epairhalf; + if (NEIGHFLAG != FULL) v_eatom[j] += epairhalf; + } + + if (VFLAG) { + const E_FLOAT v0 = delx*delx*fpair; + const E_FLOAT v1 = dely*dely*fpair; + const E_FLOAT v2 = delz*delz*fpair; + const E_FLOAT v3 = delx*dely*fpair; + const E_FLOAT v4 = delx*delz*fpair; + const E_FLOAT v5 = dely*delz*fpair; + + if (vflag_global) { + if (NEIGHFLAG != FULL) { + ev.v[0] += v0; + ev.v[1] += v1; + ev.v[2] += v2; + ev.v[3] += v3; + ev.v[4] += v4; + ev.v[5] += v5; 
+ } else { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + + if (vflag_atom) { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + + if (NEIGHFLAG != FULL) { + v_vatom(j,0) += 0.5*v0; + v_vatom(j,1) += 0.5*v1; + v_vatom(j,2) += 0.5*v2; + v_vatom(j,3) += 0.5*v3; + v_vatom(j,4) += 0.5*v4; + v_vatom(j,5) += 0.5*v5; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffMODKokkos::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const +{ + + // The eatom and vatom arrays are atomic for Half/Thread neighbor style + Kokkos::View::value> > v_vatom = k_vatom.view(); + + F_FLOAT v[6]; + + v[0] = THIRD * (drij[0]*fj[0] + drik[0]*fk[0]); + v[1] = THIRD * (drij[1]*fj[1] + drik[1]*fk[1]); + v[2] = THIRD * (drij[2]*fj[2] + drik[2]*fk[2]); + v[3] = THIRD * (drij[0]*fj[1] + drik[0]*fk[1]); + v[4] = THIRD * (drij[0]*fj[2] + drik[0]*fk[2]); + v[5] = THIRD * (drij[1]*fj[2] + drik[1]*fk[2]); + + if (vflag_global) { + ev.v[0] += v[0]; + ev.v[1] += v[1]; + ev.v[2] += v[2]; + ev.v[3] += v[3]; + ev.v[4] += v[4]; + ev.v[5] += v[5]; + } + + if (vflag_atom) { + v_vatom(i,0) += v[0]; v_vatom(i,1) += v[1]; v_vatom(i,2) += v[2]; + v_vatom(i,3) += v[3]; v_vatom(i,4) += v[4]; v_vatom(i,5) += v[5]; + if (NEIGHFLAG != FULL) { + v_vatom(j,0) += v[0]; v_vatom(j,1) += v[1]; v_vatom(j,2) += v[2]; + v_vatom(j,3) += v[3]; v_vatom(j,4) += v[4]; v_vatom(j,5) += v[5]; + v_vatom(k,0) += v[0]; v_vatom(k,1) += v[1]; v_vatom(k,2) += v[2]; + v_vatom(k,3) += v[3]; v_vatom(k,4) += v[4]; v_vatom(k,5) += v[5]; + } + } + +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void 
PairTersoffMODKokkos::v_tally3_atom(EV_FLOAT &ev, const int &i, const int &j, const int &k, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const +{ + F_FLOAT v[6]; + + v[0] = THIRD * (drji[0]*fj[0] + drjk[0]*fk[0]); + v[1] = THIRD * (drji[1]*fj[1] + drjk[1]*fk[1]); + v[2] = THIRD * (drji[2]*fj[2] + drjk[2]*fk[2]); + v[3] = THIRD * (drji[0]*fj[1] + drjk[0]*fk[1]); + v[4] = THIRD * (drji[0]*fj[2] + drjk[0]*fk[2]); + v[5] = THIRD * (drji[1]*fj[2] + drjk[1]*fk[2]); + + if (vflag_global) { + ev.v[0] += v[0]; + ev.v[1] += v[1]; + ev.v[2] += v[2]; + ev.v[3] += v[3]; + ev.v[4] += v[4]; + ev.v[5] += v[5]; + } + + if (vflag_atom) { + d_vatom(i,0) += v[0]; d_vatom(i,1) += v[1]; d_vatom(i,2) += v[2]; + d_vatom(i,3) += v[3]; d_vatom(i,4) += v[4]; d_vatom(i,5) += v[5]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int PairTersoffMODKokkos::sbmask(const int& j) const { + return j >> SBBITS & 3; +} + +template class PairTersoffMODKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class PairTersoffMODKokkos; +#endif diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.h b/src/KOKKOS/pair_tersoff_mod_kokkos.h new file mode 100755 index 0000000000..e6b66a4b22 --- /dev/null +++ b/src/KOKKOS/pair_tersoff_mod_kokkos.h @@ -0,0 +1,220 @@ +/* -*- c++ -*- ---------------------------------------------------------- + + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(tersoff/mod/kk,PairTersoffMODKokkos) +PairStyle(tersoff/mod/kk/device,PairTersoffMODKokkos) +PairStyle(tersoff/mod/kk/host,PairTersoffMODKokkos) + +#else + +#ifndef LMP_PAIR_TERSOFF_MOD_KOKKOS_H +#define LMP_PAIR_TERSOFF_MOD_KOKKOS_H + +#include "stdio.h" +#include "pair_kokkos.h" +#include "pair_tersoff_mod.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template +struct TagPairTersoffMODComputeHalf{}; + +template +struct TagPairTersoffMODComputeFullA{}; + +template +struct TagPairTersoffMODComputeFullB{}; + +template +class PairTersoffMODKokkos : public PairTersoffMOD { + public: + enum {EnabledNeighFlags=FULL}; + enum {COUL_FLAG=0}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef EV_FLOAT value_type; + + PairTersoffMODKokkos(class LAMMPS *); + virtual ~PairTersoffMODKokkos(); + virtual void compute(int, int); + void init_style(); + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffMODComputeHalf, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffMODComputeHalf, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffMODComputeFullA, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffMODComputeFullA, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffMODComputeFullB, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffMODComputeFullB, const int&) const; + + KOKKOS_INLINE_FUNCTION + double ters_fc_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + + KOKKOS_INLINE_FUNCTION + double ters_dfc(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + + KOKKOS_INLINE_FUNCTION + double ters_fa_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + 
+ KOKKOS_INLINE_FUNCTION + double ters_dfa(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + + KOKKOS_INLINE_FUNCTION + double ters_bij_k(const int &i, const int &j, const int &k, const F_FLOAT &bo) const; + + KOKKOS_INLINE_FUNCTION + double ters_dbij(const int &i, const int &j, const int &k, const F_FLOAT &bo) const; + + KOKKOS_INLINE_FUNCTION + double bondorder(const int &i, const int &j, const int &k, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const; + + KOKKOS_INLINE_FUNCTION + double ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const; + + KOKKOS_INLINE_FUNCTION + double ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const; + + KOKKOS_INLINE_FUNCTION + void ters_dthb(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const; + + KOKKOS_INLINE_FUNCTION + void ters_dthbj(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fj, F_FLOAT *fk) const; + + KOKKOS_INLINE_FUNCTION + void ters_dthbk(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fk) const; + + KOKKOS_INLINE_FUNCTION + double vec3_dot(const F_FLOAT x[3], const double y[3]) const { + return x[0]*y[0] + x[1]*y[1] + x[2]*y[2]; + } + + KOKKOS_INLINE_FUNCTION + void vec3_add(const F_FLOAT x[3], const double y[3], double * 
const z) const { + z[0] = x[0]+y[0]; z[1] = x[1]+y[1]; z[2] = x[2]+y[2]; + } + + KOKKOS_INLINE_FUNCTION + void vec3_scale(const F_FLOAT k, const double x[3], double y[3]) const { + y[0] = k*x[0]; y[1] = k*x[1]; y[2] = k*x[2]; + } + + KOKKOS_INLINE_FUNCTION + void vec3_scaleadd(const F_FLOAT k, const double x[3], const double y[3], double * const z) const { + z[0] = k*x[0]+y[0]; z[1] = k*x[1]+y[1]; z[2] = k*x[2]+y[2]; + } + + KOKKOS_INLINE_FUNCTION + int sbmask(const int& j) const; + + struct params_ters{ + params_ters(){powerm=0;lam3=0;h=0;powern=0;beta=0;lam2=0;bigb=0;bigr=0;bigd=0; + lam1=0;biga=0;powern_del=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;c5=0;ca1=0;ca4=0;}; + params_ters(int i){powerm=0;lam3=0;h=0;powern=0;beta=0;lam2=0;bigb=0;bigr=0;bigd=0; + lam1=0;biga=0;powern_del=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;c5=0;ca1=0;ca4=0;}; + F_FLOAT powerm, lam3, h, powern, beta, lam2, bigb, bigr, bigd, + lam1, biga, powern_del, cutsq, c1, c2, c3, c4, c5, ca1, ca4; + }; + + template + KOKKOS_INLINE_FUNCTION + void ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const; + + template + KOKKOS_INLINE_FUNCTION + void v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const; + + KOKKOS_INLINE_FUNCTION + void v_tally3_atom(EV_FLOAT &ev, const int &i, const int &j, const int &k, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const; + + void allocate(); + void setup(); + + protected: + void cleanup_copy(); + + typedef Kokkos::DualView tdual_int_3d; + Kokkos::DualView k_params; + typename Kokkos::DualView::t_dev_const paramskk; + // hardwired to space for 15 atom types + //params_ters m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + typename AT::t_x_array_randomread x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + typename AT::t_tagint_1d 
tag; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + DAT::t_efloat_1d d_eatom; + DAT::t_virial_array d_vatom; + + typedef Kokkos::DualView tdual_ffloat_2d_n7; + typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread; + typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7; + + typename ArrayTypes::t_neighbors_2d d_neighbors; + typename ArrayTypes::t_int_1d_randomread d_ilist; + typename ArrayTypes::t_int_1d_randomread d_numneigh; + //NeighListKokkos k_list; + + class AtomKokkos *atomKK; + int neighflag,newton_pair; + int nlocal,nall,eflag,vflag; + + friend void pair_virial_fdotr_compute(PairTersoffMODKokkos*); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp new file mode 100755 index 0000000000..12e69e6802 --- /dev/null +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp @@ -0,0 +1,1302 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Ray Shan (SNL) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_tersoff_zbl_kokkos.h" +#include "kokkos.h" +#include "atom_kokkos.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_request.h" +#include "neigh_list_kokkos.h" +#include "update.h" +#include "integrate.h" +#include "respa.h" +#include "math_const.h" +#include "memory.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define KOKKOS_CUDA_MAX_THREADS 256 +#define KOKKOS_CUDA_MIN_BLOCKS 8 + +/* ---------------------------------------------------------------------- */ + +template +PairTersoffZBLKokkos::PairTersoffZBLKokkos(LAMMPS *lmp) : PairTersoffZBL(lmp) +{ + THIRD = 1.0/3.0; + + respa_enable = 0; + + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + + if (strcmp(update->unit_style,"metal") == 0) { + global_a_0 = 0.529; + global_epsilon_0 = 0.00552635; + global_e = 1.0; + } else if (strcmp(update->unit_style,"real") == 0) { + global_a_0 = 0.529; + global_epsilon_0 = 0.00552635 * 0.043365121; + global_e = 1.0; + } else error->all(FLERR,"Pair tersoff/zbl/kk requires metal or real units"); + +} + +/* ---------------------------------------------------------------------- */ + +template +PairTersoffZBLKokkos::~PairTersoffZBLKokkos() +{ + if (!copymode) { + memory->destroy_kokkos(k_eatom,eatom); + memory->destroy_kokkos(k_vatom,vatom); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void 
PairTersoffZBLKokkos::allocate() +{ + PairTersoffZBL::allocate(); + + int n = atom->ntypes; + + k_params = Kokkos::DualView + ("PairTersoffZBL::paramskk",n+1,n+1,n+1); + paramskk = k_params.d_view; +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template +void PairTersoffZBLKokkos::init_style() +{ + PairTersoffZBL::init_style(); + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same::value && + !Kokkos::Impl::is_same::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same::value; + + if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full_cluster = 0; + if (neighflag == FULL) + neighbor->requests[irequest]->ghost = 1; + else + neighbor->requests[irequest]->ghost = 0; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with tersoff/zbl/kk"); + } +} + +/* ---------------------------------------------------------------------- */ + +template +void PairTersoffZBLKokkos::setup() +{ + PairTersoffZBL::setup(); + + int i,j,k,m; + int n = atom->ntypes; + + for (i = 1; i <= n; i++) + for (j = 1; j <= n; j++) + for (k = 1; k <= n; k++) { + m = elem2param[i-1][j-1][k-1]; + k_params.h_view(i,j,k).powerm = params[m].powerm; + k_params.h_view(i,j,k).gamma = params[m].gamma; + k_params.h_view(i,j,k).lam3 = params[m].lam3; + k_params.h_view(i,j,k).c = params[m].c; + k_params.h_view(i,j,k).d = params[m].d; + k_params.h_view(i,j,k).h = params[m].h; + k_params.h_view(i,j,k).powern = params[m].powern; + k_params.h_view(i,j,k).beta = params[m].beta; + k_params.h_view(i,j,k).lam2 = params[m].lam2; + 
k_params.h_view(i,j,k).bigb = params[m].bigb; + k_params.h_view(i,j,k).bigr = params[m].bigr; + k_params.h_view(i,j,k).bigd = params[m].bigd; + k_params.h_view(i,j,k).lam1 = params[m].lam1; + k_params.h_view(i,j,k).biga = params[m].biga; + k_params.h_view(i,j,k).cutsq = params[m].cutsq; + k_params.h_view(i,j,k).c1 = params[m].c1; + k_params.h_view(i,j,k).c2 = params[m].c2; + k_params.h_view(i,j,k).c3 = params[m].c3; + k_params.h_view(i,j,k).c4 = params[m].c4; + k_params.h_view(i,j,k).Z_i = params[m].Z_i; + k_params.h_view(i,j,k).Z_j = params[m].Z_j; + k_params.h_view(i,j,k).ZBLcut = params[m].ZBLcut; + k_params.h_view(i,j,k).ZBLexpscale = params[m].ZBLexpscale; + } + + k_params.template modify(); + +} + +/* ---------------------------------------------------------------------- */ + +template +void PairTersoffZBLKokkos::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memory->destroy_kokkos(k_eatom,eatom); + memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.d_view; + } + if (vflag_atom) { + memory->destroy_kokkos(k_vatom,vatom); + memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.d_view; + } + + atomKK->sync(execution_space,datamask_read); + k_params.template sync(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view(); + f = atomKK->k_f.view(); + type = atomKK->k_type.view(); + tag = atomKK->k_tag.view(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + + const int inum = list->inum; + const int ignum = inum + list->gnum; + NeighListKokkos* k_list = static_cast*>(list); + d_numneigh = k_list->d_numneigh; + d_neighbors = 
k_list->d_neighbors; + d_ilist = k_list->d_ilist; + + k_list->clean_copy(); + copymode = 1; + + EV_FLOAT ev; + EV_FLOAT ev_all; + + if (neighflag == HALF) { + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + ev_all += ev; + } else if (neighflag == HALFTHREAD) { + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + ev_all += ev; + } else if (neighflag == FULL) { + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + DeviceType::fence(); + ev_all += ev; + + if (evflag) + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ignum),*this,ev); + else + Kokkos::parallel_for(Kokkos::RangePolicy >(0,ignum),*this); + DeviceType::fence(); + ev_all += ev; + } + + if (eflag_global) eng_vdwl += ev_all.evdwl; + if (vflag_global) { + virial[0] += ev_all.v[0]; + virial[1] += ev_all.v[1]; + virial[2] += ev_all.v[2]; + virial[3] += ev_all.v[3]; + virial[4] += ev_all.v[4]; + virial[5] += ev_all.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify(); + k_eatom.template sync(); + } + + if (vflag_atom) { + k_vatom.template modify(); + k_vatom.template sync(); + } + + copymode = 0; +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeHalf, const int &ii, EV_FLOAT& ev) const { + + // The f array is atomic for Half/Thread neighbor style + Kokkos::View::value> > a_f = f; + + const int i = d_ilist[ii]; + if (i >= nlocal) return; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + const int itag = 
tag(i); + + int j,k,jj,kk,jtag,jtype,ktype; + F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij; + F_FLOAT fi[3], fj[3], fk[3]; + X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2; + + //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); + const int jnum = d_numneigh[i]; + + // repulsive + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + jtype = type(j); + jtag = tag(j); + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x(j,2) < ztmp) continue; + if (x(j,2) == ztmp && x(j,1) < ytmp) continue; + if (x(j,2) == ztmp && x(j,1) == ytmp && x(j,0) < xtmp) continue; + } + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq; + + if (rsq > cutsq) continue; + + // Tersoff repulsive portion + + const F_FLOAT r = sqrt(rsq); + const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r); + const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r); + const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r); + const F_FLOAT frep_t = paramskk(itype,jtype,jtype).biga * tmp_exp * + (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1); + const F_FLOAT eng_t = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp; + + // ZBL repulsive portion + + const F_FLOAT esq = pow(global_e,2.0); + const F_FLOAT a_ij = (0.8854*global_a_0) / + (pow(paramskk(itype,jtype,jtype).Z_i,0.23) + pow(paramskk(itype,jtype,jtype).Z_j,0.23)); + const F_FLOAT premult = (paramskk(itype,jtype,jtype).Z_i * paramskk(itype,jtype,jtype).Z_j * esq)/ + (4.0*MY_PI*global_epsilon_0); + const F_FLOAT r_ov_a = r/a_ij; + const F_FLOAT phi = 0.1818*exp(-3.2*r_ov_a) + 0.5099*exp(-0.9423*r_ov_a) + + 0.2802*exp(-0.4029*r_ov_a) + 0.02817*exp(-0.2016*r_ov_a); + const F_FLOAT dphi = (1.0/a_ij) * 
(-3.2*0.1818*exp(-3.2*r_ov_a) - + 0.9423*0.5099*exp(-0.9423*r_ov_a) - + 0.4029*0.2802*exp(-0.4029*r_ov_a) - + 0.2016*0.02817*exp(-0.2016*r_ov_a)); + const F_FLOAT frep_z = premult*-phi/rsq + premult*dphi/r; + const F_FLOAT eng_z = premult*(1.0/r)*phi; + + // combine two parts with smoothing by Fermi-like function + + F_FLOAT frep, eng; + frep = -(-fermi_d_k(itype,jtype,jtype,r) * eng_z + + (1.0 - fermi_k(itype,jtype,jtype,r))*frep_z + + fermi_d_k(itype,jtype,jtype,r)*eng_t + fermi_k(itype,jtype,jtype,r)*frep_t) / r; + + if (eflag) + eng = (1.0 - fermi_k(itype,jtype,jtype,r)) * eng_z + + fermi_k(itype,jtype,jtype,r) * eng_t; + + a_f(i,0) += delx*frep; + a_f(i,1) += dely*frep; + a_f(i,2) += delz*frep; + a_f(j,0) -= delx*frep; + a_f(j,1) -= dely*frep; + a_f(j,2) -= delz*frep; + + if (EVFLAG) { + if (eflag) ev.evdwl += eng; + if (vflag_either || eflag_atom) this->template ev_tally(ev,i,j,eng,frep,delx,dely,delz); + } + } + + // attractive: bond order + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + jtype = type(j); + + delx1 = xtmp - x(j,0); + dely1 = ytmp - x(j,1); + delz1 = ztmp - x(j,2); + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + cutsq1 = paramskk(itype,jtype,jtype).cutsq; + + bo_ij = 0.0; + if (rsq1 > cutsq1) continue; + rij = sqrt(rsq1); + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = d_neighbors(i,kk); + k &= NEIGHMASK; + ktype = type(k); + + delx2 = xtmp - x(k,0); + dely2 = ytmp - x(k,1); + delz2 = ztmp - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(itype,jtype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); + } + + // attractive: pairwise potential and force + + const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij); + const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij); + const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij); + const F_FLOAT fatt = -0.5*bij * dfa / 
rij; + const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij); + const F_FLOAT eng = 0.5*bij * fa; + + a_f(i,0) += delx1*fatt; + a_f(i,1) += dely1*fatt; + a_f(i,2) += delz1*fatt; + a_f(j,0) -= delx1*fatt; + a_f(j,1) -= dely1*fatt; + a_f(j,2) -= delz1*fatt; + + if (EVFLAG) { + if (eflag) ev.evdwl += eng; + if (vflag_either || eflag_atom) + this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); + } + + // attractive: three-body force + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = d_neighbors(i,kk); + k &= NEIGHMASK; + ktype = type(k); + + delx2 = xtmp - x(k,0); + dely2 = ytmp - x(k,1); + delz2 = ztmp - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(itype,jtype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1, + rik,delx2,dely2,delz2,fi,fj,fk); + + a_f(i,0) += fi[0]; + a_f(i,1) += fi[1]; + a_f(i,2) += fi[2]; + a_f(j,0) += fj[0]; + a_f(j,1) += fj[1]; + a_f(j,2) += fj[2]; + a_f(k,0) += fk[0]; + a_f(k,1) += fk[1]; + a_f(k,2) += fk[2]; + + if (vflag_atom) { + F_FLOAT delrij[3], delrik[3]; + delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1; + delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2; + if (vflag_either) this->template v_tally3(ev,i,j,k,fj,fk,delrij,delrik); + } + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeHalf, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairTersoffZBLComputeHalf(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeFullA, const int &ii, EV_FLOAT& ev) const { + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + int 
j,k,jj,kk,jtype,ktype; + F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij; + F_FLOAT fi[3], fj[3], fk[3]; + X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2; + + //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); + const int jnum = d_numneigh[i]; + + // repulsive + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + const int jtype = type(j); + + const X_FLOAT delx = xtmp - x(j,0); + const X_FLOAT dely = ytmp - x(j,1); + const X_FLOAT delz = ztmp - x(j,2); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq; + + if (rsq > cutsq) continue; + + // Tersoff repulsive portion + + const F_FLOAT r = sqrt(rsq); + const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r); + const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r); + const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r); + const F_FLOAT frep_t = paramskk(itype,jtype,jtype).biga * tmp_exp * + (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1); + const F_FLOAT eng_t = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp; + + // ZBL repulsive portion + + const F_FLOAT esq = pow(global_e,2.0); + const F_FLOAT a_ij = (0.8854*global_a_0) / + (pow(paramskk(itype,jtype,jtype).Z_i,0.23) + pow(paramskk(itype,jtype,jtype).Z_j,0.23)); + const F_FLOAT premult = (paramskk(itype,jtype,jtype).Z_i * paramskk(itype,jtype,jtype).Z_j * esq)/ + (4.0*MY_PI*global_epsilon_0); + const F_FLOAT r_ov_a = r/a_ij; + const F_FLOAT phi = 0.1818*exp(-3.2*r_ov_a) + 0.5099*exp(-0.9423*r_ov_a) + + 0.2802*exp(-0.4029*r_ov_a) + 0.02817*exp(-0.2016*r_ov_a); + const F_FLOAT dphi = (1.0/a_ij) * (-3.2*0.1818*exp(-3.2*r_ov_a) - + 0.9423*0.5099*exp(-0.9423*r_ov_a) - + 0.4029*0.2802*exp(-0.4029*r_ov_a) - + 0.2016*0.02817*exp(-0.2016*r_ov_a)); + const F_FLOAT frep_z = premult*-phi/rsq + premult*dphi/r; + const F_FLOAT eng_z = premult*(1.0/r)*phi; + + // combine two parts with smoothing by Fermi-like function + + F_FLOAT frep, 
eng; + frep = -(-fermi_d_k(itype,jtype,jtype,r) * eng_z + + (1.0 - fermi_k(itype,jtype,jtype,r))*frep_z + + fermi_d_k(itype,jtype,jtype,r)*eng_t + fermi_k(itype,jtype,jtype,r)*frep_t) / r; + + if (eflag) + eng = (1.0 - fermi_k(itype,jtype,jtype,r)) * eng_z + + fermi_k(itype,jtype,jtype,r) * eng_t; + + f(i,0) += delx*frep; + f(i,1) += dely*frep; + f(i,2) += delz*frep; + + if (EVFLAG) { + if (eflag) + ev.evdwl += 0.5*eng; + if (vflag_either || eflag_atom) + this->template ev_tally(ev,i,j,eng,frep,delx,dely,delz); + } + } + + // attractive: bond order + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + jtype = type(j); + + delx1 = xtmp - x(j,0); + dely1 = ytmp - x(j,1); + delz1 = ztmp - x(j,2); + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + cutsq1 = paramskk(itype,jtype,jtype).cutsq; + + bo_ij = 0.0; + if (rsq1 > cutsq1) continue; + rij = sqrt(rsq1); + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = d_neighbors(i,kk); + k &= NEIGHMASK; + ktype = type(k); + + delx2 = xtmp - x(k,0); + dely2 = ytmp - x(k,1); + delz2 = ztmp - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(itype,jtype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); + } + + // attractive: pairwise potential and force + + const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij); + const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij); + const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij); + const F_FLOAT fatt = -0.5*bij * dfa / rij; + const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij); + const F_FLOAT eng = 0.5*bij * fa; + + f(i,0) += delx1*fatt; + f(i,1) += dely1*fatt; + f(i,2) += delz1*fatt; + + if (EVFLAG) { + if (eflag) ev.evdwl += 0.5*eng; + if (vflag_either || eflag_atom) + this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); + } + + // attractive: three-body force + + for (kk = 0; kk < jnum; 
kk++) { + if (jj == kk) continue; + k = d_neighbors(i,kk); + k &= NEIGHMASK; + ktype = type(k); + + delx2 = xtmp - x(k,0); + dely2 = ytmp - x(k,1); + delz2 = ztmp - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(itype,jtype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1, + rik,delx2,dely2,delz2,fi,fj,fk); + + f(i,0) += fi[0]; + f(i,1) += fi[1]; + f(i,2) += fi[2]; + + if (vflag_atom) { + F_FLOAT delrij[3], delrik[3]; + delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1; + delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2; + if (vflag_either) this->template v_tally3(ev,i,j,k,fj,fk,delrij,delrik); + } + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeFullA, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairTersoffZBLComputeFullA(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeFullB, const int &ii, EV_FLOAT& ev) const { + + const int i = d_ilist[ii]; + const X_FLOAT xtmp = x(i,0); + const X_FLOAT ytmp = x(i,1); + const X_FLOAT ztmp = x(i,2); + const int itype = type(i); + + int j,k,jj,kk,jtype,ktype,j_jnum; + F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij; + F_FLOAT fj[3], fk[3]; + X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2; + + const int jnum = d_numneigh[i]; + + // attractive: bond order + + for (jj = 0; jj < jnum; jj++) { + j = d_neighbors(i,jj); + j &= NEIGHMASK; + if (j >= nlocal) continue; + jtype = type(j); + + delx1 = x(j,0) - xtmp; + dely1 = x(j,1) - ytmp; + delz1 = x(j,2) - ztmp; + rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; + cutsq1 = paramskk(jtype,itype,itype).cutsq; + + bo_ij = 0.0; + if (rsq1 > cutsq1) continue; + rij = sqrt(rsq1); + + j_jnum = d_numneigh[j]; + + for 
(kk = 0; kk < j_jnum; kk++) { + k = d_neighbors(j,kk); + if (k == i) continue; + k &= NEIGHMASK; + ktype = type(k); + + delx2 = x(j,0) - x(k,0); + dely2 = x(j,1) - x(k,1); + delz2 = x(j,2) - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(jtype,itype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + bo_ij += bondorder(jtype,itype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); + + } + + // attractive: pairwise potential and force + + const F_FLOAT fa = ters_fa_k(jtype,itype,itype,rij); + const F_FLOAT dfa = ters_dfa(jtype,itype,itype,rij); + const F_FLOAT bij = ters_bij_k(jtype,itype,itype,bo_ij); + const F_FLOAT fatt = -0.5*bij * dfa / rij; + const F_FLOAT prefactor = 0.5*fa * ters_dbij(jtype,itype,itype,bo_ij); + const F_FLOAT eng = 0.5*bij * fa; + + f(i,0) -= delx1*fatt; + f(i,1) -= dely1*fatt; + f(i,2) -= delz1*fatt; + + if (EVFLAG) { + if (eflag) + ev.evdwl += 0.5 * eng; + if (vflag_either || eflag_atom) + this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); + } + + // attractive: three-body force + + for (kk = 0; kk < j_jnum; kk++) { + k = d_neighbors(j,kk); + if (k == i) continue; + k &= NEIGHMASK; + ktype = type(k); + + delx2 = x(j,0) - x(k,0); + dely2 = x(j,1) - x(k,1); + delz2 = x(j,2) - x(k,2); + rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; + cutsq2 = paramskk(jtype,itype,ktype).cutsq; + + if (rsq2 > cutsq2) continue; + rik = sqrt(rsq2); + ters_dthbj(jtype,itype,ktype,prefactor,rij,delx1,dely1,delz1, + rik,delx2,dely2,delz2,fj,fk); + f(i,0) += fj[0]; + f(i,1) += fj[1]; + f(i,2) += fj[2]; + + if (vflag_atom) { + F_FLOAT delrji[3], delrjk[3]; + delrji[0] = -delx1; delrji[1] = -dely1; delrji[2] = -delz1; + delrjk[0] = -delx2; delrjk[1] = -dely2; delrjk[2] = -delz2; + if (vflag_either) v_tally3_atom(ev,i,j,k,fj,fk,delrji,delrjk); + } + + const F_FLOAT fa_jk = ters_fa_k(jtype,ktype,itype,rik); + const F_FLOAT prefactor_jk = 0.5*fa_jk * ters_dbij(jtype,ktype,itype,bo_ij); + 
ters_dthbk(jtype,ktype,itype,prefactor_jk,rik,delx2,dely2,delz2, + rij,delx1,dely1,delz1,fk); + f(i,0) += fk[0]; + f(i,1) += fk[1]; + f(i,2) += fk[2]; + } + } +} + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeFullB, const int &ii) const { + EV_FLOAT ev; + this->template operator()(TagPairTersoffZBLComputeFullB(), ii, ev); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffZBLKokkos::ters_fc_k(const int &i, const int &j, + const int &k, const F_FLOAT &r) const +{ + const F_FLOAT ters_R = paramskk(i,j,k).bigr; + const F_FLOAT ters_D = paramskk(i,j,k).bigd; + + if (r < ters_R-ters_D) return 1.0; + if (r > ters_R+ters_D) return 0.0; + return 0.5*(1.0 - sin(MY_PI2*(r - ters_R)/ters_D)); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffZBLKokkos::ters_dfc(const int &i, const int &j, + const int &k, const F_FLOAT &r) const +{ + const F_FLOAT ters_R = paramskk(i,j,k).bigr; + const F_FLOAT ters_D = paramskk(i,j,k).bigd; + + if (r < ters_R-ters_D) return 0.0; + if (r > ters_R+ters_D) return 0.0; + return -(MY_PI4/ters_D) * cos(MY_PI2*(r - ters_R)/ters_D); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffZBLKokkos::bondorder(const int &i, const int &j, const int &k, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const +{ + F_FLOAT arg, ex_delr; + + const F_FLOAT costheta = (dx1*dx2 + dy1*dy2 + dz1*dz2)/(rij*rik); + + if (int(paramskk(i,j,k).powerm) == 3) arg = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); + else arg = paramskk(i,j,k).lam3 * (rij-rik); + + if (arg > 69.0776) ex_delr = 1.e30; + else if (arg < -69.0776) ex_delr = 0.0; + else 
ex_delr = exp(arg); + + return ters_fc_k(i,j,k,rik) * ters_gijk(i,j,k,costheta) * ex_delr; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffZBLKokkos:: + ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const +{ + const F_FLOAT ters_c = paramskk(i,j,k).c * paramskk(i,j,k).c; + const F_FLOAT ters_d = paramskk(i,j,k).d * paramskk(i,j,k).d; + const F_FLOAT hcth = paramskk(i,j,k).h - cos; + + return paramskk(i,j,k).gamma*(1.0 + ters_c/ters_d - ters_c/(ters_d+hcth*hcth)); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffZBLKokkos:: + ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const +{ + + const F_FLOAT ters_c = paramskk(i,j,k).c * paramskk(i,j,k).c; + const F_FLOAT ters_d = paramskk(i,j,k).d * paramskk(i,j,k).d; + const F_FLOAT hcth = paramskk(i,j,k).h - cos; + const F_FLOAT numerator = -2.0 * ters_c * hcth; + const F_FLOAT denominator = 1.0/(ters_d + hcth*hcth); + return paramskk(i,j,k).gamma * numerator * denominator * denominator; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffZBLKokkos::ters_fa_k(const int &i, const int &j, + const int &k, const F_FLOAT &r) const +{ + if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0; + return -paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r) + * ters_fc_k(i,j,k,r) * fermi_k(i,j,k,r); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffZBLKokkos::ters_dfa(const int &i, const int &j, + const int &k, const F_FLOAT &r) const +{ + if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0; + return paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r) * + (paramskk(i,j,k).lam2 * ters_fc_k(i,j,k,r) * 
fermi_k(i,j,k,r) - + ters_dfc(i,j,k,r) * fermi_k(i,j,k,r) - ters_fc_k(i,j,k,r) * + fermi_d_k(i,j,k,r)); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffZBLKokkos::ters_bij_k(const int &i, const int &j, + const int &k, const F_FLOAT &bo) const +{ + const F_FLOAT tmp = paramskk(i,j,k).beta * bo; + if (tmp > paramskk(i,j,k).c1) return 1.0/sqrt(tmp); + if (tmp > paramskk(i,j,k).c2) + return (1.0 - pow(tmp,-paramskk(i,j,k).powern) / (2.0*paramskk(i,j,k).powern))/sqrt(tmp); + if (tmp < paramskk(i,j,k).c4) return 1.0; + if (tmp < paramskk(i,j,k).c3) + return 1.0 - pow(tmp,paramskk(i,j,k).powern)/(2.0*paramskk(i,j,k).powern); + return pow(1.0 + pow(tmp,paramskk(i,j,k).powern), -1.0/(2.0*paramskk(i,j,k).powern)); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffZBLKokkos::ters_dbij(const int &i, const int &j, + const int &k, const F_FLOAT &bo) const +{ + const F_FLOAT tmp = paramskk(i,j,k).beta * bo; + if (tmp > paramskk(i,j,k).c1) return paramskk(i,j,k).beta * -0.5*pow(tmp,-1.5); + if (tmp > paramskk(i,j,k).c2) + return paramskk(i,j,k).beta * (-0.5*pow(tmp,-1.5) * + (1.0 - 0.5*(1.0 + 1.0/(2.0*paramskk(i,j,k).powern)) * + pow(tmp,-paramskk(i,j,k).powern))); + if (tmp < paramskk(i,j,k).c4) return 0.0; + if (tmp < paramskk(i,j,k).c3) + return -0.5*paramskk(i,j,k).beta * pow(tmp,paramskk(i,j,k).powern-1.0); + + const F_FLOAT tmp_n = pow(tmp,paramskk(i,j,k).powern); + return -0.5 * pow(1.0+tmp_n, -1.0-(1.0/(2.0*paramskk(i,j,k).powern)))*tmp_n / bo; +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairTersoffZBLKokkos::ters_dthb( + const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const 
F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const +{ + // from PairTersoffZBL::attractive + F_FLOAT rij_hat[3],rik_hat[3]; + F_FLOAT rijinv,rikinv; + F_FLOAT delrij[3], delrik[3]; + + delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1; + delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2; + + //rij = sqrt(rsq1); + rijinv = 1.0/rij; + vec3_scale(rijinv,delrij,rij_hat); + + //rik = sqrt(rsq2); + rikinv = 1.0/rik; + vec3_scale(rikinv,delrik,rik_hat); + + // from PairTersoffZBL::ters_zetaterm_d + F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; + F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; + + fc = ters_fc_k(i,j,k,rik); + dfc = ters_dfc(i,j,k,rik); + if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); + else tmp = paramskk(i,j,k).lam3 * (rij-rik); + + if (tmp > 69.0776) ex_delr = 1.e30; + else if (tmp < -69.0776) ex_delr = 0.0; + else ex_delr = exp(tmp); + + if (int(paramskk(i,j,k).powerm) == 3) + dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + else dex_delr = paramskk(i,j,k).lam3 * ex_delr; + + cos = vec3_dot(rij_hat,rik_hat); + gijk = ters_gijk(i,j,k,cos); + dgijk = ters_dgijk(i,j,k,cos); + + // from PairTersoffZBL::costheta_d + vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); + vec3_scale(rijinv,dcosfj,dcosfj); + vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk); + vec3_scale(rikinv,dcosfk,dcosfk); + vec3_add(dcosfj,dcosfk,dcosfi); + vec3_scale(-1.0,dcosfi,dcosfi); + + vec3_scale(-dfc*gijk*ex_delr,rik_hat,fi); + vec3_scaleadd(fc*dgijk*ex_delr,dcosfi,fi,fi); + vec3_scaleadd(fc*gijk*dex_delr,rik_hat,fi,fi); + vec3_scaleadd(-fc*gijk*dex_delr,rij_hat,fi,fi); + vec3_scale(prefactor,fi,fi); + + vec3_scale(fc*dgijk*ex_delr,dcosfj,fj); + vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj); + vec3_scale(prefactor,fj,fj); + + vec3_scale(dfc*gijk*ex_delr,rik_hat,fk); + vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk); + vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk); + vec3_scale(prefactor,fk,fk); + +} + +/* 
---------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Reduced form of ters_dthb() that produces only the fj and fk vectors.

   i, j, k    indices used to look up the parameter set paramskk(i,j,k)
   prefactor  scalar applied to both output vectors
   rij, rik   lengths of the two separation vectors
   dx1..dz1   components of the first separation vector (length rij)
   dx2..dz2   components of the second separation vector (length rik)
   fj, fk     output, 3 components each, fully overwritten

   Unlike ters_dthb(), no fi vector is returned, so the dcosfi
   intermediate (needed only for fi) is not formed here.
   NOTE(review): this assumes the vec3_* helpers only write their
   output argument - confirm against their definitions in the header.
------------------------------------------------------------------------- */

template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairTersoffZBLKokkos<DeviceType>::ters_dthbj(
        const int &i, const int &j, const int &k, const F_FLOAT &prefactor,
        const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1,
        const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2,
        F_FLOAT *fj, F_FLOAT *fk) const
{
  F_FLOAT rij_hat[3], rik_hat[3];
  F_FLOAT delrij[3], delrik[3];

  delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1;
  delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2;

  // unit vectors along the two separation vectors

  const F_FLOAT rijinv = 1.0/rij;
  vec3_scale(rijinv,delrij,rij_hat);

  const F_FLOAT rikinv = 1.0/rik;
  vec3_scale(rikinv,delrik,rik_hat);

  // cutoff function and its derivative at rik

  const F_FLOAT fc = ters_fc_k(i,j,k,rik);
  const F_FLOAT dfc = ters_dfc(i,j,k,rik);

  // powerm selects between a cubic and a linear exponent argument;
  // evaluate the test once and reuse it for the derivative below

  const bool cubic = (int(paramskk(i,j,k).powerm) == 3);

  F_FLOAT arg;
  if (cubic) arg = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0);
  else arg = paramskk(i,j,k).lam3 * (rij-rik);

  // clamp the exponential: 69.0776 is ln(1.e30)

  F_FLOAT ex_delr;
  if (arg > 69.0776) ex_delr = 1.e30;
  else if (arg < -69.0776) ex_delr = 0.0;
  else ex_delr = exp(arg);

  F_FLOAT dex_delr;
  if (cubic)
    dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr;
  else dex_delr = paramskk(i,j,k).lam3 * ex_delr;

  // angular term and its derivative with respect to cos(theta)
  // (named costheta so the math function cos is not shadowed)

  const F_FLOAT costheta = vec3_dot(rij_hat,rik_hat);
  const F_FLOAT gijk = ters_gijk(i,j,k,costheta);
  const F_FLOAT dgijk = ters_dgijk(i,j,k,costheta);

  // cos(theta) derivative vectors used by fj and fk

  F_FLOAT dcosfj[3], dcosfk[3];
  vec3_scaleadd(-costheta,rij_hat,rik_hat,dcosfj);
  vec3_scale(rijinv,dcosfj,dcosfj);
  vec3_scaleadd(-costheta,rik_hat,rij_hat,dcosfk);
  vec3_scale(rikinv,dcosfk,dcosfk);

  // fj: angular-derivative and exponential-derivative contributions

  vec3_scale(fc*dgijk*ex_delr,dcosfj,fj);
  vec3_scaleadd(fc*gijk*dex_delr,rij_hat,fj,fj);
  vec3_scale(prefactor,fj,fj);

  // fk: cutoff-derivative, angular-derivative and exponential-derivative
  // contributions

  vec3_scale(dfc*gijk*ex_delr,rik_hat,fk);
  vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk);
  vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk);
  vec3_scale(prefactor,fk,fk);
}

/* 
---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairTersoffZBLKokkos::ters_dthbk( + const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fk) const +{ + F_FLOAT rij_hat[3],rik_hat[3]; + F_FLOAT rijinv,rikinv; + F_FLOAT delrij[3], delrik[3]; + + delrij[0] = dx1; delrij[1] = dy1; delrij[2] = dz1; + delrik[0] = dx2; delrik[1] = dy2; delrik[2] = dz2; + + rijinv = 1.0/rij; + vec3_scale(rijinv,delrij,rij_hat); + + rikinv = 1.0/rik; + vec3_scale(rikinv,delrik,rik_hat); + + F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; + F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; + + fc = ters_fc_k(i,j,k,rik); + dfc = ters_dfc(i,j,k,rik); + if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); + else tmp = paramskk(i,j,k).lam3 * (rij-rik); + + if (tmp > 69.0776) ex_delr = 1.e30; + else if (tmp < -69.0776) ex_delr = 0.0; + else ex_delr = exp(tmp); + + if (int(paramskk(i,j,k).powerm) == 3) + dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + else dex_delr = paramskk(i,j,k).lam3 * ex_delr; + + cos = vec3_dot(rij_hat,rik_hat); + gijk = ters_gijk(i,j,k,cos); + dgijk = ters_dgijk(i,j,k,cos); + + vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); + vec3_scale(rijinv,dcosfj,dcosfj); + vec3_scaleadd(-cos,rik_hat,rij_hat,dcosfk); + vec3_scale(rikinv,dcosfk,dcosfk); + vec3_add(dcosfj,dcosfk,dcosfi); + vec3_scale(-1.0,dcosfi,dcosfi); + + vec3_scale(dfc*gijk*ex_delr,rik_hat,fk); + vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk); + vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk); + vec3_scale(prefactor,fk,fk); + +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffZBLKokkos::fermi_k(const int &i, const int &j, + const 
int &k, const F_FLOAT &r) const +{ + return 1.0 / (1.0 + exp(-paramskk(i,j,k).ZBLexpscale * + (r - paramskk(i,j,k).ZBLcut))); +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +double PairTersoffZBLKokkos::fermi_d_k(const int &i, const int &j, + const int &k, const F_FLOAT &r) const +{ + return paramskk(i,j,k).ZBLexpscale * exp(-paramskk(i,j,k).ZBLexpscale * + (r - paramskk(i,j,k).ZBLcut)) / + pow(1.0 + exp(-paramskk(i,j,k).ZBLexpscale * + (r - paramskk(i,j,k).ZBLcut)),2.0); +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffZBLKokkos::ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const +{ + const int VFLAG = vflag_either; + + // The eatom and vatom arrays are atomic for Half/Thread neighbor style + Kokkos::View::value> > v_eatom = k_eatom.view(); + Kokkos::View::value> > v_vatom = k_vatom.view(); + + if (eflag_atom) { + const E_FLOAT epairhalf = 0.5 * epair; + v_eatom[i] += epairhalf; + if (NEIGHFLAG != FULL) v_eatom[j] += epairhalf; + } + + if (VFLAG) { + const E_FLOAT v0 = delx*delx*fpair; + const E_FLOAT v1 = dely*dely*fpair; + const E_FLOAT v2 = delz*delz*fpair; + const E_FLOAT v3 = delx*dely*fpair; + const E_FLOAT v4 = delx*delz*fpair; + const E_FLOAT v5 = dely*delz*fpair; + + if (vflag_global) { + if (NEIGHFLAG != FULL) { + ev.v[0] += v0; + ev.v[1] += v1; + ev.v[2] += v2; + ev.v[3] += v3; + ev.v[4] += v4; + ev.v[5] += v5; + } else { + ev.v[0] += 0.5*v0; + ev.v[1] += 0.5*v1; + ev.v[2] += 0.5*v2; + ev.v[3] += 0.5*v3; + ev.v[4] += 0.5*v4; + ev.v[5] += 0.5*v5; + } + } + + if (vflag_atom) { + v_vatom(i,0) += 0.5*v0; + v_vatom(i,1) += 0.5*v1; + v_vatom(i,2) += 0.5*v2; + v_vatom(i,3) += 0.5*v3; + v_vatom(i,4) += 0.5*v4; + v_vatom(i,5) += 0.5*v5; + + if (NEIGHFLAG != FULL) { + v_vatom(j,0) 
+= 0.5*v0; + v_vatom(j,1) += 0.5*v1; + v_vatom(j,2) += 0.5*v2; + v_vatom(j,3) += 0.5*v3; + v_vatom(j,4) += 0.5*v4; + v_vatom(j,5) += 0.5*v5; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template +template +KOKKOS_INLINE_FUNCTION +void PairTersoffZBLKokkos::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const +{ + + // The eatom and vatom arrays are atomic for Half/Thread neighbor style + Kokkos::View::value> > v_vatom = k_vatom.view(); + + F_FLOAT v[6]; + + v[0] = THIRD * (drij[0]*fj[0] + drik[0]*fk[0]); + v[1] = THIRD * (drij[1]*fj[1] + drik[1]*fk[1]); + v[2] = THIRD * (drij[2]*fj[2] + drik[2]*fk[2]); + v[3] = THIRD * (drij[0]*fj[1] + drik[0]*fk[1]); + v[4] = THIRD * (drij[0]*fj[2] + drik[0]*fk[2]); + v[5] = THIRD * (drij[1]*fj[2] + drik[1]*fk[2]); + + if (vflag_global) { + ev.v[0] += v[0]; + ev.v[1] += v[1]; + ev.v[2] += v[2]; + ev.v[3] += v[3]; + ev.v[4] += v[4]; + ev.v[5] += v[5]; + } + + if (vflag_atom) { + v_vatom(i,0) += v[0]; v_vatom(i,1) += v[1]; v_vatom(i,2) += v[2]; + v_vatom(i,3) += v[3]; v_vatom(i,4) += v[4]; v_vatom(i,5) += v[5]; + if (NEIGHFLAG != FULL) { + v_vatom(j,0) += v[0]; v_vatom(j,1) += v[1]; v_vatom(j,2) += v[2]; + v_vatom(j,3) += v[3]; v_vatom(j,4) += v[4]; v_vatom(j,5) += v[5]; + v_vatom(k,0) += v[0]; v_vatom(k,1) += v[1]; v_vatom(k,2) += v[2]; + v_vatom(k,3) += v[3]; v_vatom(k,4) += v[4]; v_vatom(k,5) += v[5]; + } + } + +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +void PairTersoffZBLKokkos::v_tally3_atom(EV_FLOAT &ev, const int &i, const int &j, const int &k, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const +{ + F_FLOAT v[6]; + + v[0] = THIRD * (drji[0]*fj[0] + drjk[0]*fk[0]); + v[1] = THIRD * (drji[1]*fj[1] + drjk[1]*fk[1]); + v[2] = THIRD * (drji[2]*fj[2] + drjk[2]*fk[2]); + v[3] = THIRD * (drji[0]*fj[1] + drjk[0]*fk[1]); 
+ v[4] = THIRD * (drji[0]*fj[2] + drjk[0]*fk[2]); + v[5] = THIRD * (drji[1]*fj[2] + drjk[1]*fk[2]); + + if (vflag_global) { + ev.v[0] += v[0]; + ev.v[1] += v[1]; + ev.v[2] += v[2]; + ev.v[3] += v[3]; + ev.v[4] += v[4]; + ev.v[5] += v[5]; + } + + if (vflag_atom) { + d_vatom(i,0) += v[0]; d_vatom(i,1) += v[1]; d_vatom(i,2) += v[2]; + d_vatom(i,3) += v[3]; d_vatom(i,4) += v[4]; d_vatom(i,5) += v[5]; + } +} + +/* ---------------------------------------------------------------------- */ + +template +KOKKOS_INLINE_FUNCTION +int PairTersoffZBLKokkos::sbmask(const int& j) const { + return j >> SBBITS & 3; +} + +template class PairTersoffZBLKokkos; +#ifdef KOKKOS_HAVE_CUDA +template class PairTersoffZBLKokkos; +#endif diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.h b/src/KOKKOS/pair_tersoff_zbl_kokkos.h new file mode 100755 index 0000000000..7bc88759c7 --- /dev/null +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.h @@ -0,0 +1,231 @@ +/* -*- c++ -*- ---------------------------------------------------------- + + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(tersoff/zbl/kk,PairTersoffZBLKokkos) +PairStyle(tersoff/zbl/kk/device,PairTersoffZBLKokkos) +PairStyle(tersoff/zbl/kk/host,PairTersoffZBLKokkos) + +#else + +#ifndef LMP_PAIR_TERSOFF_ZBL_KOKKOS_H +#define LMP_PAIR_TERSOFF_ZBL_KOKKOS_H + +#include "stdio.h" +#include "pair_kokkos.h" +#include "pair_tersoff_zbl.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template +struct TagPairTersoffZBLComputeHalf{}; + +template +struct TagPairTersoffZBLComputeFullA{}; + +template +struct TagPairTersoffZBLComputeFullB{}; + +template +class PairTersoffZBLKokkos : public PairTersoffZBL { + public: + enum {EnabledNeighFlags=FULL}; + enum {COUL_FLAG=0}; + typedef DeviceType device_type; + typedef ArrayTypes AT; + typedef EV_FLOAT value_type; + + PairTersoffZBLKokkos(class LAMMPS *); + virtual ~PairTersoffZBLKokkos(); + virtual void compute(int, int); + void init_style(); + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffZBLComputeHalf, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffZBLComputeHalf, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffZBLComputeFullA, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffZBLComputeFullA, const int&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffZBLComputeFullB, const int&, EV_FLOAT&) const; + + template + KOKKOS_INLINE_FUNCTION + void operator()(TagPairTersoffZBLComputeFullB, const int&) const; + + KOKKOS_INLINE_FUNCTION + double ters_fc_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + + KOKKOS_INLINE_FUNCTION + double ters_dfc(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + + KOKKOS_INLINE_FUNCTION + double ters_fa_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + 
+ KOKKOS_INLINE_FUNCTION + double ters_dfa(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + + KOKKOS_INLINE_FUNCTION + double ters_bij_k(const int &i, const int &j, const int &k, const F_FLOAT &bo) const; + + KOKKOS_INLINE_FUNCTION + double ters_dbij(const int &i, const int &j, const int &k, const F_FLOAT &bo) const; + + KOKKOS_INLINE_FUNCTION + double bondorder(const int &i, const int &j, const int &k, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const; + + KOKKOS_INLINE_FUNCTION + double ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const; + + KOKKOS_INLINE_FUNCTION + double ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const; + + KOKKOS_INLINE_FUNCTION + void ters_dthb(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const; + + KOKKOS_INLINE_FUNCTION + void ters_dthbj(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fj, F_FLOAT *fk) const; + + KOKKOS_INLINE_FUNCTION + void ters_dthbk(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fk) const; + + KOKKOS_INLINE_FUNCTION + double vec3_dot(const F_FLOAT x[3], const double y[3]) const { + return x[0]*y[0] + x[1]*y[1] + x[2]*y[2]; + } + + KOKKOS_INLINE_FUNCTION + void vec3_add(const F_FLOAT x[3], const double y[3], double * 
const z) const { + z[0] = x[0]+y[0]; z[1] = x[1]+y[1]; z[2] = x[2]+y[2]; + } + + KOKKOS_INLINE_FUNCTION + void vec3_scale(const F_FLOAT k, const double x[3], double y[3]) const { + y[0] = k*x[0]; y[1] = k*x[1]; y[2] = k*x[2]; + } + + KOKKOS_INLINE_FUNCTION + void vec3_scaleadd(const F_FLOAT k, const double x[3], const double y[3], double * const z) const { + z[0] = k*x[0]+y[0]; z[1] = k*x[1]+y[1]; z[2] = k*x[2]+y[2]; + } + + KOKKOS_INLINE_FUNCTION + int sbmask(const int& j) const; + + struct params_ters{ + params_ters(){powerm=0;gamma=0;lam3=0;c=0;d=0;h=0;powern=0;beta=0;lam2=0;bigb=0; + bigr=0;bigd=0;lam1=0;biga=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;Z_i=0;Z_j=0;ZBLcut=0;ZBLexpscale=0;}; + params_ters(int i){powerm=0;gamma=0;lam3=0;c=0;d=0;h=0;powern=0;beta=0;lam2=0;bigb=0; + bigr=0;bigd=0;lam1=0;biga=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;Z_i=0;Z_j=0;ZBLcut=0;ZBLexpscale=0;}; + F_FLOAT powerm, gamma, lam3, c, d, h, powern, beta, lam2, bigb, bigr, + bigd, lam1, biga, cutsq, c1, c2, c3, c4, Z_i, Z_j, ZBLcut, ZBLexpscale; + }; + + template + KOKKOS_INLINE_FUNCTION + void ev_tally(EV_FLOAT &ev, const int &i, const int &j, + const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, + const F_FLOAT &dely, const F_FLOAT &delz) const; + + template + KOKKOS_INLINE_FUNCTION + void v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const; + + KOKKOS_INLINE_FUNCTION + void v_tally3_atom(EV_FLOAT &ev, const int &i, const int &j, const int &k, + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const; + + void allocate(); + void setup(); + + KOKKOS_INLINE_FUNCTION + double fermi_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + + KOKKOS_INLINE_FUNCTION + double fermi_d_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + + protected: + void cleanup_copy(); + + typedef Kokkos::DualView tdual_int_3d; + Kokkos::DualView k_params; + typename Kokkos::DualView::t_dev_const 
paramskk; + // hardwired to space for 15 atom types + //params_ters m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + + typename AT::t_x_array_randomread x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + typename AT::t_tagint_1d tag; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + DAT::t_efloat_1d d_eatom; + DAT::t_virial_array d_vatom; + + typedef Kokkos::DualView tdual_ffloat_2d_n7; + typedef typename tdual_ffloat_2d_n7::t_dev_const_randomread t_ffloat_2d_n7_randomread; + typedef typename tdual_ffloat_2d_n7::t_host t_host_ffloat_2d_n7; + + typename ArrayTypes::t_neighbors_2d d_neighbors; + typename ArrayTypes::t_int_1d_randomread d_ilist; + typename ArrayTypes::t_int_1d_randomread d_numneigh; + //NeighListKokkos k_list; + + class AtomKokkos *atomKK; + int neighflag,newton_pair; + int nlocal,nall,eflag,vflag; + + // ZBL + F_FLOAT global_a_0; // Bohr radius for Coulomb repulsion + F_FLOAT global_epsilon_0; // permittivity of vacuum for Coulomb repulsion + F_FLOAT global_e; // proton charge (negative of electron charge) + + friend void pair_virial_fdotr_compute(PairTersoffZBLKokkos*); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/