From 44f67330e238b6ce0418d5c378b0f6b116eadc98 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 13 Apr 2022 13:50:49 -0600 Subject: [PATCH] Port changes to other flavors of Tersoff --- src/KOKKOS/pair_tersoff_kokkos.cpp | 9 +- src/KOKKOS/pair_tersoff_mod_kokkos.cpp | 697 +++++++--------------- src/KOKKOS/pair_tersoff_mod_kokkos.h | 113 ++-- src/KOKKOS/pair_tersoff_zbl_kokkos.cpp | 771 +++++++------------------ src/KOKKOS/pair_tersoff_zbl_kokkos.h | 118 ++-- 5 files changed, 499 insertions(+), 1209 deletions(-) diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index ee1803de06..95800c54f7 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -686,7 +686,7 @@ double PairTersoffKokkos::ters_dbij(const Param& param, const F_FLOA return param.beta * (factor * // error in negligible 2nd term fixed 2/21/2022 // (1.0 - 0.5*(1.0 + 1.0/(2.0*param.powern)) * - (1.0 - (1.0 + 1.0/(2.0*param.powern)) * + (1.0 - (1.0 + 1.0/(2.0*param.powern)) * pow(tmp,-param.powern))); if (tmp < param.c4) return 0.0; if (tmp < param.c3) @@ -936,8 +936,6 @@ void PairTersoffKokkos::ev_tally(EV_FLOAT &ev, const int &i, const i const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { - const int VFLAG = vflag_either; - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); @@ -952,7 +950,7 @@ void PairTersoffKokkos::ev_tally(EV_FLOAT &ev, const int &i, const i a_eatom[j] += epairhalf; } - if (VFLAG) { + if (vflag_either) { const E_FLOAT v0 = delx*delx*fpair; const E_FLOAT v1 = dely*dely*fpair; const E_FLOAT v2 = delz*delz*fpair; @@ -1043,7 +1041,8 @@ void PairTersoffKokkos::v_tally3(EV_FLOAT &ev, template KOKKOS_INLINE_FUNCTION void PairTersoffKokkos::v_tally3_atom(EV_FLOAT &ev, const int &i, const int & /*j*/, - const int & /*k*/, F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const + const int & /*k*/, F_FLOAT *fj, F_FLOAT *fk, + F_FLOAT *drji, F_FLOAT *drjk) const { F_FLOAT v[6]; diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp index 2f354a93a9..2609fdaa02 100644 --- a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp @@ -65,18 +65,29 @@ PairTersoffMODKokkos::~PairTersoffMODKokkos() } } -/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ template -void PairTersoffMODKokkos::allocate() +void PairTersoffMODKokkos::coeff(int narg, char **arg) { - PairTersoffMOD::allocate(); + PairTersoffMOD::coeff(narg,arg); + + // sync map int n = atom->ntypes; - k_params = Kokkos::DualView - ("PairTersoffMOD::paramskk",n+1,n+1,n+1); - paramskk = k_params.template view(); + DAT::tdual_int_1d k_map = DAT::tdual_int_1d("pair:map",n+1); + HAT::t_int_1d h_map = k_map.h_view; + + for (int i = 1; i <= n; i++) + h_map[i] = map[i]; + + k_map.template modify(); + k_map.template sync(); + + d_map = k_map.template view(); } /* ---------------------------------------------------------------------- @@ -95,8 +106,11 @@ void PairTersoffMODKokkos::init_style() request->set_kokkos_host(std::is_same::value && !std::is_same::value); request->set_kokkos_device(std::is_same::value); + // always request a full neighbor list + request->enable_full(); + if (neighflag == FULL) - error->all(FLERR,"Cannot (yet) use full neighbor list style with tersoff/mod/kk"); + error->all(FLERR,"Must use half neighbor list style with pair tersoff/kk"); } /* ---------------------------------------------------------------------- */ @@ -106,37 +120,29 @@ void PairTersoffMODKokkos::setup_params() { PairTersoffMOD::setup_params(); - int i,j,k,m; - int n = atom->ntypes; + // sync elem3param and params - for (i = 1; i <= n; i++) - for (j = 1; j <= n; j++) - for (k = 1; k <= n; k++) { - m = elem3param[map[i]][map[j]][map[k]]; - k_params.h_view(i,j,k).powerm = params[m].powerm; - k_params.h_view(i,j,k).lam3 = params[m].lam3; - k_params.h_view(i,j,k).h = params[m].h; - k_params.h_view(i,j,k).powern = params[m].powern; - k_params.h_view(i,j,k).beta = params[m].beta; - k_params.h_view(i,j,k).lam2 = params[m].lam2; - k_params.h_view(i,j,k).bigb = params[m].bigb; - k_params.h_view(i,j,k).bigr = params[m].bigr; - k_params.h_view(i,j,k).bigd = params[m].bigd; - k_params.h_view(i,j,k).lam1 = params[m].lam1; - k_params.h_view(i,j,k).biga = params[m].biga; - k_params.h_view(i,j,k).cutsq = params[m].cutsq; - k_params.h_view(i,j,k).c1 = params[m].c1; - k_params.h_view(i,j,k).c2 = params[m].c2; - k_params.h_view(i,j,k).c3 = params[m].c3; - k_params.h_view(i,j,k).c4 = params[m].c4; - k_params.h_view(i,j,k).c5 = params[m].c5; - k_params.h_view(i,j,k).ca1 = params[m].ca1; - k_params.h_view(i,j,k).ca4 = params[m].ca4; - k_params.h_view(i,j,k).powern_del = params[m].powern_del; - } + tdual_int_3d k_elem3param = tdual_int_3d("pair:elem3param",nelements,nelements,nelements); + t_host_int_3d h_elem3param = k_elem3param.h_view; - k_params.template modify(); + tdual_param_1d k_params = tdual_param_1d("pair:params",nparams); + t_host_param_1d h_params = k_params.h_view; + for (int i = 0; i < nelements; i++) + for (int j = 0; j < nelements; j++) + for (int k = 0; k < nelements; k++) + h_elem3param(i,j,k) = elem3param[i][j][k]; + + for (int m = 0; m < nparams; m++) + h_params[m] = params[m]; + + k_elem3param.modify_host(); + k_elem3param.template sync(); + k_params.modify_host(); + k_params.template sync(); + + d_elem3param = k_elem3param.template view(); + d_params = k_params.template view(); } /* ---------------------------------------------------------------------- */ @@ -147,8 +153,6 @@ void PairTersoffMODKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL) no_virial_fdotr_compute = 1; - ev_init(eflag,vflag,0); // reallocate per-atom arrays if necessary @@ -165,7 +169,6 @@ void PairTersoffMODKokkos::compute(int eflag_in, int vflag_in) } atomKK->sync(execution_space,datamask_read); - k_params.template sync(); if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); else atomKK->modified(execution_space,F_MASK); @@ -204,37 +207,25 @@ void PairTersoffMODKokkos::compute(int eflag_in, int vflag_in) int max_neighs = d_neighbors.extent(1); - if (((int)d_neighbors_short.extent(1) != max_neighs) || - ((int)d_neighbors_short.extent(0) != ignum)) { - d_neighbors_short = Kokkos::View("Tersoff::neighbors_short",ignum,max_neighs); + if (((int)d_neighbors_short.extent(1) < max_neighs) || + ((int)d_neighbors_short.extent(0) < ignum)) { + d_neighbors_short = Kokkos::View("Tersoff::neighbors_short",ignum*1.2,max_neighs); } - if ((int)d_numneigh_short.extent(0)!=ignum) - d_numneigh_short = Kokkos::View("Tersoff::numneighs_short",ignum); - Kokkos::parallel_for(Kokkos::RangePolicy(0,neighflag==FULL?ignum:inum), *this); + if ((int)d_numneigh_short.extent(0) < ignum) + d_numneigh_short = Kokkos::View("Tersoff::numneighs_short",ignum*1.2); + Kokkos::parallel_for(Kokkos::RangePolicy(0,inum), *this); if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); ev_all += ev; } else if (neighflag == HALFTHREAD) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); - ev_all += ev; - } else if (neighflag == FULL) { - if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); - ev_all += ev; - - if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ignum),*this,ev); - else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,ignum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); ev_all += ev; } @@ -286,6 +277,7 @@ void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeShortN const X_FLOAT xtmp = x(i,0); const X_FLOAT ytmp = x(i,1); const X_FLOAT ztmp = x(i,2); + const F_FLOAT cutmax_sq = cutmax*cutmax; const int jnum = d_numneigh[i]; int inside = 0; @@ -298,12 +290,12 @@ void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeShortN const X_FLOAT delz = ztmp - x(j,2); const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; - if (rsq < cutmax*cutmax) { - d_neighbors_short(i,inside) = j; + if (rsq < cutmax_sq) { + d_neighbors_short(ii,inside) = j; inside++; } } - d_numneigh_short(i) = inside; + d_numneigh_short(ii) = inside; } /* ---------------------------------------------------------------------- */ @@ -311,25 +303,25 @@ void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeShortN template template KOKKOS_INLINE_FUNCTION -void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeHalf, const int &ii, EV_FLOAT& ev) const { +void PairTersoffMODKokkos::operator()(TagPairTersoffMODCompute, const int &ii, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial - auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); - auto a_f = v_f.template access>(); + const auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); + const auto a_f = v_f.template access>(); const int i = d_ilist[ii]; if (i >= nlocal) return; const X_FLOAT xtmp = x(i,0); const X_FLOAT ytmp = x(i,1); const X_FLOAT ztmp = x(i,2); - const int itype = type(i); + const int itype = d_map(type(i)); const tagint itag = tag(i); F_FLOAT fi[3], fj[3], fk[3]; //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); - const int jnum = d_numneigh_short[i]; + const int jnum = d_numneigh_short[ii]; // repulsive @@ -338,9 +330,9 @@ void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeHalf jtag) { @@ -357,17 +349,18 @@ void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeHalf cutsq) continue; const F_FLOAT r = sqrt(rsq); - const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r); - const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r); - const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r); - const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp * - (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r; - const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp; + const F_FLOAT tmp_fce = ters_fc_k(d_params(iparam_ij),r); + const F_FLOAT tmp_fcd = ters_dfc(d_params(iparam_ij),r); + const F_FLOAT tmp_exp = exp(-d_params(iparam_ij).lam1 * r); + const F_FLOAT frep = -d_params(iparam_ij).biga * tmp_exp * + (tmp_fcd - tmp_fce*d_params(iparam_ij).lam1) / r; + const F_FLOAT eng = tmp_fce * d_params(iparam_ij).biga * tmp_exp; f_x += delx*frep; f_y += dely*frep; @@ -385,15 +378,16 @@ void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeHalf cutsq1) continue; @@ -401,28 +395,29 @@ void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeHalf cutsq2) continue; const F_FLOAT rik = sqrt(rsq2); - bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); + bo_ij += bondorder(d_params(iparam_ijk),rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); } // attractive: pairwise potential and force - const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij); - const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij); - const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij); + const F_FLOAT fa = ters_fa_k(d_params(iparam_ij),rij); + const F_FLOAT dfa = ters_dfa(d_params(iparam_ij),rij); + const F_FLOAT bij = ters_bij_k(d_params(iparam_ij),bo_ij); const F_FLOAT fatt = -0.5*bij * dfa / rij; - const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij); + const F_FLOAT prefactor = 0.5*fa * ters_dbij(d_params(iparam_ij),bo_ij); f_x += delx1*fatt; f_y += dely1*fatt; @@ -435,26 +430,27 @@ void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeHalftemplate ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); + this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); } // attractive: three-body force for (int kk = 0; kk < jnum; kk++) { if (jj == kk) continue; - int k = d_neighbors_short(i,kk); + int k = d_neighbors_short(ii,kk); k &= NEIGHMASK; - const int ktype = type(k); + const int ktype = d_map(type(k)); const F_FLOAT delx2 = xtmp - x(k,0); const F_FLOAT dely2 = ytmp - x(k,1); const F_FLOAT delz2 = ztmp - x(k,2); const F_FLOAT rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; - const F_FLOAT cutsq2 = paramskk(itype,jtype,ktype).cutsq; + const int iparam_ijk = d_elem3param(itype,jtype,ktype); + const F_FLOAT cutsq2 = d_params(iparam_ijk).cutsq; if (rsq2 > cutsq2) continue; const F_FLOAT rik = sqrt(rsq2); - ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1, + ters_dthb(d_params(iparam_ijk),prefactor,rij,delx1,dely1,delz1, rik,delx2,dely2,delz2,fi,fj,fk); f_x += fi[0]; @@ -474,6 +470,7 @@ void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeHalftemplate v_tally3(ev,i,j,k,fj,fk,delrij,delrik); } } + a_f(j,0) += fj_x; a_f(j,1) += fj_y; a_f(j,2) += fj_z; @@ -486,309 +483,19 @@ void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeHalf template KOKKOS_INLINE_FUNCTION -void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeHalf, const int &ii) const { +void PairTersoffMODKokkos::operator()(TagPairTersoffMODCompute, const int &ii) const { EV_FLOAT ev; - this->template operator()(TagPairTersoffMODComputeHalf(), ii, ev); -} - -/* ---------------------------------------------------------------------- */ - -template -template -KOKKOS_INLINE_FUNCTION -void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeFullA, const int &ii, EV_FLOAT& ev) const { - - const int i = d_ilist[ii]; - const X_FLOAT xtmp = x(i,0); - const X_FLOAT ytmp = x(i,1); - const X_FLOAT ztmp = x(i,2); - const int itype = type(i); - - int j,k,jj,kk,jtype,ktype; - F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij; - F_FLOAT fi[3], fj[3], fk[3]; - X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2; - - //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); - const int jnum = d_numneigh_short[i]; - - // repulsive - - F_FLOAT f_x = 0.0; - F_FLOAT f_y = 0.0; - F_FLOAT f_z = 0.0; - for (jj = 0; jj < jnum; jj++) { - j = d_neighbors_short(i,jj); - j &= NEIGHMASK; - const int jtype = type(j); - - const X_FLOAT delx = xtmp - x(j,0); - const X_FLOAT dely = ytmp - x(j,1); - const X_FLOAT delz = ztmp - x(j,2); - const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; - const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq; - - if (rsq > cutsq) continue; - - const F_FLOAT r = sqrt(rsq); - const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r); - const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r); - const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r); - const F_FLOAT frep = -paramskk(itype,jtype,jtype).biga * tmp_exp * - (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1) / r; - const F_FLOAT eng = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp; - - f_x += delx*frep; - f_y += dely*frep; - f_z += delz*frep; - - if (EVFLAG) { - if (eflag) - ev.evdwl += 0.5*eng; - if (vflag_either || eflag_atom) - this->template ev_tally(ev,i,j,eng,frep,delx,dely,delz); - } - } - - // attractive: bond order - - for (jj = 0; jj < jnum; jj++) { - j = d_neighbors_short(i,jj); - j &= NEIGHMASK; - jtype = type(j); - - delx1 = xtmp - x(j,0); - dely1 = ytmp - x(j,1); - delz1 = ztmp - x(j,2); - rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; - cutsq1 = paramskk(itype,jtype,jtype).cutsq; - - bo_ij = 0.0; - if (rsq1 > cutsq1) continue; - rij = sqrt(rsq1); - - for (kk = 0; kk < jnum; kk++) { - if (jj == kk) continue; - k = d_neighbors_short(i,kk); - k &= NEIGHMASK; - ktype = type(k); - - delx2 = xtmp - x(k,0); - dely2 = ytmp - x(k,1); - delz2 = ztmp - x(k,2); - rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; - cutsq2 = paramskk(itype,jtype,ktype).cutsq; - - if (rsq2 > cutsq2) continue; - rik = sqrt(rsq2); - bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); - } - - // attractive: pairwise potential and force - - const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij); - const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij); - const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij); - const F_FLOAT fatt = -0.5*bij * dfa / rij; - const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij); - const F_FLOAT eng = 0.5*bij * fa; - - f_x += delx1*fatt; - f_y += dely1*fatt; - f_z += delz1*fatt; - - if (EVFLAG) { - if (eflag) ev.evdwl += 0.5*eng; - if (vflag_either || eflag_atom) - this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); - } - - // attractive: three-body force - - for (kk = 0; kk < jnum; kk++) { - if (jj == kk) continue; - k = d_neighbors_short(i,kk); - k &= NEIGHMASK; - ktype = type(k); - - delx2 = xtmp - x(k,0); - dely2 = ytmp - x(k,1); - delz2 = ztmp - x(k,2); - rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; - cutsq2 = paramskk(itype,jtype,ktype).cutsq; - - if (rsq2 > cutsq2) continue; - rik = sqrt(rsq2); - ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1, - rik,delx2,dely2,delz2,fi,fj,fk); - - f_x += fi[0]; - f_y += fi[1]; - f_z += fi[2]; - - if (vflag_either) { - F_FLOAT delrij[3], delrik[3]; - delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1; - delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2; - if (vflag_either) this->template v_tally3(ev,i,j,k,fj,fk,delrij,delrik); - } - } - } - f(i,0) += f_x; - f(i,1) += f_y; - f(i,2) += f_z; -} - -template -template -KOKKOS_INLINE_FUNCTION -void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeFullA, const int &ii) const { - EV_FLOAT ev; - this->template operator()(TagPairTersoffMODComputeFullA(), ii, ev); -} - -/* ---------------------------------------------------------------------- */ - -template -template -KOKKOS_INLINE_FUNCTION -void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeFullB, const int &ii, EV_FLOAT& ev) const { - - const int i = d_ilist[ii]; - const X_FLOAT xtmp = x(i,0); - const X_FLOAT ytmp = x(i,1); - const X_FLOAT ztmp = x(i,2); - const int itype = type(i); - - int j,k,jj,kk,jtype,ktype,j_jnum; - F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij; - F_FLOAT fj[3], fk[3]; - X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2; - - const int jnum = d_numneigh_short[i]; - - F_FLOAT f_x = 0.0; - F_FLOAT f_y = 0.0; - F_FLOAT f_z = 0.0; - - // attractive: bond order - - for (jj = 0; jj < jnum; jj++) { - j = d_neighbors_short(i,jj); - j &= NEIGHMASK; - if (j >= nlocal) continue; - jtype = type(j); - - delx1 = x(j,0) - xtmp; - dely1 = x(j,1) - ytmp; - delz1 = x(j,2) - ztmp; - rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; - cutsq1 = paramskk(jtype,itype,itype).cutsq; - - bo_ij = 0.0; - if (rsq1 > cutsq1) continue; - rij = sqrt(rsq1); - - j_jnum = d_numneigh_short[j]; - - for (kk = 0; kk < j_jnum; kk++) { - k = d_neighbors_short(j,kk); - if (k == i) continue; - k &= NEIGHMASK; - ktype = type(k); - - delx2 = x(j,0) - x(k,0); - dely2 = x(j,1) - x(k,1); - delz2 = x(j,2) - x(k,2); - rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; - cutsq2 = paramskk(jtype,itype,ktype).cutsq; - - if (rsq2 > cutsq2) continue; - rik = sqrt(rsq2); - bo_ij += bondorder(jtype,itype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); - - } - - // attractive: pairwise potential and force - - const F_FLOAT fa = ters_fa_k(jtype,itype,itype,rij); - const F_FLOAT dfa = ters_dfa(jtype,itype,itype,rij); - const F_FLOAT bij = ters_bij_k(jtype,itype,itype,bo_ij); - const F_FLOAT fatt = -0.5*bij * dfa / rij; - const F_FLOAT prefactor = 0.5*fa * ters_dbij(jtype,itype,itype,bo_ij); - const F_FLOAT eng = 0.5*bij * fa; - - f_x -= delx1*fatt; - f_y -= dely1*fatt; - f_z -= delz1*fatt; - - if (EVFLAG) { - if (eflag) - ev.evdwl += 0.5 * eng; - if (vflag_either || eflag_atom) - this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); - } - - // attractive: three-body force - - for (kk = 0; kk < j_jnum; kk++) { - k = d_neighbors_short(j,kk); - if (k == i) continue; - k &= NEIGHMASK; - ktype = type(k); - - delx2 = x(j,0) - x(k,0); - dely2 = x(j,1) - x(k,1); - delz2 = x(j,2) - x(k,2); - rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; - cutsq2 = paramskk(jtype,itype,ktype).cutsq; - - if (rsq2 > cutsq2) continue; - rik = sqrt(rsq2); - ters_dthbj(jtype,itype,ktype,prefactor,rij,delx1,dely1,delz1, - rik,delx2,dely2,delz2,fj,fk); - f_x += fj[0]; - f_y += fj[1]; - f_z += fj[2]; - - if (vflag_either) { - F_FLOAT delrji[3], delrjk[3]; - delrji[0] = -delx1; delrji[1] = -dely1; delrji[2] = -delz1; - delrjk[0] = -delx2; delrjk[1] = -dely2; delrjk[2] = -delz2; - if (vflag_either) v_tally3_atom(ev,i,j,k,fj,fk,delrji,delrjk); - } - - const F_FLOAT fa_jk = ters_fa_k(jtype,ktype,itype,rik); - const F_FLOAT prefactor_jk = 0.5*fa_jk * ters_dbij(jtype,ktype,itype,bo_ij); - ters_dthbk(jtype,ktype,itype,prefactor_jk,rik,delx2,dely2,delz2, - rij,delx1,dely1,delz1,fk); - f_x += fk[0]; - f_y += fk[1]; - f_z += fk[2]; - } - } - f(i,0) += f_x; - f(i,1) += f_y; - f(i,2) += f_z; -} - -template -template -KOKKOS_INLINE_FUNCTION -void PairTersoffMODKokkos::operator()(TagPairTersoffMODComputeFullB, const int &ii) const { - EV_FLOAT ev; - this->template operator()(TagPairTersoffMODComputeFullB(), ii, ev); + this->template operator()(TagPairTersoffMODCompute(), ii, ev); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -double PairTersoffMODKokkos::ters_fc_k(const int &i, const int &j, - const int &k, const F_FLOAT &r) const +double PairTersoffMODKokkos::ters_fc_k(const Param& param, const F_FLOAT &r) const { - const F_FLOAT ters_R = paramskk(i,j,k).bigr; - const F_FLOAT ters_D = paramskk(i,j,k).bigd; + const F_FLOAT ters_R = param.bigr; + const F_FLOAT ters_D = param.bigd; if (r < ters_R-ters_D) return 1.0; if (r > ters_R+ters_D) return 0.0; @@ -800,11 +507,10 @@ double PairTersoffMODKokkos::ters_fc_k(const int &i, const int &j, template KOKKOS_INLINE_FUNCTION -double PairTersoffMODKokkos::ters_dfc(const int &i, const int &j, - const int &k, const F_FLOAT &r) const +double PairTersoffMODKokkos::ters_dfc(const Param& param, const F_FLOAT &r) const { - const F_FLOAT ters_R = paramskk(i,j,k).bigr; - const F_FLOAT ters_D = paramskk(i,j,k).bigd; + const F_FLOAT ters_R = param.bigr; + const F_FLOAT ters_D = param.bigd; if (r < ters_R-ters_D) return 0.0; if (r > ters_R+ters_D) return 0.0; @@ -816,7 +522,7 @@ double PairTersoffMODKokkos::ters_dfc(const int &i, const int &j, template KOKKOS_INLINE_FUNCTION -double PairTersoffMODKokkos::bondorder(const int &i, const int &j, const int &k, +double PairTersoffMODKokkos::bondorder(const Param& param, const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const { @@ -824,14 +530,15 @@ double PairTersoffMODKokkos::bondorder(const int &i, const int &j, c const F_FLOAT costheta = (dx1*dx2 + dy1*dy2 + dz1*dz2)/(rij*rik); - if (int(paramskk(i,j,k).powerm) == 3) arg = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); - else arg = paramskk(i,j,k).lam3 * (rij-rik); + const F_FLOAT paramtmp = param.lam3 * (rij-rik); + if (int(param.powerm) == 3) arg = paramtmp*paramtmp*paramtmp;//pow(param.lam3 * (rij-rik),3.0); + else arg = paramtmp; if (arg > 69.0776) ex_delr = 1.e30; else if (arg < -69.0776) ex_delr = 0.0; else ex_delr = exp(arg); - return ters_fc_k(i,j,k,rik) * ters_gijk(i,j,k,costheta) * ex_delr; + return ters_fc_k(param,rik) * ters_gijk(param,costheta) * ex_delr; } /* ---------------------------------------------------------------------- */ @@ -839,14 +546,14 @@ double PairTersoffMODKokkos::bondorder(const int &i, const int &j, c template KOKKOS_INLINE_FUNCTION double PairTersoffMODKokkos:: - ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const + ters_gijk(const Param& param, const F_FLOAT &cos) const { - const F_FLOAT ters_c1 = paramskk(i,j,k).c1; - const F_FLOAT ters_c2 = paramskk(i,j,k).c2; - const F_FLOAT ters_c3 = paramskk(i,j,k).c3; - const F_FLOAT ters_c4 = paramskk(i,j,k).c4; - const F_FLOAT ters_c5 = paramskk(i,j,k).c5; - const F_FLOAT tmp_h = (paramskk(i,j,k).h - cos)*(paramskk(i,j,k).h - cos); + const F_FLOAT ters_c1 = param.c1; + const F_FLOAT ters_c2 = param.c2; + const F_FLOAT ters_c3 = param.c3; + const F_FLOAT ters_c4 = param.c4; + const F_FLOAT ters_c5 = param.c5; + const F_FLOAT tmp_h = (param.h - cos)*(param.h - cos); return ters_c1 + (ters_c2*tmp_h/(ters_c3 + tmp_h)) * (1.0 + ters_c4*exp(-ters_c5*tmp_h)); @@ -858,17 +565,17 @@ double PairTersoffMODKokkos:: template KOKKOS_INLINE_FUNCTION double PairTersoffMODKokkos:: - ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const + ters_dgijk(const Param& param, const F_FLOAT &cos) const { - const F_FLOAT ters_c2 = paramskk(i,j,k).c2; - const F_FLOAT ters_c3 = paramskk(i,j,k).c3; - const F_FLOAT ters_c4 = paramskk(i,j,k).c4; - const F_FLOAT ters_c5 = paramskk(i,j,k).c5; - const F_FLOAT tmp_h = (paramskk(i,j,k).h - cos)*(paramskk(i,j,k).h - cos); - const F_FLOAT g1 = (paramskk(i,j,k).h - cos)/(ters_c3 + tmp_h); + const F_FLOAT ters_c2 = param.c2; + const F_FLOAT ters_c3 = param.c3; + const F_FLOAT ters_c4 = param.c4; + const F_FLOAT ters_c5 = param.c5; + const F_FLOAT tmp_h = (param.h - cos)*(param.h - cos); + const F_FLOAT g1 = (param.h - cos)/(ters_c3 + tmp_h); const F_FLOAT g2 = exp(-ters_c5*tmp_h); - return -2.0*ters_c2*g1*((1 + ters_c4*g2)*(1 + g1*(cos - paramskk(i,j,k).h)) - + return -2.0*ters_c2*g1*((1 + ters_c4*g2)*(1 + g1*(cos - param.h)) - tmp_h*ters_c4*ters_c5*g2); } @@ -876,58 +583,54 @@ double PairTersoffMODKokkos:: template KOKKOS_INLINE_FUNCTION -double PairTersoffMODKokkos::ters_fa_k(const int &i, const int &j, - const int &k, const F_FLOAT &r) const +double PairTersoffMODKokkos::ters_fa_k(const Param& param, const F_FLOAT &r) const { - if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0; - return -paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r) - * ters_fc_k(i,j,k,r); + if (r > param.bigr + param.bigd) return 0.0; + return -param.bigb * exp(-param.lam2 * r) + * ters_fc_k(param,r); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -double PairTersoffMODKokkos::ters_dfa(const int &i, const int &j, - const int &k, const F_FLOAT &r) const +double PairTersoffMODKokkos::ters_dfa(const Param& param, const F_FLOAT &r) const { - if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0; - return paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r) * - (paramskk(i,j,k).lam2 * ters_fc_k(i,j,k,r) - ters_dfc(i,j,k,r)); + if (r > param.bigr + param.bigd) return 0.0; + return param.bigb * exp(-param.lam2 * r) * + (param.lam2 * ters_fc_k(param,r) - ters_dfc(param,r)); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -double PairTersoffMODKokkos::ters_bij_k(const int &i, const int &j, - const int &k, const F_FLOAT &bo) const +double PairTersoffMODKokkos::ters_bij_k(const Param& param, const F_FLOAT &bo) const { - const F_FLOAT tmp = paramskk(i,j,k).beta * bo; - if (tmp > paramskk(i,j,k).ca1) - return pow(tmp, -paramskk(i,j,k).powern/(2.0*paramskk(i,j,k).powern_del)); - if (tmp < paramskk(i,j,k).ca4) + const F_FLOAT tmp = param.beta * bo; + if (tmp > param.ca1) + return pow(tmp, -param.powern/(2.0*param.powern_del)); + if (tmp < param.ca4) return 1.0; - return pow(1.0 + pow(tmp,paramskk(i,j,k).powern), -1.0/(2.0*paramskk(i,j,k).powern_del)); + return pow(1.0 + pow(tmp,param.powern), -1.0/(2.0*param.powern_del)); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -double PairTersoffMODKokkos::ters_dbij(const int &i, const int &j, - const int &k, const F_FLOAT &bo) const +double PairTersoffMODKokkos::ters_dbij(const Param& param, const F_FLOAT &bo) const { - const F_FLOAT tmp = paramskk(i,j,k).beta * bo; - if (tmp > paramskk(i,j,k).ca1) - return -0.5*(paramskk(i,j,k).powern/paramskk(i,j,k).powern_del)* - pow(tmp,-0.5*(paramskk(i,j,k).powern/paramskk(i,j,k).powern_del)) / bo; - if (tmp < paramskk(i,j,k).ca4) + const F_FLOAT tmp = param.beta * bo; + if (tmp > param.ca1) + return -0.5*(param.powern/param.powern_del)* + pow(tmp,-0.5*(param.powern/param.powern_del)) / bo; + if (tmp < param.ca4) return 0.0; - const F_FLOAT tmp_n = pow(tmp,paramskk(i,j,k).powern); - return -0.5 *(paramskk(i,j,k).powern/paramskk(i,j,k).powern_del)* - pow(1.0+tmp_n, -1.0-(1.0/(2.0*paramskk(i,j,k).powern_del)))*tmp_n / bo; + const F_FLOAT tmp_n = pow(tmp,param.powern); + return -0.5 *(param.powern/param.powern_del)* + pow(1.0+tmp_n, -1.0-(1.0/(2.0*param.powern_del)))*tmp_n / bo; } /* ---------------------------------------------------------------------- */ @@ -935,7 +638,7 @@ double PairTersoffMODKokkos::ters_dbij(const int &i, const int &j, template KOKKOS_INLINE_FUNCTION void PairTersoffMODKokkos::ters_dthb( - const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const Param& param, const F_FLOAT &prefactor, const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const @@ -960,22 +663,23 @@ void PairTersoffMODKokkos::ters_dthb( F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; - fc = ters_fc_k(i,j,k,rik); - dfc = ters_dfc(i,j,k,rik); - if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); - else tmp = paramskk(i,j,k).lam3 * (rij-rik); + fc = ters_fc_k(param,rik); + dfc = ters_dfc(param,rik); + + const F_FLOAT paramtmp = param.lam3 * (rij-rik); + if (int(param.powerm) == 3) tmp = paramtmp*paramtmp*paramtmp;//pow(param.lam3 * (rij-rik),3.0); if (tmp > 69.0776) ex_delr = 1.e30; else if (tmp < -69.0776) ex_delr = 0.0; else ex_delr = exp(tmp); - if (int(paramskk(i,j,k).powerm) == 3) - dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr; - else dex_delr = paramskk(i,j,k).lam3 * ex_delr; + if (int(param.powerm) == 3) + dex_delr = 3.0*paramtmp*paramtmp*param.lam3*ex_delr;//pow(rij-rik,2.0)*ex_delr; + else dex_delr = param.lam3 * ex_delr; cos = vec3_dot(rij_hat,rik_hat); - gijk = ters_gijk(i,j,k,cos); - dgijk = ters_dgijk(i,j,k,cos); + gijk = ters_gijk(param,cos); + dgijk = ters_dgijk(param,cos); // from PairTersoffMOD::costheta_d vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); @@ -1007,7 +711,7 @@ void PairTersoffMODKokkos::ters_dthb( template KOKKOS_INLINE_FUNCTION void PairTersoffMODKokkos::ters_dthbj( - const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const Param& param, const F_FLOAT &prefactor, const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, F_FLOAT *fj, F_FLOAT *fk) const @@ -1028,22 +732,23 @@ void PairTersoffMODKokkos::ters_dthbj( F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; - fc = ters_fc_k(i,j,k,rik); - dfc = ters_dfc(i,j,k,rik); - if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); - else tmp = paramskk(i,j,k).lam3 * (rij-rik); + fc = ters_fc_k(param,rik); + dfc = ters_dfc(param,rik); + const F_FLOAT paramtmp = param.lam3 * (rij-rik); + if (int(param.powerm) == 3) tmp = paramtmp*paramtmp*paramtmp;//pow(param.lam3 * (rij-rik),3.0); + else tmp = param.lam3 * (rij-rik); if (tmp > 69.0776) ex_delr = 1.e30; else if (tmp < -69.0776) ex_delr = 0.0; else ex_delr = exp(tmp); - if (int(paramskk(i,j,k).powerm) == 3) - dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr; - else dex_delr = paramskk(i,j,k).lam3 * ex_delr; + if (int(param.powerm) == 3) + dex_delr = 3.0*paramtmp*paramtmp*param.lam3*ex_delr;//pow(param.lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + else dex_delr = param.lam3 * ex_delr; cos = vec3_dot(rij_hat,rik_hat); - gijk = ters_gijk(i,j,k,cos); - dgijk = ters_dgijk(i,j,k,cos); + gijk = ters_gijk(param,cos); + dgijk = ters_dgijk(param,cos); vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); vec3_scale(rijinv,dcosfj,dcosfj); @@ -1068,7 +773,7 @@ void PairTersoffMODKokkos::ters_dthbj( template KOKKOS_INLINE_FUNCTION void PairTersoffMODKokkos::ters_dthbk( - const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const Param& param, const F_FLOAT &prefactor, const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, F_FLOAT *fk) const @@ -1089,22 +794,22 @@ void PairTersoffMODKokkos::ters_dthbk( F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; - fc = ters_fc_k(i,j,k,rik); - dfc = ters_dfc(i,j,k,rik); - if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); - else tmp = paramskk(i,j,k).lam3 * (rij-rik); + fc = ters_fc_k(param,rik); + dfc = ters_dfc(param,rik); + const F_FLOAT paramtmp = param.lam3 * (rij-rik); + if (int(param.powerm) == 3) tmp = paramtmp*paramtmp*paramtmp;//pow(param.lam3 * (rij-rik),3.0); if (tmp > 69.0776) ex_delr = 1.e30; else if (tmp < -69.0776) ex_delr = 0.0; else ex_delr = exp(tmp); - if (int(paramskk(i,j,k).powerm) == 3) - dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr; - else dex_delr = paramskk(i,j,k).lam3 * ex_delr; + if (int(param.powerm) == 3) + dex_delr = 3.0*paramtmp*paramtmp*param.lam3*ex_delr;//pow(param.lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + else dex_delr = param.lam3 * ex_delr; cos = vec3_dot(rij_hat,rik_hat); - gijk = ters_gijk(i,j,k,cos); - dgijk = ters_dgijk(i,j,k,cos); + gijk = ters_gijk(param,cos); + dgijk = ters_dgijk(param,cos); vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); vec3_scale(rijinv,dcosfj,dcosfj); @@ -1117,7 +822,6 @@ void PairTersoffMODKokkos::ters_dthbk( vec3_scaleadd(fc*dgijk*ex_delr,dcosfk,fk,fk); vec3_scaleadd(-fc*gijk*dex_delr,rik_hat,fk,fk); vec3_scale(prefactor,fk,fk); - } /* ---------------------------------------------------------------------- */ @@ -1129,8 +833,6 @@ void PairTersoffMODKokkos::ev_tally(EV_FLOAT &ev, const int &i, cons const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { - const int VFLAG = vflag_either; - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); @@ -1142,10 +844,10 @@ void PairTersoffMODKokkos::ev_tally(EV_FLOAT &ev, const int &i, cons if (eflag_atom) { const E_FLOAT epairhalf = 0.5 * epair; a_eatom[i] += epairhalf; - if (NEIGHFLAG != FULL) a_eatom[j] += epairhalf; + a_eatom[j] += epairhalf; } - if (VFLAG) { + if (vflag_either) { const E_FLOAT v0 = delx*delx*fpair; const E_FLOAT v1 = dely*dely*fpair; const E_FLOAT v2 = delz*delz*fpair; @@ -1154,21 +856,12 @@ void PairTersoffMODKokkos::ev_tally(EV_FLOAT &ev, const int &i, cons const E_FLOAT v5 = dely*delz*fpair; if (vflag_global) { - if (NEIGHFLAG != FULL) { - ev.v[0] += v0; - ev.v[1] += v1; - ev.v[2] += v2; - ev.v[3] += v3; - ev.v[4] += v4; - ev.v[5] += v5; - } else { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; - } + ev.v[0] += v0; + ev.v[1] += v1; + ev.v[2] += v2; + ev.v[3] += v3; + ev.v[4] += v4; + ev.v[5] += v5; } if (vflag_atom) { @@ -1179,14 +872,12 @@ void PairTersoffMODKokkos::ev_tally(EV_FLOAT &ev, const int &i, cons a_vatom(i,4) += 0.5*v4; a_vatom(i,5) += 0.5*v5; - if (NEIGHFLAG != FULL) { - a_vatom(j,0) += 0.5*v0; - a_vatom(j,1) += 0.5*v1; - a_vatom(j,2) += 0.5*v2; - a_vatom(j,3) += 0.5*v3; - a_vatom(j,4) += 0.5*v4; - a_vatom(j,5) += 0.5*v5; - } + a_vatom(j,0) += 0.5*v0; + a_vatom(j,1) += 0.5*v1; + a_vatom(j,2) += 0.5*v2; + a_vatom(j,3) += 0.5*v3; + a_vatom(j,4) += 0.5*v4; + a_vatom(j,5) += 0.5*v5; } } } @@ -1232,14 +923,13 @@ void PairTersoffMODKokkos::v_tally3(EV_FLOAT &ev, const int &i, cons a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2]; a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5]; - if (NEIGHFLAG != FULL) { - a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2]; - a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5]; - a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2]; - a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5]; - } - } + a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2]; + a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5]; + + a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2]; + a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5]; + } } /* ---------------------------------------------------------------------- */ @@ -1289,4 +979,3 @@ template class PairTersoffMODKokkos; template class PairTersoffMODKokkos; #endif } - diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.h b/src/KOKKOS/pair_tersoff_mod_kokkos.h index ec7eb1ce92..669792b9eb 100644 --- a/src/KOKKOS/pair_tersoff_mod_kokkos.h +++ b/src/KOKKOS/pair_tersoff_mod_kokkos.h @@ -30,20 +30,14 @@ PairStyle(tersoff/mod/kk/host,PairTersoffMODKokkos); namespace LAMMPS_NS { template -struct TagPairTersoffMODComputeHalf{}; - -template -struct TagPairTersoffMODComputeFullA{}; - -template -struct TagPairTersoffMODComputeFullB{}; +struct TagPairTersoffMODCompute{}; struct TagPairTersoffMODComputeShortNeigh{}; template class PairTersoffMODKokkos : public PairTersoffMOD { public: - enum {EnabledNeighFlags=FULL}; + enum {EnabledNeighFlags=HALF|HALFTHREAD}; enum {COUL_FLAG=0}; typedef DeviceType device_type; typedef ArrayTypes AT; @@ -52,81 +46,66 @@ class PairTersoffMODKokkos : public PairTersoffMOD { PairTersoffMODKokkos(class LAMMPS *); ~PairTersoffMODKokkos() override; void compute(int, int) override; + void coeff(int, char **) override; void init_style() override; template KOKKOS_INLINE_FUNCTION - void operator()(TagPairTersoffMODComputeHalf, const int&, EV_FLOAT&) const; + void operator()(TagPairTersoffMODCompute, const int&, EV_FLOAT&) const; template KOKKOS_INLINE_FUNCTION - void operator()(TagPairTersoffMODComputeHalf, const int&) const; - - template - KOKKOS_INLINE_FUNCTION - void operator()(TagPairTersoffMODComputeFullA, const int&, EV_FLOAT&) const; - - template - KOKKOS_INLINE_FUNCTION - void operator()(TagPairTersoffMODComputeFullA, const int&) const; - - template - KOKKOS_INLINE_FUNCTION - void operator()(TagPairTersoffMODComputeFullB, const int&, EV_FLOAT&) const; - - template - KOKKOS_INLINE_FUNCTION - void operator()(TagPairTersoffMODComputeFullB, const int&) const; + void operator()(TagPairTersoffMODCompute, const int&) const; KOKKOS_INLINE_FUNCTION void operator()(TagPairTersoffMODComputeShortNeigh, const int&) const; KOKKOS_INLINE_FUNCTION - double ters_fc_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + double ters_fc_k(const Param& param, const F_FLOAT &r) const; KOKKOS_INLINE_FUNCTION - double ters_dfc(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + double ters_dfc(const Param& param, const F_FLOAT &r) const; KOKKOS_INLINE_FUNCTION - double ters_fa_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + double ters_fa_k(const Param& param, const F_FLOAT &r) const; KOKKOS_INLINE_FUNCTION - double ters_dfa(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + double ters_dfa(const Param& param, const F_FLOAT &r) const; KOKKOS_INLINE_FUNCTION - double ters_bij_k(const int &i, const int &j, const int &k, const F_FLOAT &bo) const; + double ters_bij_k(const Param& param, const F_FLOAT &bo) const; KOKKOS_INLINE_FUNCTION - double ters_dbij(const int &i, const int &j, const int &k, const F_FLOAT &bo) const; + double ters_dbij(const Param& param, const F_FLOAT &bo) const; KOKKOS_INLINE_FUNCTION - double bondorder(const int &i, const int &j, const int &k, - const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, - const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const; + double bondorder(const Param& param, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const; KOKKOS_INLINE_FUNCTION - double ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const; + double ters_gijk(const Param& param, const F_FLOAT &cos) const; KOKKOS_INLINE_FUNCTION - double ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const; + double ters_dgijk(const Param& param, const F_FLOAT &cos) const; KOKKOS_INLINE_FUNCTION - void ters_dthb(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, - const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, - const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, - F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const; + void ters_dthb(const Param& param, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const; KOKKOS_INLINE_FUNCTION - void ters_dthbj(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, - const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, - const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, - F_FLOAT *fj, F_FLOAT *fk) const; + void ters_dthbj(const Param& param, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fj, F_FLOAT *fk) const; KOKKOS_INLINE_FUNCTION - void ters_dthbk(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, - const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, - const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, - F_FLOAT *fk) const; + void ters_dthbk(const Param& param, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fk) const; KOKKOS_INLINE_FUNCTION double vec3_dot(const F_FLOAT x[3], const double y[3]) const { @@ -151,17 +130,6 @@ class PairTersoffMODKokkos : public PairTersoffMOD { KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const; - struct params_ters { - KOKKOS_INLINE_FUNCTION - params_ters() {powerm=0;lam3=0;h=0;powern=0;beta=0;lam2=0;bigb=0;bigr=0;bigd=0; - lam1=0;biga=0;powern_del=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;c5=0;ca1=0;ca4=0;}; - KOKKOS_INLINE_FUNCTION - params_ters(int /*i*/) {powerm=0;lam3=0;h=0;powern=0;beta=0;lam2=0;bigb=0;bigr=0;bigd=0; - lam1=0;biga=0;powern_del=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;c5=0;ca1=0;ca4=0;}; - F_FLOAT powerm, lam3, h, powern, beta, lam2, bigb, bigr, bigd, - lam1, biga, powern_del, cutsq, c1, c2, c3, c4, c5, ca1, ca4; - }; - template KOKKOS_INLINE_FUNCTION void ev_tally(EV_FLOAT &ev, const int &i, const int &j, @@ -171,24 +139,27 @@ class PairTersoffMODKokkos : public PairTersoffMOD { template KOKKOS_INLINE_FUNCTION void v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, - F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const; + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const; KOKKOS_INLINE_FUNCTION void v_tally3_atom(EV_FLOAT &ev, const int &i, const int &j, const int &k, - F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const; + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const; - void allocate() override; void setup_params() override; protected: - using KKDeviceType = typename KKDevice::value; - typedef Kokkos::DualView tdual_int_3d; - Kokkos::DualView k_params; - typename Kokkos::DualView::t_dev_const_um paramskk; - // hardwired to space for 12 atom types - //params_ters m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typedef typename tdual_int_3d::t_dev_const_randomread t_int_3d_randomread; + typedef typename tdual_int_3d::t_host t_host_int_3d; + + t_int_3d_randomread d_elem3param; + typename AT::t_int_1d_randomread d_map; + + typedef Kokkos::DualView tdual_param_1d; + typedef typename tdual_param_1d::t_dev t_param_1d; + typedef typename tdual_param_1d::t_host t_host_param_1d; + + t_param_1d d_params; int inum; typename AT::t_x_array_randomread x; @@ -203,6 +174,7 @@ class PairTersoffMODKokkos : public PairTersoffMOD { int need_dup; + using KKDeviceType = typename KKDevice::value; template using DupScatterView = KKScatterView; @@ -213,6 +185,7 @@ class PairTersoffMODKokkos : public PairTersoffMOD { DupScatterView dup_f; DupScatterView dup_eatom; DupScatterView dup_vatom; + NonDupScatterView ndup_f; NonDupScatterView ndup_eatom; NonDupScatterView ndup_vatom; diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp index 56c73cc3ce..15ea3177fa 100644 --- a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp @@ -78,18 +78,29 @@ PairTersoffZBLKokkos::~PairTersoffZBLKokkos() } } -/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ template -void PairTersoffZBLKokkos::allocate() +void PairTersoffZBLKokkos::coeff(int narg, char **arg) { - PairTersoffZBL::allocate(); + PairTersoff::coeff(narg,arg); + + // sync map int n = atom->ntypes; - k_params = Kokkos::DualView - ("PairTersoffZBL::paramskk",n+1,n+1,n+1); - paramskk = k_params.template view(); + DAT::tdual_int_1d k_map = DAT::tdual_int_1d("pair:map",n+1); + HAT::t_int_1d h_map = k_map.h_view; + + for (int i = 1; i <= n; i++) + h_map[i] = map[i]; + + k_map.template modify(); + k_map.template sync(); + + d_map = k_map.template view(); } /* ---------------------------------------------------------------------- @@ -108,9 +119,11 @@ void PairTersoffZBLKokkos::init_style() request->set_kokkos_host(std::is_same::value && !std::is_same::value); request->set_kokkos_device(std::is_same::value); + // always request a full neighbor list request->enable_full(); + if (neighflag == FULL) - error->all(FLERR,"Cannot (yet) use full neighbor list style with tersoff/zbl/kk"); + error->all(FLERR,"Must use half neighbor list style with pair tersoff/kk"); } /* ---------------------------------------------------------------------- */ @@ -120,40 +133,29 @@ void PairTersoffZBLKokkos::setup_params() { PairTersoffZBL::setup_params(); - int i,j,k,m; - int n = atom->ntypes; + // sync elem3param and params - for (i = 1; i <= n; i++) - for (j = 1; j <= n; j++) - for (k = 1; k <= n; k++) { - m = elem3param[map[i]][map[j]][map[k]]; - k_params.h_view(i,j,k).powerm = params[m].powerm; - k_params.h_view(i,j,k).gamma = params[m].gamma; - k_params.h_view(i,j,k).lam3 = params[m].lam3; - k_params.h_view(i,j,k).c = params[m].c; - k_params.h_view(i,j,k).d = params[m].d; - k_params.h_view(i,j,k).h = params[m].h; - k_params.h_view(i,j,k).powern = params[m].powern; - k_params.h_view(i,j,k).beta = params[m].beta; - k_params.h_view(i,j,k).lam2 = params[m].lam2; - k_params.h_view(i,j,k).bigb = params[m].bigb; - k_params.h_view(i,j,k).bigr = params[m].bigr; - k_params.h_view(i,j,k).bigd = params[m].bigd; - k_params.h_view(i,j,k).lam1 = params[m].lam1; - k_params.h_view(i,j,k).biga = params[m].biga; - k_params.h_view(i,j,k).cutsq = params[m].cutsq; - k_params.h_view(i,j,k).c1 = params[m].c1; - k_params.h_view(i,j,k).c2 = params[m].c2; - k_params.h_view(i,j,k).c3 = params[m].c3; - k_params.h_view(i,j,k).c4 = params[m].c4; - k_params.h_view(i,j,k).Z_i = params[m].Z_i; - k_params.h_view(i,j,k).Z_j = params[m].Z_j; - k_params.h_view(i,j,k).ZBLcut = params[m].ZBLcut; - k_params.h_view(i,j,k).ZBLexpscale = params[m].ZBLexpscale; - } + tdual_int_3d k_elem3param = tdual_int_3d("pair:elem3param",nelements,nelements,nelements); + t_host_int_3d h_elem3param = k_elem3param.h_view; - k_params.template modify(); + tdual_param_1d k_params = tdual_param_1d("pair:params",nparams); + t_host_param_1d h_params = k_params.h_view; + for (int i = 0; i < nelements; i++) + for (int j = 0; j < nelements; j++) + for (int k = 0; k < nelements; k++) + h_elem3param(i,j,k) = elem3param[i][j][k]; + + for (int m = 0; m < nparams; m++) + h_params[m] = params[m]; + + k_elem3param.modify_host(); + k_elem3param.template sync(); + k_params.modify_host(); + k_params.template sync(); + + d_elem3param = k_elem3param.template view(); + d_params = k_params.template view(); } /* ---------------------------------------------------------------------- */ @@ -164,8 +166,6 @@ void PairTersoffZBLKokkos::compute(int eflag_in, int vflag_in) eflag = eflag_in; vflag = vflag_in; - if (neighflag == FULL) no_virial_fdotr_compute = 1; - ev_init(eflag,vflag,0); // reallocate per-atom arrays if necessary @@ -182,7 +182,6 @@ void PairTersoffZBLKokkos::compute(int eflag_in, int vflag_in) } atomKK->sync(execution_space,datamask_read); - k_params.template sync(); if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); else atomKK->modified(execution_space,F_MASK); @@ -221,37 +220,25 @@ void PairTersoffZBLKokkos::compute(int eflag_in, int vflag_in) int max_neighs = d_neighbors.extent(1); - if (((int)d_neighbors_short.extent(1) != max_neighs) || - ((int)d_neighbors_short.extent(0) != ignum)) { - d_neighbors_short = Kokkos::View("Tersoff::neighbors_short",ignum,max_neighs); + if (((int)d_neighbors_short.extent(1) < max_neighs) || + ((int)d_neighbors_short.extent(0) < ignum)) { + d_neighbors_short = Kokkos::View("Tersoff::neighbors_short",ignum*1.2,max_neighs); } - if ((int)d_numneigh_short.extent(0)!=ignum) - d_numneigh_short = Kokkos::View("Tersoff::numneighs_short",ignum); - Kokkos::parallel_for(Kokkos::RangePolicy(0,neighflag==FULL?ignum:inum), *this); + if ((int)d_numneigh_short.extent(0) < ignum) + d_numneigh_short = Kokkos::View("Tersoff::numneighs_short",ignum*1.2); + Kokkos::parallel_for(Kokkos::RangePolicy(0,inum), *this); if (neighflag == HALF) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); ev_all += ev; } else if (neighflag == HALFTHREAD) { if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); + Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); - ev_all += ev; - } else if (neighflag == FULL) { - if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,inum),*this,ev); - else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); - ev_all += ev; - - if (evflag) - Kokkos::parallel_reduce(Kokkos::RangePolicy >(0,ignum),*this,ev); - else - Kokkos::parallel_for(Kokkos::RangePolicy >(0,ignum),*this); + Kokkos::parallel_for(Kokkos::RangePolicy >(0,inum),*this); ev_all += ev; } @@ -303,6 +290,7 @@ void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeShortN const X_FLOAT xtmp = x(i,0); const X_FLOAT ytmp = x(i,1); const X_FLOAT ztmp = x(i,2); + const F_FLOAT cutmax_sq = cutmax*cutmax; const int jnum = d_numneigh[i]; int inside = 0; @@ -315,12 +303,12 @@ void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeShortN const X_FLOAT delz = ztmp - x(j,2); const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; - if (rsq < cutmax*cutmax) { - d_neighbors_short(i,inside) = j; + if (rsq < cutmax_sq) { + d_neighbors_short(ii,inside) = j; inside++; } } - d_numneigh_short(i) = inside; + d_numneigh_short(ii) = inside; } /* ---------------------------------------------------------------------- */ @@ -328,25 +316,25 @@ void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeShortN template template KOKKOS_INLINE_FUNCTION -void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeHalf, const int &ii, EV_FLOAT& ev) const { +void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLCompute, const int &ii, EV_FLOAT& ev) const { // The f array is duplicated for OpenMP, atomic for CUDA, and neither for Serial - auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); - auto a_f = v_f.template access>(); + const auto v_f = ScatterViewHelper,decltype(dup_f),decltype(ndup_f)>::get(dup_f,ndup_f); + const auto a_f = v_f.template access>(); const int i = d_ilist[ii]; if (i >= nlocal) return; const X_FLOAT xtmp = x(i,0); const X_FLOAT ytmp = x(i,1); const X_FLOAT ztmp = x(i,2); - const int itype = type(i); + const int itype = d_map(type(i)); const tagint itag = tag(i); F_FLOAT fi[3], fj[3], fk[3]; //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); - const int jnum = d_numneigh_short[i]; + const int jnum = d_numneigh_short[ii]; // repulsive @@ -355,9 +343,9 @@ void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeHalf jtag) { @@ -374,26 +362,27 @@ void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeHalf cutsq) continue; // Tersoff repulsive portion const F_FLOAT r = sqrt(rsq); - const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r); - const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r); - const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r); - const F_FLOAT frep_t = paramskk(itype,jtype,jtype).biga * tmp_exp * - (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1); - const F_FLOAT eng_t = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp; + const F_FLOAT tmp_fce = ters_fc_k(d_params(iparam_ij),r); + const F_FLOAT tmp_fcd = ters_dfc(d_params(iparam_ij),r); + const F_FLOAT tmp_exp = exp(-d_params(iparam_ij).lam1 * r); + const F_FLOAT frep_t = d_params(iparam_ij).biga * tmp_exp * + (tmp_fcd - tmp_fce*d_params(iparam_ij).lam1); + const F_FLOAT eng_t = tmp_fce * d_params(iparam_ij).biga * tmp_exp; // ZBL repulsive portion const F_FLOAT esq = pow(global_e,2.0); const F_FLOAT a_ij = (0.8854*global_a_0) / - (pow(paramskk(itype,jtype,jtype).Z_i,0.23) + pow(paramskk(itype,jtype,jtype).Z_j,0.23)); - const F_FLOAT premult = (paramskk(itype,jtype,jtype).Z_i * paramskk(itype,jtype,jtype).Z_j * esq)/ + (pow(d_params(iparam_ij).Z_i,0.23) + pow(d_params(iparam_ij).Z_j,0.23)); + const F_FLOAT premult = (d_params(iparam_ij).Z_i * d_params(iparam_ij).Z_j * esq)/ (4.0*MY_PI*global_epsilon_0); const F_FLOAT r_ov_a = r/a_ij; const F_FLOAT phi = 0.1818*exp(-3.2*r_ov_a) + 0.5099*exp(-0.9423*r_ov_a) + @@ -408,13 +397,13 @@ void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeHalf::operator()(TagPairTersoffZBLComputeHalf cutsq1) continue; @@ -448,28 +438,29 @@ void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeHalf cutsq2) continue; const F_FLOAT rik = sqrt(rsq2); - bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); + bo_ij += bondorder(d_params(iparam_ijk),rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); } // attractive: pairwise potential and force - const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij); - const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij); - const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij); + const F_FLOAT fa = ters_fa_k(d_params(iparam_ij),rij); + const F_FLOAT dfa = ters_dfa(d_params(iparam_ij),rij); + const F_FLOAT bij = ters_bij_k(d_params(iparam_ij),bo_ij); const F_FLOAT fatt = -0.5*bij * dfa / rij; - const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij); + const F_FLOAT prefactor = 0.5*fa * ters_dbij(d_params(iparam_ij),bo_ij); f_x += delx1*fatt; f_y += dely1*fatt; @@ -489,19 +480,20 @@ void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeHalf cutsq2) continue; const F_FLOAT rik = sqrt(rsq2); - ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1, + ters_dthb(d_params(iparam_ijk),prefactor,rij,delx1,dely1,delz1, rik,delx2,dely2,delz2,fi,fj,fk); f_x += fi[0]; @@ -521,6 +513,7 @@ void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeHalftemplate v_tally3(ev,i,j,k,fj,fk,delrij,delrik); } } + a_f(j,0) += fj_x; a_f(j,1) += fj_y; a_f(j,2) += fj_z; @@ -533,339 +526,19 @@ void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeHalf template KOKKOS_INLINE_FUNCTION -void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeHalf, const int &ii) const { +void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLCompute, const int &ii) const { EV_FLOAT ev; - this->template operator()(TagPairTersoffZBLComputeHalf(), ii, ev); -} - -/* ---------------------------------------------------------------------- */ - -template -template -KOKKOS_INLINE_FUNCTION -void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeFullA, const int &ii, EV_FLOAT& ev) const { - - const int i = d_ilist[ii]; - const X_FLOAT xtmp = x(i,0); - const X_FLOAT ytmp = x(i,1); - const X_FLOAT ztmp = x(i,2); - const int itype = type(i); - - int j,k,jj,kk,jtype,ktype; - F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij; - F_FLOAT fi[3], fj[3], fk[3]; - X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2; - - //const AtomNeighborsConst d_neighbors_i = k_list.get_neighbors_const(i); - const int jnum = d_numneigh[i]; - - // repulsive - - F_FLOAT f_x = 0.0; - F_FLOAT f_y = 0.0; - F_FLOAT f_z = 0.0; - for (jj = 0; jj < jnum; jj++) { - j = d_neighbors_short(i,jj); - j &= NEIGHMASK; - const int jtype = type(j); - - const X_FLOAT delx = xtmp - x(j,0); - const X_FLOAT dely = ytmp - x(j,1); - const X_FLOAT delz = ztmp - x(j,2); - const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; - const F_FLOAT cutsq = paramskk(itype,jtype,jtype).cutsq; - - if (rsq > cutsq) continue; - - // Tersoff repulsive portion - - const F_FLOAT r = sqrt(rsq); - const F_FLOAT tmp_fce = ters_fc_k(itype,jtype,jtype,r); - const F_FLOAT tmp_fcd = ters_dfc(itype,jtype,jtype,r); - const F_FLOAT tmp_exp = exp(-paramskk(itype,jtype,jtype).lam1 * r); - const F_FLOAT frep_t = paramskk(itype,jtype,jtype).biga * tmp_exp * - (tmp_fcd - tmp_fce*paramskk(itype,jtype,jtype).lam1); - const F_FLOAT eng_t = tmp_fce * paramskk(itype,jtype,jtype).biga * tmp_exp; - - // ZBL repulsive portion - - const F_FLOAT esq = pow(global_e,2.0); - const F_FLOAT a_ij = (0.8854*global_a_0) / - (pow(paramskk(itype,jtype,jtype).Z_i,0.23) + pow(paramskk(itype,jtype,jtype).Z_j,0.23)); - const F_FLOAT premult = (paramskk(itype,jtype,jtype).Z_i * paramskk(itype,jtype,jtype).Z_j * esq)/ - (4.0*MY_PI*global_epsilon_0); - const F_FLOAT r_ov_a = r/a_ij; - const F_FLOAT phi = 0.1818*exp(-3.2*r_ov_a) + 0.5099*exp(-0.9423*r_ov_a) + - 0.2802*exp(-0.4029*r_ov_a) + 0.02817*exp(-0.2016*r_ov_a); - const F_FLOAT dphi = (1.0/a_ij) * (-3.2*0.1818*exp(-3.2*r_ov_a) - - 0.9423*0.5099*exp(-0.9423*r_ov_a) - - 0.4029*0.2802*exp(-0.4029*r_ov_a) - - 0.2016*0.02817*exp(-0.2016*r_ov_a)); - const F_FLOAT frep_z = premult*-phi/rsq + premult*dphi/r; - const F_FLOAT eng_z = premult*(1.0/r)*phi; - - // combine two parts with smoothing by Fermi-like function - - F_FLOAT frep, eng; - frep = -(-fermi_d_k(itype,jtype,jtype,r) * eng_z + - (1.0 - fermi_k(itype,jtype,jtype,r))*frep_z + - fermi_d_k(itype,jtype,jtype,r)*eng_t + fermi_k(itype,jtype,jtype,r)*frep_t) / r; - - if (eflag) - eng = (1.0 - fermi_k(itype,jtype,jtype,r)) * eng_z + - fermi_k(itype,jtype,jtype,r) * eng_t; - - f_x += delx*frep; - f_y += dely*frep; - f_z += delz*frep; - - if (EVFLAG) { - if (eflag) - ev.evdwl += 0.5*eng; - if (vflag_either || eflag_atom) - this->template ev_tally(ev,i,j,eng,frep,delx,dely,delz); - } - } - - // attractive: bond order - - for (jj = 0; jj < jnum; jj++) { - j = d_neighbors_short(i,jj); - j &= NEIGHMASK; - jtype = type(j); - - delx1 = xtmp - x(j,0); - dely1 = ytmp - x(j,1); - delz1 = ztmp - x(j,2); - rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; - cutsq1 = paramskk(itype,jtype,jtype).cutsq; - - bo_ij = 0.0; - if (rsq1 > cutsq1) continue; - rij = sqrt(rsq1); - - for (kk = 0; kk < jnum; kk++) { - if (jj == kk) continue; - k = d_neighbors_short(i,kk); - k &= NEIGHMASK; - ktype = type(k); - - delx2 = xtmp - x(k,0); - dely2 = ytmp - x(k,1); - delz2 = ztmp - x(k,2); - rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; - cutsq2 = paramskk(itype,jtype,ktype).cutsq; - - if (rsq2 > cutsq2) continue; - rik = sqrt(rsq2); - bo_ij += bondorder(itype,jtype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); - } - - // attractive: pairwise potential and force - - const F_FLOAT fa = ters_fa_k(itype,jtype,jtype,rij); - const F_FLOAT dfa = ters_dfa(itype,jtype,jtype,rij); - const F_FLOAT bij = ters_bij_k(itype,jtype,jtype,bo_ij); - const F_FLOAT fatt = -0.5*bij * dfa / rij; - const F_FLOAT prefactor = 0.5*fa * ters_dbij(itype,jtype,jtype,bo_ij); - const F_FLOAT eng = 0.5*bij * fa; - - f_x += delx1*fatt; - f_y += dely1*fatt; - f_z += delz1*fatt; - - if (EVFLAG) { - if (eflag) ev.evdwl += 0.5*eng; - if (vflag_either || eflag_atom) - this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); - } - - // attractive: three-body force - - for (kk = 0; kk < jnum; kk++) { - if (jj == kk) continue; - k = d_neighbors_short(i,kk); - k &= NEIGHMASK; - ktype = type(k); - - delx2 = xtmp - x(k,0); - dely2 = ytmp - x(k,1); - delz2 = ztmp - x(k,2); - rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; - cutsq2 = paramskk(itype,jtype,ktype).cutsq; - - if (rsq2 > cutsq2) continue; - rik = sqrt(rsq2); - ters_dthb(itype,jtype,ktype,prefactor,rij,delx1,dely1,delz1, - rik,delx2,dely2,delz2,fi,fj,fk); - - f_x += fi[0]; - f_y += fi[1]; - f_z += fi[2]; - - if (vflag_either) { - F_FLOAT delrij[3], delrik[3]; - delrij[0] = -delx1; delrij[1] = -dely1; delrij[2] = -delz1; - delrik[0] = -delx2; delrik[1] = -dely2; delrik[2] = -delz2; - if (vflag_either) this->template v_tally3(ev,i,j,k,fj,fk,delrij,delrik); - } - } - } - f(i,0) += f_x; - f(i,1) += f_y; - f(i,2) += f_z; -} - -template -template -KOKKOS_INLINE_FUNCTION -void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeFullA, const int &ii) const { - EV_FLOAT ev; - this->template operator()(TagPairTersoffZBLComputeFullA(), ii, ev); -} - -/* ---------------------------------------------------------------------- */ - -template -template -KOKKOS_INLINE_FUNCTION -void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeFullB, const int &ii, EV_FLOAT& ev) const { - - const int i = d_ilist[ii]; - const X_FLOAT xtmp = x(i,0); - const X_FLOAT ytmp = x(i,1); - const X_FLOAT ztmp = x(i,2); - const int itype = type(i); - - int j,k,jj,kk,jtype,ktype,j_jnum; - F_FLOAT rsq1, cutsq1, rsq2, cutsq2, rij, rik, bo_ij; - F_FLOAT fj[3], fk[3]; - X_FLOAT delx1, dely1, delz1, delx2, dely2, delz2; - - const int jnum = d_numneigh_short[i]; - - F_FLOAT f_x = 0.0; - F_FLOAT f_y = 0.0; - F_FLOAT f_z = 0.0; - - // attractive: bond order - - for (jj = 0; jj < jnum; jj++) { - j = d_neighbors_short(i,jj); - j &= NEIGHMASK; - if (j >= nlocal) continue; - jtype = type(j); - - delx1 = x(j,0) - xtmp; - dely1 = x(j,1) - ytmp; - delz1 = x(j,2) - ztmp; - rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1; - cutsq1 = paramskk(jtype,itype,itype).cutsq; - - bo_ij = 0.0; - if (rsq1 > cutsq1) continue; - rij = sqrt(rsq1); - - j_jnum = d_numneigh_short[j]; - - for (kk = 0; kk < j_jnum; kk++) { - k = d_neighbors_short(j,kk); - if (k == i) continue; - k &= NEIGHMASK; - ktype = type(k); - - delx2 = x(j,0) - x(k,0); - dely2 = x(j,1) - x(k,1); - delz2 = x(j,2) - x(k,2); - rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; - cutsq2 = paramskk(jtype,itype,ktype).cutsq; - - if (rsq2 > cutsq2) continue; - rik = sqrt(rsq2); - bo_ij += bondorder(jtype,itype,ktype,rij,delx1,dely1,delz1,rik,delx2,dely2,delz2); - - } - - // attractive: pairwise potential and force - - const F_FLOAT fa = ters_fa_k(jtype,itype,itype,rij); - const F_FLOAT dfa = ters_dfa(jtype,itype,itype,rij); - const F_FLOAT bij = ters_bij_k(jtype,itype,itype,bo_ij); - const F_FLOAT fatt = -0.5*bij * dfa / rij; - const F_FLOAT prefactor = 0.5*fa * ters_dbij(jtype,itype,itype,bo_ij); - const F_FLOAT eng = 0.5*bij * fa; - - f_x -= delx1*fatt; - f_y -= dely1*fatt; - f_z -= delz1*fatt; - - if (EVFLAG) { - if (eflag) - ev.evdwl += 0.5 * eng; - if (vflag_either || eflag_atom) - this->template ev_tally(ev,i,j,eng,fatt,delx1,dely1,delz1); - } - - // attractive: three-body force - - for (kk = 0; kk < j_jnum; kk++) { - k = d_neighbors_short(j,kk); - if (k == i) continue; - k &= NEIGHMASK; - ktype = type(k); - - delx2 = x(j,0) - x(k,0); - dely2 = x(j,1) - x(k,1); - delz2 = x(j,2) - x(k,2); - rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2; - cutsq2 = paramskk(jtype,itype,ktype).cutsq; - - if (rsq2 > cutsq2) continue; - rik = sqrt(rsq2); - ters_dthbj(jtype,itype,ktype,prefactor,rij,delx1,dely1,delz1, - rik,delx2,dely2,delz2,fj,fk); - f_x += fj[0]; - f_y += fj[1]; - f_z += fj[2]; - - if (vflag_either) { - F_FLOAT delrji[3], delrjk[3]; - delrji[0] = -delx1; delrji[1] = -dely1; delrji[2] = -delz1; - delrjk[0] = -delx2; delrjk[1] = -dely2; delrjk[2] = -delz2; - if (vflag_either) v_tally3_atom(ev,i,j,k,fj,fk,delrji,delrjk); - } - - const F_FLOAT fa_jk = ters_fa_k(jtype,ktype,itype,rik); - const F_FLOAT prefactor_jk = 0.5*fa_jk * ters_dbij(jtype,ktype,itype,bo_ij); - ters_dthbk(jtype,ktype,itype,prefactor_jk,rik,delx2,dely2,delz2, - rij,delx1,dely1,delz1,fk); - f_x += fk[0]; - f_y += fk[1]; - f_z += fk[2]; - } - } - f(i,0) += f_x; - f(i,1) += f_y; - f(i,2) += f_z; -} - -template -template -KOKKOS_INLINE_FUNCTION -void PairTersoffZBLKokkos::operator()(TagPairTersoffZBLComputeFullB, const int &ii) const { - EV_FLOAT ev; - this->template operator()(TagPairTersoffZBLComputeFullB(), ii, ev); + this->template operator()(TagPairTersoffZBLCompute(), ii, ev); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -double PairTersoffZBLKokkos::ters_fc_k(const int &i, const int &j, - const int &k, const F_FLOAT &r) const +double PairTersoffZBLKokkos::ters_fc_k(const Param& param, const F_FLOAT &r) const { - const F_FLOAT ters_R = paramskk(i,j,k).bigr; - const F_FLOAT ters_D = paramskk(i,j,k).bigd; + const F_FLOAT ters_R = param.bigr; + const F_FLOAT ters_D = param.bigd; if (r < ters_R-ters_D) return 1.0; if (r > ters_R+ters_D) return 0.0; @@ -876,11 +549,10 @@ double PairTersoffZBLKokkos::ters_fc_k(const int &i, const int &j, template KOKKOS_INLINE_FUNCTION -double PairTersoffZBLKokkos::ters_dfc(const int &i, const int &j, - const int &k, const F_FLOAT &r) const +double PairTersoffZBLKokkos::ters_dfc(const Param& param, const F_FLOAT &r) const { - const F_FLOAT ters_R = paramskk(i,j,k).bigr; - const F_FLOAT ters_D = paramskk(i,j,k).bigd; + const F_FLOAT ters_R = param.bigr; + const F_FLOAT ters_D = param.bigd; if (r < ters_R-ters_D) return 0.0; if (r > ters_R+ters_D) return 0.0; @@ -891,7 +563,7 @@ double PairTersoffZBLKokkos::ters_dfc(const int &i, const int &j, template KOKKOS_INLINE_FUNCTION -double PairTersoffZBLKokkos::bondorder(const int &i, const int &j, const int &k, +double PairTersoffZBLKokkos::bondorder(const Param& param, const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const { @@ -899,14 +571,15 @@ double PairTersoffZBLKokkos::bondorder(const int &i, const int &j, c const F_FLOAT costheta = (dx1*dx2 + dy1*dy2 + dz1*dz2)/(rij*rik); - if (int(paramskk(i,j,k).powerm) == 3) arg = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); - else arg = paramskk(i,j,k).lam3 * (rij-rik); + const F_FLOAT paramtmp = param.lam3 * (rij-rik); + if (int(param.powerm) == 3) arg = paramtmp*paramtmp*paramtmp;//pow(param.lam3 * (rij-rik),3.0); + else arg = paramtmp; if (arg > 69.0776) ex_delr = 1.e30; else if (arg < -69.0776) ex_delr = 0.0; else ex_delr = exp(arg); - return ters_fc_k(i,j,k,rik) * ters_gijk(i,j,k,costheta) * ex_delr; + return ters_fc_k(param,rik) * ters_gijk(param,costheta) * ex_delr; } /* ---------------------------------------------------------------------- */ @@ -914,13 +587,13 @@ double PairTersoffZBLKokkos::bondorder(const int &i, const int &j, c template KOKKOS_INLINE_FUNCTION double PairTersoffZBLKokkos:: - ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const + ters_gijk(const Param& param, const F_FLOAT &cos) const { - const F_FLOAT ters_c = paramskk(i,j,k).c * paramskk(i,j,k).c; - const F_FLOAT ters_d = paramskk(i,j,k).d * paramskk(i,j,k).d; - const F_FLOAT hcth = paramskk(i,j,k).h - cos; + const F_FLOAT ters_c = param.c * param.c; + const F_FLOAT ters_d = param.d * param.d; + const F_FLOAT hcth = param.h - cos; - return paramskk(i,j,k).gamma*(1.0 + ters_c/ters_d - ters_c/(ters_d+hcth*hcth)); + return param.gamma*(1.0 + ters_c/ters_d - ters_c/(ters_d+hcth*hcth)); } /* ---------------------------------------------------------------------- */ @@ -928,81 +601,77 @@ double PairTersoffZBLKokkos:: template KOKKOS_INLINE_FUNCTION double PairTersoffZBLKokkos:: - ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const + ters_dgijk(const Param& param, const F_FLOAT &cos) const { - - const F_FLOAT ters_c = paramskk(i,j,k).c * paramskk(i,j,k).c; - const F_FLOAT ters_d = paramskk(i,j,k).d * paramskk(i,j,k).d; - const F_FLOAT hcth = paramskk(i,j,k).h - cos; + const F_FLOAT ters_c = param.c * param.c; + const F_FLOAT ters_d = param.d * param.d; + const F_FLOAT hcth = param.h - cos; const F_FLOAT numerator = -2.0 * ters_c * hcth; const F_FLOAT denominator = 1.0/(ters_d + hcth*hcth); - return paramskk(i,j,k).gamma * numerator * denominator * denominator; + return param.gamma * numerator * denominator * denominator; } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -double PairTersoffZBLKokkos::ters_fa_k(const int &i, const int &j, - const int &k, const F_FLOAT &r) const +double PairTersoffZBLKokkos::ters_fa_k(const Param& param, const F_FLOAT &r) const { - if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0; - return -paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r) - * ters_fc_k(i,j,k,r) * fermi_k(i,j,k,r); + if (r > param.bigr + param.bigd) return 0.0; + return -param.bigb * exp(-param.lam2 * r) + * ters_fc_k(param,r) * fermi_k(param,r); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -double PairTersoffZBLKokkos::ters_dfa(const int &i, const int &j, - const int &k, const F_FLOAT &r) const +double PairTersoffZBLKokkos::ters_dfa(const Param& param, const F_FLOAT &r) const { - if (r > paramskk(i,j,k).bigr + paramskk(i,j,k).bigd) return 0.0; - return paramskk(i,j,k).bigb * exp(-paramskk(i,j,k).lam2 * r) * - (paramskk(i,j,k).lam2 * ters_fc_k(i,j,k,r) * fermi_k(i,j,k,r) - - ters_dfc(i,j,k,r) * fermi_k(i,j,k,r) - ters_fc_k(i,j,k,r) * - fermi_d_k(i,j,k,r)); + if (r > param.bigr + param.bigd) return 0.0; + return param.bigb * exp(-param.lam2 * r) * + (param.lam2 * ters_fc_k(param,r) * fermi_k(param,r) - + ters_dfc(param,r) * fermi_k(param,r) - ters_fc_k(param,r) * + fermi_d_k(param,r)); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -double PairTersoffZBLKokkos::ters_bij_k(const int &i, const int &j, - const int &k, const F_FLOAT &bo) const +double PairTersoffZBLKokkos::ters_bij_k(const Param& param, const F_FLOAT &bo) const { - const F_FLOAT tmp = paramskk(i,j,k).beta * bo; - if (tmp > paramskk(i,j,k).c1) return 1.0/sqrt(tmp); - if (tmp > paramskk(i,j,k).c2) - return (1.0 - pow(tmp,-paramskk(i,j,k).powern) / (2.0*paramskk(i,j,k).powern))/sqrt(tmp); - if (tmp < paramskk(i,j,k).c4) return 1.0; - if (tmp < paramskk(i,j,k).c3) - return 1.0 - pow(tmp,paramskk(i,j,k).powern)/(2.0*paramskk(i,j,k).powern); - return pow(1.0 + pow(tmp,paramskk(i,j,k).powern), -1.0/(2.0*paramskk(i,j,k).powern)); + const F_FLOAT tmp = param.beta * bo; + if (tmp > param.c1) return 1.0/sqrt(tmp); + if (tmp > param.c2) + return (1.0 - pow(tmp,-param.powern) / (2.0*param.powern))/sqrt(tmp); + if (tmp < param.c4) return 1.0; + if (tmp < param.c3) + return 1.0 - pow(tmp,param.powern)/(2.0*param.powern); + return pow(1.0 + pow(tmp,param.powern), -1.0/(2.0*param.powern)); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -double PairTersoffZBLKokkos::ters_dbij(const int &i, const int &j, - const int &k, const F_FLOAT &bo) const +double PairTersoffZBLKokkos::ters_dbij(const Param& param, const F_FLOAT &bo) const { - const F_FLOAT tmp = paramskk(i,j,k).beta * bo; - if (tmp > paramskk(i,j,k).c1) return paramskk(i,j,k).beta * -0.5*pow(tmp,-1.5); - if (tmp > paramskk(i,j,k).c2) - return paramskk(i,j,k).beta * (-0.5*pow(tmp,-1.5) * + const F_FLOAT tmp = param.beta * bo; + const F_FLOAT factor = -0.5/sqrt(tmp*tmp*tmp); //pow(tmp,-1.5) + if (tmp > param.c1) return param.beta * factor; + if (tmp > param.c2) + return param.beta * (factor * // error in negligible 2nd term fixed 2/21/2022 - //(1.0 - 0.5*(1.0 + 1.0/(2.0*paramskk(i,j,k).powern)) * - (1.0 - (1.0 + 1.0/(2.0*paramskk(i,j,k).powern)) * - pow(tmp,-paramskk(i,j,k).powern))); - if (tmp < paramskk(i,j,k).c4) return 0.0; - if (tmp < paramskk(i,j,k).c3) - return -0.5*paramskk(i,j,k).beta * pow(tmp,paramskk(i,j,k).powern-1.0); + // (1.0 - 0.5*(1.0 + 1.0/(2.0*param.powern)) * + (1.0 - (1.0 + 1.0/(2.0*param.powern)) * + pow(tmp,-param.powern))); + if (tmp < param.c4) return 0.0; + if (tmp < param.c3) + return -0.5*param.beta * pow(tmp,param.powern-1.0); - const F_FLOAT tmp_n = pow(tmp,paramskk(i,j,k).powern); - return -0.5 * pow(1.0+tmp_n, -1.0-(1.0/(2.0*paramskk(i,j,k).powern)))*tmp_n / bo; + const F_FLOAT tmp_n = pow(tmp,param.powern); + return -0.5 * pow(1.0+tmp_n, -1.0-(1.0/(2.0*param.powern)))*tmp_n / bo; } /* ---------------------------------------------------------------------- */ @@ -1010,7 +679,7 @@ double PairTersoffZBLKokkos::ters_dbij(const int &i, const int &j, template KOKKOS_INLINE_FUNCTION void PairTersoffZBLKokkos::ters_dthb( - const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const Param& param, const F_FLOAT &prefactor, const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const @@ -1035,22 +704,23 @@ void PairTersoffZBLKokkos::ters_dthb( F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; - fc = ters_fc_k(i,j,k,rik); - dfc = ters_dfc(i,j,k,rik); - if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); - else tmp = paramskk(i,j,k).lam3 * (rij-rik); + fc = ters_fc_k(param,rik); + dfc = ters_dfc(param,rik); + + const F_FLOAT paramtmp = param.lam3 * (rij-rik); + if (int(param.powerm) == 3) tmp = paramtmp*paramtmp*paramtmp;//pow(param.lam3 * (rij-rik),3.0); if (tmp > 69.0776) ex_delr = 1.e30; else if (tmp < -69.0776) ex_delr = 0.0; else ex_delr = exp(tmp); - if (int(paramskk(i,j,k).powerm) == 3) - dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr; - else dex_delr = paramskk(i,j,k).lam3 * ex_delr; + if (int(param.powerm) == 3) + dex_delr = 3.0*paramtmp*paramtmp*param.lam3*ex_delr;//pow(rij-rik,2.0)*ex_delr; + else dex_delr = param.lam3 * ex_delr; cos = vec3_dot(rij_hat,rik_hat); - gijk = ters_gijk(i,j,k,cos); - dgijk = ters_dgijk(i,j,k,cos); + gijk = ters_gijk(param,cos); + dgijk = ters_dgijk(param,cos); // from PairTersoffZBL::costheta_d vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); @@ -1082,7 +752,7 @@ void PairTersoffZBLKokkos::ters_dthb( template KOKKOS_INLINE_FUNCTION void PairTersoffZBLKokkos::ters_dthbj( - const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const Param& param, const F_FLOAT &prefactor, const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, F_FLOAT *fj, F_FLOAT *fk) const @@ -1103,22 +773,23 @@ void PairTersoffZBLKokkos::ters_dthbj( F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; - fc = ters_fc_k(i,j,k,rik); - dfc = ters_dfc(i,j,k,rik); - if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); - else tmp = paramskk(i,j,k).lam3 * (rij-rik); + fc = ters_fc_k(param,rik); + dfc = ters_dfc(param,rik); + const F_FLOAT paramtmp = param.lam3 * (rij-rik); + if (int(param.powerm) == 3) tmp = paramtmp*paramtmp*paramtmp;//pow(param.lam3 * (rij-rik),3.0); + else tmp = param.lam3 * (rij-rik); if (tmp > 69.0776) ex_delr = 1.e30; else if (tmp < -69.0776) ex_delr = 0.0; else ex_delr = exp(tmp); - if (int(paramskk(i,j,k).powerm) == 3) - dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr; - else dex_delr = paramskk(i,j,k).lam3 * ex_delr; + if (int(param.powerm) == 3) + dex_delr = 3.0*paramtmp*paramtmp*param.lam3*ex_delr;//pow(param.lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + else dex_delr = param.lam3 * ex_delr; cos = vec3_dot(rij_hat,rik_hat); - gijk = ters_gijk(i,j,k,cos); - dgijk = ters_dgijk(i,j,k,cos); + gijk = ters_gijk(param,cos); + dgijk = ters_dgijk(param,cos); vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); vec3_scale(rijinv,dcosfj,dcosfj); @@ -1143,7 +814,7 @@ void PairTersoffZBLKokkos::ters_dthbj( template KOKKOS_INLINE_FUNCTION void PairTersoffZBLKokkos::ters_dthbk( - const int &i, const int &j, const int &k, const F_FLOAT &prefactor, + const Param& param, const F_FLOAT &prefactor, const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, F_FLOAT *fk) const @@ -1164,22 +835,22 @@ void PairTersoffZBLKokkos::ters_dthbk( F_FLOAT gijk,dgijk,ex_delr,dex_delr,fc,dfc,cos,tmp; F_FLOAT dcosfi[3],dcosfj[3],dcosfk[3]; - fc = ters_fc_k(i,j,k,rik); - dfc = ters_dfc(i,j,k,rik); - if (int(paramskk(i,j,k).powerm) == 3) tmp = pow(paramskk(i,j,k).lam3 * (rij-rik),3.0); - else tmp = paramskk(i,j,k).lam3 * (rij-rik); + fc = ters_fc_k(param,rik); + dfc = ters_dfc(param,rik); + const F_FLOAT paramtmp = param.lam3 * (rij-rik); + if (int(param.powerm) == 3) tmp = paramtmp*paramtmp*paramtmp;//pow(param.lam3 * (rij-rik),3.0); if (tmp > 69.0776) ex_delr = 1.e30; else if (tmp < -69.0776) ex_delr = 0.0; else ex_delr = exp(tmp); - if (int(paramskk(i,j,k).powerm) == 3) - dex_delr = 3.0*pow(paramskk(i,j,k).lam3,3.0) * pow(rij-rik,2.0)*ex_delr; - else dex_delr = paramskk(i,j,k).lam3 * ex_delr; + if (int(param.powerm) == 3) + dex_delr = 3.0*paramtmp*paramtmp*param.lam3*ex_delr;//pow(param.lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + else dex_delr = param.lam3 * ex_delr; cos = vec3_dot(rij_hat,rik_hat); - gijk = ters_gijk(i,j,k,cos); - dgijk = ters_dgijk(i,j,k,cos); + gijk = ters_gijk(param,cos); + dgijk = ters_dgijk(param,cos); vec3_scaleadd(-cos,rij_hat,rik_hat,dcosfj); vec3_scale(rijinv,dcosfj,dcosfj); @@ -1199,24 +870,22 @@ void PairTersoffZBLKokkos::ters_dthbk( template KOKKOS_INLINE_FUNCTION -double PairTersoffZBLKokkos::fermi_k(const int &i, const int &j, - const int &k, const F_FLOAT &r) const +double PairTersoffZBLKokkos::fermi_k(const Param& param, const F_FLOAT &r) const { - return 1.0 / (1.0 + exp(-paramskk(i,j,k).ZBLexpscale * - (r - paramskk(i,j,k).ZBLcut))); + return 1.0 / (1.0 + exp(-param.ZBLexpscale * + (r - param.ZBLcut))); } /* ---------------------------------------------------------------------- */ template KOKKOS_INLINE_FUNCTION -double PairTersoffZBLKokkos::fermi_d_k(const int &i, const int &j, - const int &k, const F_FLOAT &r) const +double PairTersoffZBLKokkos::fermi_d_k(const Param& param, const F_FLOAT &r) const { - return paramskk(i,j,k).ZBLexpscale * exp(-paramskk(i,j,k).ZBLexpscale * - (r - paramskk(i,j,k).ZBLcut)) / - pow(1.0 + exp(-paramskk(i,j,k).ZBLexpscale * - (r - paramskk(i,j,k).ZBLcut)),2.0); + return param.ZBLexpscale * exp(-param.ZBLexpscale * + (r - param.ZBLcut)) / + pow(1.0 + exp(-param.ZBLexpscale * + (r - param.ZBLcut)),2.0); } /* ---------------------------------------------------------------------- */ @@ -1228,8 +897,6 @@ void PairTersoffZBLKokkos::ev_tally(EV_FLOAT &ev, const int &i, cons const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const { - const int VFLAG = vflag_either; - // The eatom and vatom arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial auto v_eatom = ScatterViewHelper,decltype(dup_eatom),decltype(ndup_eatom)>::get(dup_eatom,ndup_eatom); @@ -1241,10 +908,10 @@ void PairTersoffZBLKokkos::ev_tally(EV_FLOAT &ev, const int &i, cons if (eflag_atom) { const E_FLOAT epairhalf = 0.5 * epair; a_eatom[i] += epairhalf; - if (NEIGHFLAG != FULL) a_eatom[j] += epairhalf; + a_eatom[j] += epairhalf; } - if (VFLAG) { + if (vflag_either) { const E_FLOAT v0 = delx*delx*fpair; const E_FLOAT v1 = dely*dely*fpair; const E_FLOAT v2 = delz*delz*fpair; @@ -1253,21 +920,12 @@ void PairTersoffZBLKokkos::ev_tally(EV_FLOAT &ev, const int &i, cons const E_FLOAT v5 = dely*delz*fpair; if (vflag_global) { - if (NEIGHFLAG != FULL) { - ev.v[0] += v0; - ev.v[1] += v1; - ev.v[2] += v2; - ev.v[3] += v3; - ev.v[4] += v4; - ev.v[5] += v5; - } else { - ev.v[0] += 0.5*v0; - ev.v[1] += 0.5*v1; - ev.v[2] += 0.5*v2; - ev.v[3] += 0.5*v3; - ev.v[4] += 0.5*v4; - ev.v[5] += 0.5*v5; - } + ev.v[0] += v0; + ev.v[1] += v1; + ev.v[2] += v2; + ev.v[3] += v3; + ev.v[4] += v4; + ev.v[5] += v5; } if (vflag_atom) { @@ -1278,14 +936,12 @@ void PairTersoffZBLKokkos::ev_tally(EV_FLOAT &ev, const int &i, cons a_vatom(i,4) += 0.5*v4; a_vatom(i,5) += 0.5*v5; - if (NEIGHFLAG != FULL) { - a_vatom(j,0) += 0.5*v0; - a_vatom(j,1) += 0.5*v1; - a_vatom(j,2) += 0.5*v2; - a_vatom(j,3) += 0.5*v3; - a_vatom(j,4) += 0.5*v4; - a_vatom(j,5) += 0.5*v5; - } + a_vatom(j,0) += 0.5*v0; + a_vatom(j,1) += 0.5*v1; + a_vatom(j,2) += 0.5*v2; + a_vatom(j,3) += 0.5*v3; + a_vatom(j,4) += 0.5*v4; + a_vatom(j,5) += 0.5*v5; } } } @@ -1295,7 +951,8 @@ void PairTersoffZBLKokkos::ev_tally(EV_FLOAT &ev, const int &i, cons template template KOKKOS_INLINE_FUNCTION -void PairTersoffZBLKokkos::v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, +void PairTersoffZBLKokkos::v_tally3(EV_FLOAT &ev, + const int &i, const int &j, const int &k, F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const { // The vatom array is duplicated for OpenMP, atomic for CUDA, and neither for Serial @@ -1331,14 +988,13 @@ void PairTersoffZBLKokkos::v_tally3(EV_FLOAT &ev, const int &i, cons a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2]; a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5]; - if (NEIGHFLAG != FULL) { - a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2]; - a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5]; - a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2]; - a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5]; - } - } + a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2]; + a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5]; + + a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2]; + a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5]; + } } /* ---------------------------------------------------------------------- */ @@ -1387,4 +1043,3 @@ template class PairTersoffZBLKokkos; template class PairTersoffZBLKokkos; #endif } - diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.h b/src/KOKKOS/pair_tersoff_zbl_kokkos.h index 2cad44e200..bf4ddb642c 100644 --- a/src/KOKKOS/pair_tersoff_zbl_kokkos.h +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.h @@ -30,20 +30,14 @@ PairStyle(tersoff/zbl/kk/host,PairTersoffZBLKokkos); namespace LAMMPS_NS { template -struct TagPairTersoffZBLComputeHalf{}; - -template -struct TagPairTersoffZBLComputeFullA{}; - -template -struct TagPairTersoffZBLComputeFullB{}; +struct TagPairTersoffZBLCompute{}; struct TagPairTersoffZBLComputeShortNeigh{}; template class PairTersoffZBLKokkos : public PairTersoffZBL { public: - enum {EnabledNeighFlags=FULL}; + enum {EnabledNeighFlags=HALF|HALFTHREAD}; enum {COUL_FLAG=0}; typedef DeviceType device_type; typedef ArrayTypes AT; @@ -52,80 +46,66 @@ class PairTersoffZBLKokkos : public PairTersoffZBL { PairTersoffZBLKokkos(class LAMMPS *); ~PairTersoffZBLKokkos() override; void compute(int, int) override; + void coeff(int, char **) override; void init_style() override; template KOKKOS_INLINE_FUNCTION - void operator()(TagPairTersoffZBLComputeHalf, const int&, EV_FLOAT&) const; + void operator()(TagPairTersoffZBLCompute, const int&, EV_FLOAT&) const; template KOKKOS_INLINE_FUNCTION - void operator()(TagPairTersoffZBLComputeHalf, const int&) const; - - template - KOKKOS_INLINE_FUNCTION - void operator()(TagPairTersoffZBLComputeFullA, const int&, EV_FLOAT&) const; - - template - KOKKOS_INLINE_FUNCTION - void operator()(TagPairTersoffZBLComputeFullA, const int&) const; - - template - KOKKOS_INLINE_FUNCTION - void operator()(TagPairTersoffZBLComputeFullB, const int&, EV_FLOAT&) const; - - template - KOKKOS_INLINE_FUNCTION - void operator()(TagPairTersoffZBLComputeFullB, const int&) const; + void operator()(TagPairTersoffZBLCompute, const int&) const; KOKKOS_INLINE_FUNCTION void operator()(TagPairTersoffZBLComputeShortNeigh, const int&) const; - KOKKOS_INLINE_FUNCTION - double ters_fc_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; KOKKOS_INLINE_FUNCTION - double ters_dfc(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + double ters_fc_k(const Param& param, const F_FLOAT &r) const; KOKKOS_INLINE_FUNCTION - double ters_fa_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + double ters_dfc(const Param& param, const F_FLOAT &r) const; KOKKOS_INLINE_FUNCTION - double ters_dfa(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + double ters_fa_k(const Param& param, const F_FLOAT &r) const; KOKKOS_INLINE_FUNCTION - double ters_bij_k(const int &i, const int &j, const int &k, const F_FLOAT &bo) const; + double ters_dfa(const Param& param, const F_FLOAT &r) const; KOKKOS_INLINE_FUNCTION - double ters_dbij(const int &i, const int &j, const int &k, const F_FLOAT &bo) const; + double ters_bij_k(const Param& param, const F_FLOAT &bo) const; KOKKOS_INLINE_FUNCTION - double bondorder(const int &i, const int &j, const int &k, - const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, - const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const; + double ters_dbij(const Param& param, const F_FLOAT &bo) const; KOKKOS_INLINE_FUNCTION - double ters_gijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const; + double bondorder(const Param& param, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2) const; KOKKOS_INLINE_FUNCTION - double ters_dgijk(const int &i, const int &j, const int &k, const F_FLOAT &cos) const; + double ters_gijk(const Param& param, const F_FLOAT &cos) const; KOKKOS_INLINE_FUNCTION - void ters_dthb(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, - const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, - const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, - F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const; + double ters_dgijk(const Param& param, const F_FLOAT &cos) const; KOKKOS_INLINE_FUNCTION - void ters_dthbj(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, - const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, - const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, - F_FLOAT *fj, F_FLOAT *fk) const; + void ters_dthb(const Param& param, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fi, F_FLOAT *fj, F_FLOAT *fk) const; KOKKOS_INLINE_FUNCTION - void ters_dthbk(const int &i, const int &j, const int &k, const F_FLOAT &prefactor, - const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, - const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, - F_FLOAT *fk) const; + void ters_dthbj(const Param& param, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fj, F_FLOAT *fk) const; + + KOKKOS_INLINE_FUNCTION + void ters_dthbk(const Param& param, const F_FLOAT &prefactor, + const F_FLOAT &rij, const F_FLOAT &dx1, const F_FLOAT &dy1, const F_FLOAT &dz1, + const F_FLOAT &rik, const F_FLOAT &dx2, const F_FLOAT &dy2, const F_FLOAT &dz2, + F_FLOAT *fk) const; KOKKOS_INLINE_FUNCTION double vec3_dot(const F_FLOAT x[3], const double y[3]) const { @@ -150,17 +130,6 @@ class PairTersoffZBLKokkos : public PairTersoffZBL { KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const; - struct params_ters { - KOKKOS_INLINE_FUNCTION - params_ters() {powerm=0;gamma=0;lam3=0;c=0;d=0;h=0;powern=0;beta=0;lam2=0;bigb=0; - bigr=0;bigd=0;lam1=0;biga=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;Z_i=0;Z_j=0;ZBLcut=0;ZBLexpscale=0;}; - KOKKOS_INLINE_FUNCTION - params_ters(int /*i*/) {powerm=0;gamma=0;lam3=0;c=0;d=0;h=0;powern=0;beta=0;lam2=0;bigb=0; - bigr=0;bigd=0;lam1=0;biga=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;Z_i=0;Z_j=0;ZBLcut=0;ZBLexpscale=0;}; - F_FLOAT powerm, gamma, lam3, c, d, h, powern, beta, lam2, bigb, bigr, - bigd, lam1, biga, cutsq, c1, c2, c3, c4, Z_i, Z_j, ZBLcut, ZBLexpscale; - }; - template KOKKOS_INLINE_FUNCTION void ev_tally(EV_FLOAT &ev, const int &i, const int &j, @@ -170,28 +139,33 @@ class PairTersoffZBLKokkos : public PairTersoffZBL { template KOKKOS_INLINE_FUNCTION void v_tally3(EV_FLOAT &ev, const int &i, const int &j, const int &k, - F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const; + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drij, F_FLOAT *drik) const; KOKKOS_INLINE_FUNCTION void v_tally3_atom(EV_FLOAT &ev, const int &i, const int &j, const int &k, - F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const; + F_FLOAT *fj, F_FLOAT *fk, F_FLOAT *drji, F_FLOAT *drjk) const; - void allocate() override; void setup_params() override; KOKKOS_INLINE_FUNCTION - double fermi_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + double fermi_k(const Param& param, const F_FLOAT &r) const; KOKKOS_INLINE_FUNCTION - double fermi_d_k(const int &i, const int &j, const int &k, const F_FLOAT &r) const; + double fermi_d_k(const Param& param, const F_FLOAT &r) const; protected: typedef Kokkos::DualView tdual_int_3d; - Kokkos::DualView k_params; - typename Kokkos::DualView::t_dev_const_um paramskk; - // hardwired to space for 12 atom types - //params_ters m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typedef typename tdual_int_3d::t_dev_const_randomread t_int_3d_randomread; + typedef typename tdual_int_3d::t_host t_host_int_3d; + + t_int_3d_randomread d_elem3param; + typename AT::t_int_1d_randomread d_map; + + typedef Kokkos::DualView tdual_param_1d; + typedef typename tdual_param_1d::t_dev t_param_1d; + typedef typename tdual_param_1d::t_host t_host_param_1d; + + t_param_1d d_params; int inum; typename AT::t_x_array_randomread x; @@ -238,7 +212,7 @@ class PairTersoffZBLKokkos : public PairTersoffZBL { Kokkos::View d_numneigh_short; // ZBL - F_FLOAT global_a_0; // Bohr radius for Coulomb repulsion + F_FLOAT global_a_0; // Bohr radius for Coulomb repulsion F_FLOAT global_epsilon_0; // permittivity of vacuum for Coulomb repulsion F_FLOAT global_e; // proton charge (negative of electron charge)