Added const to each of the snaKK routines; removed extra snaKK copies

This commit is contained in:
Evan Weinberg
2024-11-19 10:24:27 -08:00
parent 58d70366c2
commit c9754e5fd3
3 changed files with 127 additions and 155 deletions

View File

@ -624,7 +624,6 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
const int i = d_ilist[ii + chunk_offset];
const int itype = type[i];
const int ielem = d_map[itype];
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL);
@ -633,18 +632,18 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
}
if (quadraticflag) {
const auto idxb_max = my_sna.idxb_max;
const auto idxb_max = snaKK.idxb_max;
int k = ncoeff+1;
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
const auto idxb = icoeff % idxb_max;
const auto idx_chem = icoeff / idxb_max;
real_type bveci = my_sna.blist(ii, idx_chem, idxb);
real_type bveci = snaKK.blist(ii, idx_chem, idxb);
d_beta_pack(iatom_mod,icoeff,iatom_div) += d_coeffi[k]*bveci;
k++;
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
const auto jdxb = jcoeff % idxb_max;
const auto jdx_chem = jcoeff / idxb_max;
real_type bvecj = my_sna.blist(ii, jdx_chem, jdxb);
real_type bvecj = snaKK.blist(ii, jdx_chem, jdxb);
d_beta_pack(iatom_mod,icoeff,iatom_div) += d_coeffi[k]*bvecj;
d_beta_pack(iatom_mod,jcoeff,iatom_div) += d_coeffi[k]*bveci;
k++;
@ -657,8 +656,6 @@ template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeigh>::member_type& team) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// extract atom number
int ii = team.team_rank() + team.league_rank() * team.team_size();
if (ii >= chunk_size) return;
@ -725,20 +722,20 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
const F_FLOAT dy = x(j,1) - ytmp;
const F_FLOAT dz = x(j,2) - ztmp;
const int jelem = d_map[jtype];
my_sna.rij(ii,offset,0) = static_cast<real_type>(dx);
my_sna.rij(ii,offset,1) = static_cast<real_type>(dy);
my_sna.rij(ii,offset,2) = static_cast<real_type>(dz);
my_sna.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
my_sna.rcutij(ii,offset) = static_cast<real_type>((radi + d_radelem[jelem])*rcutfac);
my_sna.inside(ii,offset) = j;
snaKK.rij(ii,offset,0) = static_cast<real_type>(dx);
snaKK.rij(ii,offset,1) = static_cast<real_type>(dy);
snaKK.rij(ii,offset,2) = static_cast<real_type>(dz);
snaKK.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
snaKK.rcutij(ii,offset) = static_cast<real_type>((radi + d_radelem[jelem])*rcutfac);
snaKK.inside(ii,offset) = j;
if (switchinnerflag) {
my_sna.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
my_sna.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
}
if (chemflag)
my_sna.element(ii,offset) = jelem;
snaKK.element(ii,offset) = jelem;
else
my_sna.element(ii,offset) = 0;
snaKK.element(ii,offset) = 0;
}
offset++;
}
@ -748,7 +745,6 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
const int ii = iatom_mod + iatom_div * vector_length;
if (ii >= chunk_size) return;
@ -756,13 +752,12 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
const int ninside = d_ninside(ii);
if (jnbor >= ninside) return;
my_sna.compute_cayley_klein(iatom_mod,jnbor,iatom_div);
snaKK.compute_cayley_klein(iatom_mod,jnbor,iatom_div);
}
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPPreUi, const int iatom_mod, const int j, const int iatom_div) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
const int ii = iatom_mod + iatom_div * vector_length;
if (ii >= chunk_size) return;
@ -770,13 +765,12 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
int itype = type(ii);
int ielem = d_map[itype];
my_sna.pre_ui(iatom_mod, j, ielem, iatom_div);
snaKK.pre_ui(iatom_mod, j, ielem, iatom_div);
}
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeUiSmall,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiSmall>::member_type& team) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// extract flattened atom_div / neighbor number / bend location
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_ui;
@ -795,7 +789,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
const int ninside = d_ninside(ii);
if (jj >= ninside) return;
my_sna.compute_ui_small(team, iatom_mod, jbend, jj, iatom_div);
snaKK.compute_ui_small(team, iatom_mod, jbend, jj, iatom_div);
});
}
@ -803,7 +797,6 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeUiLarge,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiLarge>::member_type& team) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// extract flattened atom_div / neighbor number / bend location
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_ui;
@ -820,7 +813,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
const int ninside = d_ninside(ii);
if (jj >= ninside) return;
my_sna.compute_ui_large(team,iatom_mod, jj, iatom_div);
snaKK.compute_ui_large(team,iatom_mod, jj, iatom_div);
});
}
@ -829,21 +822,20 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
const int iatom = iatom_mod + iatom_div * vector_length;
if (iatom >= chunk_size) return;
if (idxu > my_sna.idxu_max) return;
if (idxu > snaKK.idxu_max) return;
int elem_count = chemflag ? nelements : 1;
for (int ielem = 0; ielem < elem_count; ielem++) {
const FullHalfMapper mapper = my_sna.idxu_full_half[idxu];
const FullHalfMapper mapper = snaKK.idxu_full_half[idxu];
auto utot_re = my_sna.ulisttot_re_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div);
auto utot_im = my_sna.ulisttot_im_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div);
auto utot_re = snaKK.ulisttot_re_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div);
auto utot_im = snaKK.ulisttot_im_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div);
if (mapper.flip_sign == 1) {
utot_im = -utot_im;
@ -851,11 +843,11 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
utot_re = -utot_re;
}
my_sna.ulisttot_pack(iatom_mod, idxu, ielem, iatom_div) = { utot_re, utot_im };
snaKK.ulisttot_pack(iatom_mod, idxu, ielem, iatom_div) = { utot_re, utot_im };
if (mapper.flip_sign == 0) {
my_sna.ylist_pack_re(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.;
my_sna.ylist_pack_im(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.;
snaKK.ylist_pack_re(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.;
snaKK.ylist_pack_im(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.;
}
}
}
@ -863,72 +855,67 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
// Call operator selected by the TagPairSNAPComputeYi tag. Takes one
// (iatom_mod, jjz, iatom_div) work item, range-checks it, and forwards it to
// compute_yi together with d_beta_pack.
// NOTE(review): this text appears to be a rendered diff whose +/- markers were
// lost. Each `my_sna` line looks like the removed form of the `snaKK` line that
// follows it (the commit title says the extra snaKK copies were removed) --
// confirm against the real file before compiling.
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeYi,const int iatom_mod, const int jjz, const int iatom_div) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// Rebuild the flat atom index from its (mod, div) split over vector_length.
const int iatom = iatom_mod + iatom_div * vector_length;
// Early-out for work items at or past chunk_size / idxz_max.
if (iatom >= chunk_size) return;
if (jjz >= my_sna.idxz_max) return;
if (jjz >= snaKK.idxz_max) return;
my_sna.compute_yi(iatom_mod,jjz,iatom_div,d_beta_pack);
snaKK.compute_yi(iatom_mod,jjz,iatom_div,d_beta_pack);
}
// Call operator selected by the TagPairSNAPComputeYiWithZlist tag. Same shape
// as the TagPairSNAPComputeYi overload, but forwards the work item to
// compute_yi_with_zlist (again passing d_beta_pack).
// NOTE(review): this text appears to be a rendered diff whose +/- markers were
// lost. Each `my_sna` line looks like the removed form of the `snaKK` line that
// follows it -- confirm against the real file before compiling.
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeYiWithZlist,const int iatom_mod, const int jjz, const int iatom_div) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// Rebuild the flat atom index from its (mod, div) split over vector_length.
const int iatom = iatom_mod + iatom_div * vector_length;
// Early-out for work items at or past chunk_size / idxz_max.
if (iatom >= chunk_size) return;
if (jjz >= my_sna.idxz_max) return;
if (jjz >= snaKK.idxz_max) return;
my_sna.compute_yi_with_zlist(iatom_mod,jjz,iatom_div,d_beta_pack);
snaKK.compute_yi_with_zlist(iatom_mod,jjz,iatom_div,d_beta_pack);
}
// Call operator selected by the TagPairSNAPComputeZi tag. Range-checks one
// (iatom_mod, jjz, iatom_div) work item and forwards it to compute_zi.
// NOTE(review): this text appears to be a rendered diff whose +/- markers were
// lost. Each `my_sna` line looks like the removed form of the `snaKK` line that
// follows it -- confirm against the real file before compiling.
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeZi,const int iatom_mod, const int jjz, const int iatom_div) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// Rebuild the flat atom index from its (mod, div) split over vector_length.
const int iatom = iatom_mod + iatom_div * vector_length;
// Early-out for work items at or past chunk_size / idxz_max.
if (iatom >= chunk_size) return;
if (jjz >= my_sna.idxz_max) return;
if (jjz >= snaKK.idxz_max) return;
my_sna.compute_zi(iatom_mod,jjz,iatom_div);
snaKK.compute_zi(iatom_mod,jjz,iatom_div);
}
// Call operator selected by the TagPairSNAPComputeBi tag. Range-checks one
// (iatom_mod, jjb, iatom_div) work item and forwards it to compute_bi.
// NOTE(review): this text appears to be a rendered diff whose +/- markers were
// lost. Each `my_sna` line looks like the removed form of the `snaKK` line that
// follows it -- confirm against the real file before compiling.
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeBi,const int iatom_mod, const int jjb, const int iatom_div) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// Rebuild the flat atom index from its (mod, div) split over vector_length.
const int iatom = iatom_mod + iatom_div * vector_length;
// Early-out for work items at or past chunk_size / idxb_max.
if (iatom >= chunk_size) return;
if (jjb >= my_sna.idxb_max) return;
if (jjb >= snaKK.idxb_max) return;
my_sna.compute_bi(iatom_mod,jjb,iatom_div);
snaKK.compute_bi(iatom_mod,jjb,iatom_div);
}
// Call operator selected by the TagPairSNAPTransformBi tag. For one
// (iatom_mod, idxb, iatom_div) work item it copies, for every itriple below
// ntriples, the value blist_pack(iatom_mod, idxb, itriple, iatom_div) into
// blist(iatom, itriple, idxb) -- i.e. the same values re-indexed by the flat
// atom index.
// NOTE(review): this text appears to be a rendered diff whose +/- markers were
// lost. Each `my_sna` line looks like the removed form of the `snaKK` line that
// follows it -- confirm against the real file before compiling.
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// Rebuild the flat atom index from its (mod, div) split over vector_length.
const int iatom = iatom_mod + iatom_div * vector_length;
// Early-out for work items at or past chunk_size / idxb_max.
if (iatom >= chunk_size) return;
if (idxb >= my_sna.idxb_max) return;
if (idxb >= snaKK.idxb_max) return;
const int ntriples = my_sna.ntriples;
const int ntriples = snaKK.ntriples;
for (int itriple = 0; itriple < ntriples; itriple++) {
// Read from the packed view, write to the (iatom, itriple, idxb) view.
const real_type blocal = my_sna.blist_pack(iatom_mod, idxb, itriple, iatom_div);
const real_type blocal = snaKK.blist_pack(iatom_mod, idxb, itriple, iatom_div);
my_sna.blist(iatom, itriple, idxb) = blocal;
snaKK.blist(iatom, itriple, idxb) = blocal;
}
}
@ -937,7 +924,6 @@ template<class DeviceType, typename real_type, int vector_length>
template<int dir>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeFusedDeidrjSmall<dir>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeFusedDeidrjSmall<dir> >::member_type& team) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// extract flattened atom_div / neighbor number / bend location
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_fused_deidrj;
@ -956,7 +942,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
const int ninside = d_ninside(ii);
if (jj >= ninside) return;
my_sna.template compute_fused_deidrj_small<dir>(team, iatom_mod, jbend, jj, iatom_div);
snaKK.template compute_fused_deidrj_small<dir>(team, iatom_mod, jbend, jj, iatom_div);
});
@ -966,7 +952,6 @@ template<class DeviceType, typename real_type, int vector_length>
template<int dir>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeFusedDeidrjLarge<dir>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeFusedDeidrjLarge<dir> >::member_type& team) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// extract flattened atom_div / neighbor number / bend location
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_fused_deidrj;
@ -983,7 +968,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
const int ninside = d_ninside(ii);
if (jj >= ninside) return;
my_sna.template compute_fused_deidrj_large<dir>(team, iatom_mod, jj, iatom_div);
snaKK.template compute_fused_deidrj_large<dir>(team, iatom_mod, jj, iatom_div);
});
}
@ -1003,7 +988,6 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
const int i = d_ilist[ii + chunk_offset];
const int itype = type[i];
const int ielem = d_map[itype];
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL);
@ -1011,18 +995,18 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
d_beta(icoeff,ii) = d_coeffi[icoeff+1];
if (quadraticflag) {
const auto idxb_max = my_sna.idxb_max;
const auto idxb_max = snaKK.idxb_max;
int k = ncoeff+1;
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
const auto idxb = icoeff % idxb_max;
const auto idx_chem = icoeff / idxb_max;
real_type bveci = my_sna.blist(ii,idx_chem,idxb);
real_type bveci = snaKK.blist(ii,idx_chem,idxb);
d_beta(icoeff,ii) += d_coeffi[k]*bveci;
k++;
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
const auto jdxb = jcoeff % idxb_max;
const auto jdx_chem = jcoeff / idxb_max;
real_type bvecj = my_sna.blist(ii,jdx_chem,jdxb);
real_type bvecj = snaKK.blist(ii,jdx_chem,jdxb);
d_beta(icoeff,ii) += d_coeffi[k]*bvecj;
d_beta(jcoeff,ii) += d_coeffi[k]*bveci;
k++;
@ -1035,10 +1019,8 @@ template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeighCPU>::member_type& team) const {
int ii = team.league_rank();
const int i = d_ilist[ii + chunk_offset];
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
const double xtmp = x(i,0);
const double ytmp = x(i,1);
const double ztmp = x(i,2);
@ -1088,20 +1070,20 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
if (rsq < rnd_cutsq(itype,jtype)) {
if (final) {
my_sna.rij(ii,offset,0) = static_cast<real_type>(dx);
my_sna.rij(ii,offset,1) = static_cast<real_type>(dy);
my_sna.rij(ii,offset,2) = static_cast<real_type>(dz);
my_sna.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
my_sna.rcutij(ii,offset) = static_cast<real_type>((radi + d_radelem[jelem])*rcutfac);
my_sna.inside(ii,offset) = j;
snaKK.rij(ii,offset,0) = static_cast<real_type>(dx);
snaKK.rij(ii,offset,1) = static_cast<real_type>(dy);
snaKK.rij(ii,offset,2) = static_cast<real_type>(dz);
snaKK.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
snaKK.rcutij(ii,offset) = static_cast<real_type>((radi + d_radelem[jelem])*rcutfac);
snaKK.inside(ii,offset) = j;
if (switchinnerflag) {
my_sna.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
my_sna.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
}
if (chemflag)
my_sna.element(ii,offset) = jelem;
snaKK.element(ii,offset) = jelem;
else
my_sna.element(ii,offset) = 0;
snaKK.element(ii,offset) = 0;
}
offset++;
}
@ -1112,7 +1094,6 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPPreUiCPU>::member_type& team) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// Extract the atom number
const int ii = team.team_rank() + team.team_size() * team.league_rank();
@ -1120,7 +1101,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
int itype = type(ii);
int ielem = d_map[itype];
my_sna.pre_ui_cpu(team,ii,ielem);
snaKK.pre_ui_cpu(team,ii,ielem);
}
@ -1128,7 +1109,6 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiCPU>::member_type& team) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// Extract the atom number
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
@ -1139,13 +1119,12 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
const int ninside = d_ninside(ii);
if (jj >= ninside) return;
my_sna.compute_ui_cpu(team,ii,jj);
snaKK.compute_ui_cpu(team,ii,jj);
}
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformUiCPU, const int j, const int iatom) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
if (iatom >= chunk_size) return;
@ -1156,8 +1135,8 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
// De-symmetrize ulisttot
for (int ielem = 0; ielem < elem_count; ielem++) {
const int jju_half = my_sna.idxu_half_block(j);
const int jju = my_sna.idxu_block(j);
const int jju_half = snaKK.idxu_half_block(j);
const int jju = snaKK.idxu_block(j);
for (int mb = 0; 2*mb <= j; mb++) {
for (int ma = 0; ma <= j; ma++) {
@ -1168,13 +1147,13 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
const int idxu = jju + idxu_shift;
// Load ulist
auto utot = my_sna.ulisttot(idxu_half, ielem, iatom);
auto utot = snaKK.ulisttot(idxu_half, ielem, iatom);
// Store
my_sna.ulisttot_full(idxu, ielem, iatom) = utot;
snaKK.ulisttot_full(idxu, ielem, iatom) = utot;
// Zero Yi
my_sna.ylist(idxu_half, ielem, iatom) = {0., 0.};
snaKK.ylist(idxu_half, ielem, iatom) = {0., 0.};
// Symmetric term
const int sign_factor = (((ma+mb)%2==0)?1:-1);
@ -1186,7 +1165,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
utot.re = -utot.re;
}
my_sna.ulisttot_full(idxu_flip, ielem, iatom) = utot;
snaKK.ulisttot_full(idxu_flip, ielem, iatom) = utot;
}
}
}
@ -1195,29 +1174,25 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
// Call operator selected by the TagPairSNAPComputeYiCPU tag. Forwards index ii
// and d_beta to compute_yi_cpu; no range check is done here.
// NOTE(review): this text appears to be a rendered diff whose +/- markers were
// lost. Each `my_sna` line looks like the removed form of the `snaKK` line that
// follows it -- confirm against the real file before compiling.
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeYiCPU,const int& ii) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
my_sna.compute_yi_cpu(ii,d_beta);
snaKK.compute_yi_cpu(ii,d_beta);
}
// Call operator selected by the TagPairSNAPComputeZiCPU tag. Forwards index ii
// to compute_zi_cpu; no range check is done here.
// NOTE(review): this text appears to be a rendered diff whose +/- markers were
// lost. Each `my_sna` line looks like the removed form of the `snaKK` line that
// follows it -- confirm against the real file before compiling.
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeZiCPU,const int& ii) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
my_sna.compute_zi_cpu(ii);
snaKK.compute_zi_cpu(ii);
}
// Call operator selected by the TagPairSNAPComputeBiCPU tag, invoked with a
// TeamPolicy member. Uses the team's league rank as the index ii and forwards
// (team, ii) to compute_bi_cpu.
// NOTE(review): this text appears to be a rendered diff whose +/- markers were
// lost. Each `my_sna` line looks like the removed form of the `snaKK` line that
// follows it -- confirm against the real file before compiling.
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeBiCPU>::member_type& team) const {
// One league entry per ii; team_rank is not used to form the index here.
int ii = team.league_rank();
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
my_sna.compute_bi_cpu(team,ii);
snaKK.compute_bi_cpu(team,ii);
}
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDuidrjCPU>::member_type& team) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// Extract the atom number
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
@ -1228,13 +1203,12 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
const int ninside = d_ninside(ii);
if (jj >= ninside) return;
my_sna.compute_duidrj_cpu(team,ii,jj);
snaKK.compute_duidrj_cpu(team,ii,jj);
}
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDeidrjCPU>::member_type& team) const {
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
// Extract the atom number
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
@ -1245,7 +1219,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
const int ninside = d_ninside(ii);
if (jj >= ninside) return;
my_sna.compute_deidrj_cpu(team,ii,jj);
snaKK.compute_deidrj_cpu(team,ii,jj);
}
/* ----------------------------------------------------------------------
@ -1265,17 +1239,15 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
const int i = d_ilist[ii + chunk_offset];
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
const int ninside = d_ninside(ii);
for (int jj = 0; jj < ninside; jj++) {
int j = my_sna.inside(ii,jj);
int j = snaKK.inside(ii,jj);
F_FLOAT fij[3];
fij[0] = my_sna.dedr(ii,jj,0);
fij[1] = my_sna.dedr(ii,jj,1);
fij[2] = my_sna.dedr(ii,jj,2);
fij[0] = snaKK.dedr(ii,jj,0);
fij[1] = snaKK.dedr(ii,jj,1);
fij[2] = snaKK.dedr(ii,jj,2);
a_f(i,0) += fij[0];
a_f(i,1) += fij[1];
@ -1288,8 +1260,8 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
if (vflag_either) {
v_tally_xyz<NEIGHFLAG>(ev,i,j,
fij[0],fij[1],fij[2],
-my_sna.rij(ii,jj,0),-my_sna.rij(ii,jj,1),
-my_sna.rij(ii,jj,2));
-snaKK.rij(ii,jj,0),-snaKK.rij(ii,jj,1),
-snaKK.rij(ii,jj,2));
}
}
@ -1316,7 +1288,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
const auto idxb = icoeff % idxb_max;
const auto idx_chem = icoeff / idxb_max;
evdwl += d_coeffi[icoeff+1]*my_sna.blist(ii,idx_chem,idxb);
evdwl += d_coeffi[icoeff+1]*snaKK.blist(ii,idx_chem,idxb);
}
// quadratic contributions
@ -1325,12 +1297,12 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
const auto idxb = icoeff % idxb_max;
const auto idx_chem = icoeff / idxb_max;
real_type bveci = my_sna.blist(ii,idx_chem,idxb);
real_type bveci = snaKK.blist(ii,idx_chem,idxb);
evdwl += 0.5*d_coeffi[k++]*bveci*bveci;
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
auto jdxb = jcoeff % idxb_max;
auto jdx_chem = jcoeff / idxb_max;
auto bvecj = my_sna.blist(ii,jdx_chem,jdxb);
auto bvecj = snaKK.blist(ii,jdx_chem,jdxb);
evdwl += d_coeffi[k++]*bveci*bvecj;
}
}

View File

@ -187,84 +187,84 @@ class SNAKokkos {
// functions for bispectrum coefficients, GPU only
KOKKOS_INLINE_FUNCTION
void compute_cayley_klein(const int&, const int&, const int&);
void compute_cayley_klein(const int&, const int&, const int&) const;
KOKKOS_INLINE_FUNCTION
void pre_ui(const int&, const int&, const int&, const int&); // ForceSNAP
void pre_ui(const int&, const int&, const int&, const int&) const; // ForceSNAP
// version of the code with parallelism over j_bend
KOKKOS_INLINE_FUNCTION
void compute_ui_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int); // ForceSNAP
void compute_ui_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int) const; // ForceSNAP
// version of the code without parallelism over j_bend
KOKKOS_INLINE_FUNCTION
void compute_ui_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int); // ForceSNAP
void compute_ui_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int) const; // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_zi(const int&, const int&, const int&); // ForceSNAP
void compute_zi(const int&, const int&, const int&) const; // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_yi(int,int,int,
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack) const; // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_yi_with_zlist(int,int,int,
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack) const; // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_bi(const int&, const int&, const int&); // ForceSNAP
void compute_bi(const int&, const int&, const int&) const; // ForceSNAP
// functions for derivatives, GPU only
// version of the code with parallelism over j_bend
template<int dir>
KOKKOS_INLINE_FUNCTION
void compute_fused_deidrj_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int); //ForceSNAP
void compute_fused_deidrj_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int) const; //ForceSNAP
// version of the code without parallelism over j_bend
template<int dir>
KOKKOS_INLINE_FUNCTION
void compute_fused_deidrj_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int); //ForceSNAP
void compute_fused_deidrj_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int) const; //ForceSNAP
// core "evaluation" functions that get plugged into "compute" functions
// plugged into compute_ui_small, compute_ui_large
KOKKOS_FORCEINLINE_FUNCTION
void evaluate_ui_jbend(const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&, const int&,
const int&, const int&, const int&);
const int&, const int&, const int&) const;
// plugged into compute_zi, compute_yi
KOKKOS_FORCEINLINE_FUNCTION
complex evaluate_zi(const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&,
const int&, const int&, const int&, const int&, const real_type*);
const int&, const int&, const int&, const int&, const real_type*) const;
// plugged into compute_yi, compute_yi_with_zlist
KOKKOS_FORCEINLINE_FUNCTION
real_type evaluate_beta_scaled(const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&,
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &);
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &) const;
// plugged into compute_fused_deidrj_small, compute_fused_deidrj_large
KOKKOS_FORCEINLINE_FUNCTION
real_type evaluate_duidrj_jbend(const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&,
const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&,
const int&, const int&, const int&, const int&);
const int&, const int&, const int&, const int&) const;
// functions for bispectrum coefficients, CPU only
KOKKOS_INLINE_FUNCTION
void pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&,const int&); // ForceSNAP
void pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&,const int&) const; // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
void compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int) const; // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_zi_cpu(const int&); // ForceSNAP
void compute_zi_cpu(const int&) const; // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_yi_cpu(int,
const Kokkos::View<real_type**, DeviceType> &beta); // ForceSNAP
const Kokkos::View<real_type**, DeviceType> &beta) const; // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int) const; // ForceSNAP
// functions for derivatives, CPU only
KOKKOS_INLINE_FUNCTION
void compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); //ForceSNAP
void compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int) const; //ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
void compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int) const; // ForceSNAP
KOKKOS_INLINE_FUNCTION
real_type compute_sfac(real_type, real_type, real_type, real_type); // add_uarraytot, compute_duarray
real_type compute_sfac(real_type, real_type, real_type, real_type) const; // add_uarraytot, compute_duarray
KOKKOS_INLINE_FUNCTION
real_type compute_dsfac(real_type, real_type, real_type, real_type); // compute_duarray
real_type compute_dsfac(real_type, real_type, real_type, real_type) const; // compute_duarray
KOKKOS_INLINE_FUNCTION
void compute_s_dsfac(const real_type, const real_type, const real_type, const real_type, real_type&, real_type&); // compute_cayley_klein
void compute_s_dsfac(const real_type, const real_type, const real_type, const real_type, real_type&, real_type&) const; // compute_cayley_klein
#ifdef TIMING_INFO
double* timers;
@ -365,12 +365,12 @@ class SNAKokkos {
void init_rootpqarray(); // init()
KOKKOS_INLINE_FUNCTION
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real_type&, const real_type&, const real_type&, const real_type&, const real_type&, int); // compute_ui
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real_type&, const real_type&, const real_type&, const real_type&, const real_type&, int) const; // compute_ui
KOKKOS_INLINE_FUNCTION
void compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
const real_type&, const real_type&, const real_type&,
const real_type&, const real_type&); // compute_ui_cpu
const real_type&, const real_type&) const; // compute_ui_cpu
inline
@ -382,7 +382,7 @@ class SNAKokkos {
void compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
const real_type&, const real_type&, const real_type&, // compute_duidrj_cpu
const real_type&, const real_type&, const real_type&, const real_type&, const real_type&,
const real_type&, const real_type&);
const real_type&, const real_type&) const;
// Sets the style for the switching function
// 0 = none

View File

@ -371,7 +371,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::grow_rij(int newnatom, int
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_cayley_klein(const int& iatom_mod, const int& jnbor, const int& iatom_div)
void SNAKokkos<DeviceType, real_type, vector_length>::compute_cayley_klein(const int& iatom_mod, const int& jnbor, const int& iatom_div) const
{
const int iatom = iatom_mod + vector_length * iatom_div;
const real_type x = rij(iatom,jnbor,0);
@ -460,7 +460,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_cayley_klein(const
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui(const int& iatom_mod, const int& j, const int& ielem, const int& iatom_div)
void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui(const int& iatom_mod, const int& j, const int& ielem, const int& iatom_div) const
{
for (int jelem = 0; jelem < nelements; jelem++) {
@ -494,7 +494,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui(const int& iatom_mo
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div)
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div) const
{
// get shared memory offset
@ -525,7 +525,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_small(const typ
// and some amount of load imbalance, at the expense of reducing parallelism
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int jnbor, const int iatom_div)
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int jnbor, const int iatom_div) const
{
// get shared memory offset
// scratch size: 32 atoms * (twojmax+1) cached values, no double buffer
@ -558,7 +558,7 @@ template<class DeviceType, typename real_type, int vector_length>
KOKKOS_FORCEINLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::evaluate_ui_jbend(const WignerWrapper<real_type, vector_length>& ulist_wrapper,
const complex& a, const complex& b, const real_type& sfac, const int& jelem,
const int& iatom_mod, const int& j_bend, const int& iatom_div)
const int& iatom_mod, const int& j_bend, const int& iatom_div) const
{
// utot(j,ma,mb) = 0 for all j,ma,mb
@ -664,7 +664,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::evaluate_ui_jbend(const Wi
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi(const int& iatom_mod, const int& jjz, const int& iatom_div)
void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi(const int& iatom_mod, const int& jjz, const int& iatom_div) const
{
int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, idxcg;
@ -692,7 +692,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi(const int& iato
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi(const int& iatom_mod, const int& jjb, const int& iatom_div)
void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi(const int& iatom_mod, const int& jjb, const int& iatom_div) const
{
// for j1 = 0,...,twojmax
// for j2 = 0,twojmax
@ -786,7 +786,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi(const int& iato
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi(int iatom_mod, int jjz, int iatom_div,
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack)
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack) const
{
int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju_half, idxcg;
@ -827,7 +827,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi(int iatom_mod,
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi_with_zlist(int iatom_mod, int jjz, int iatom_div,
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack)
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack) const
{
int j1, j2, j, jju_half;
idxz(jjz).get_yi_with_zlist(j1, j2, j, jju_half);
@ -859,7 +859,7 @@ template<class DeviceType, typename real_type, int vector_length>
KOKKOS_FORCEINLINE_FUNCTION
typename SNAKokkos<DeviceType, real_type, vector_length>::complex SNAKokkos<DeviceType, real_type, vector_length>::evaluate_zi(const int& j1, const int& j2, const int& j,
const int& ma1min, const int& ma2max, const int& mb1min, const int& mb2max, const int& na, const int& nb,
const int& iatom_mod, const int& elem1, const int& elem2, const int& iatom_div, const real_type* cgblock) {
const int& iatom_mod, const int& elem1, const int& elem2, const int& iatom_div, const real_type* cgblock) const {
complex ztmp = complex::zero();
@ -911,7 +911,7 @@ template<class DeviceType, typename real_type, int vector_length>
KOKKOS_FORCEINLINE_FUNCTION
typename SNAKokkos<DeviceType, real_type, vector_length>::real_type SNAKokkos<DeviceType, real_type, vector_length>::evaluate_beta_scaled(const int& j1, const int& j2, const int& j,
const int& iatom_mod, const int& elem1, const int& elem2, const int& elem3, const int& iatom_div,
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack) {
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack) const {
real_type betaj = 0;
@ -951,7 +951,7 @@ typename SNAKokkos<DeviceType, real_type, vector_length>::real_type SNAKokkos<De
template<class DeviceType, typename real_type, int vector_length>
template<int dir>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_fused_deidrj_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div)
void SNAKokkos<DeviceType, real_type, vector_length>::compute_fused_deidrj_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div) const
{
// get shared memory offset
// scratch size: 32 atoms * (twojmax+1) cached values, no double buffer
@ -988,7 +988,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_fused_deidrj_small
template<class DeviceType, typename real_type, int vector_length>
template<int dir>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_fused_deidrj_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int jnbor, const int iatom_div)
void SNAKokkos<DeviceType, real_type, vector_length>::compute_fused_deidrj_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int jnbor, const int iatom_div) const
{
// get shared memory offset
// scratch size: 32 atoms * (twojmax+1) cached values, no double buffer
@ -1032,7 +1032,7 @@ template<class DeviceType, typename real_type, int vector_length>
KOKKOS_FORCEINLINE_FUNCTION
typename SNAKokkos<DeviceType, real_type, vector_length>::real_type SNAKokkos<DeviceType, real_type, vector_length>::evaluate_duidrj_jbend(const WignerWrapper<real_type, vector_length>& ulist_wrapper, const complex& a, const complex& b, const real_type& sfac,
const WignerWrapper<real_type, vector_length>& dulist_wrapper, const complex& da, const complex& db, const real_type& dsfacu,
const int& jelem, const int& iatom_mod, const int& j_bend, const int& iatom_div) {
const int& jelem, const int& iatom_mod, const int& j_bend, const int& iatom_div) const {
real_type dedr_full_sum = static_cast<real_type>(0);
@ -1178,7 +1178,7 @@ typename SNAKokkos<DeviceType, real_type, vector_length>::real_type SNAKokkos<De
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int& iatom, const int& ielem)
void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int& iatom, const int& ielem) const
{
for (int jelem = 0; jelem < nelements; jelem++) {
for (int j = 0; j <= twojmax; j++) {
@ -1212,7 +1212,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui_cpu(const typename
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor) const
{
real_type rsq, r, x, y, z, z0, theta0;
@ -1242,7 +1242,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_cpu(const typen
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi_cpu(const int& iter)
void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi_cpu(const int& iter) const
{
const int iatom = iter / idxz_max;
const int jjz = iter % idxz_max;
@ -1305,7 +1305,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi_cpu(const int&
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom)
void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom) const
{
// for j1 = 0,...,twojmax
// for j2 = 0,twojmax
@ -1404,7 +1404,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi_cpu(const typen
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi_cpu(int iter,
const Kokkos::View<real_type**, DeviceType> &beta)
const Kokkos::View<real_type**, DeviceType> &beta) const
{
real_type betaj;
const int iatom = iter / idxz_max;
@ -1504,7 +1504,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi_cpu(int iter,
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
void SNAKokkos<DeviceType, real_type, vector_length>::compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor) const
{
real_type rsq, r, x, y, z, z0, theta0, cs, sn;
real_type dz0dr;
@ -1536,7 +1536,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_duidrj_cpu(const t
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
void SNAKokkos<DeviceType, real_type, vector_length>::compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor) const
{
t_scalar3<real_type> final_sum;
const int jelem = element(iatom, jnbor);
@ -1604,7 +1604,7 @@ template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
const real_type& r, const real_type& wj, const real_type& rcut,
const real_type& sinner, const real_type& dinner, int jelem)
const real_type& sinner, const real_type& dinner, int jelem) const
{
const real_type sfac = compute_sfac(r, rcut, sinner, dinner) * wj;
@ -1634,7 +1634,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::add_uarraytot(const typena
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
const real_type& x, const real_type& y, const real_type& z, const real_type& z0, const real_type& r)
const real_type& x, const real_type& y, const real_type& z, const real_type& z0, const real_type& r) const
{
real_type r0inv;
real_type a_r, b_r, a_i, b_i;
@ -1728,7 +1728,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_duarray_cpu(const
const real_type& x, const real_type& y, const real_type& z,
const real_type& z0, const real_type& r, const real_type& dz0dr,
const real_type& wj, const real_type& rcut,
const real_type& sinner, const real_type& dinner)
const real_type& sinner, const real_type& dinner) const
{
real_type r0inv;
real_type a_r, a_i, b_r, b_i;
@ -2206,7 +2206,7 @@ int SNAKokkos<DeviceType, real_type, vector_length>::compute_ncoeff()
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_sfac(real_type r, real_type rcut, real_type sinner, real_type dinner)
real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_sfac(real_type r, real_type rcut, real_type sinner, real_type dinner) const
{
real_type sfac_outer;
constexpr real_type one = static_cast<real_type>(1.0);
@ -2239,7 +2239,7 @@ real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_sfac(real_typ
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_dsfac(real_type r, real_type rcut, real_type sinner, real_type dinner)
real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_dsfac(real_type r, real_type rcut, real_type sinner, real_type dinner) const
{
real_type sfac_outer, dsfac_outer, sfac_inner, dsfac_inner;
constexpr real_type one = static_cast<real_type>(1.0);
@ -2287,7 +2287,7 @@ real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_dsfac(real_ty
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::compute_s_dsfac(const real_type r, const real_type rcut, const real_type sinner, const real_type dinner, real_type& sfac, real_type& dsfac) {
void SNAKokkos<DeviceType, real_type, vector_length>::compute_s_dsfac(const real_type r, const real_type rcut, const real_type sinner, const real_type dinner, real_type& sfac, real_type& dsfac) const {
real_type sfac_outer, dsfac_outer, sfac_inner, dsfac_inner;
constexpr real_type one = static_cast<real_type>(1.0);