Added const to each of the snaKK routines; removed extra snaKK copies
This commit is contained in:
@ -624,7 +624,6 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
const int itype = type[i];
|
||||
const int ielem = d_map[itype];
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL);
|
||||
|
||||
@ -633,18 +632,18 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
}
|
||||
|
||||
if (quadraticflag) {
|
||||
const auto idxb_max = my_sna.idxb_max;
|
||||
const auto idxb_max = snaKK.idxb_max;
|
||||
int k = ncoeff+1;
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
const auto idxb = icoeff % idxb_max;
|
||||
const auto idx_chem = icoeff / idxb_max;
|
||||
real_type bveci = my_sna.blist(ii, idx_chem, idxb);
|
||||
real_type bveci = snaKK.blist(ii, idx_chem, idxb);
|
||||
d_beta_pack(iatom_mod,icoeff,iatom_div) += d_coeffi[k]*bveci;
|
||||
k++;
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
const auto jdxb = jcoeff % idxb_max;
|
||||
const auto jdx_chem = jcoeff / idxb_max;
|
||||
real_type bvecj = my_sna.blist(ii, jdx_chem, jdxb);
|
||||
real_type bvecj = snaKK.blist(ii, jdx_chem, jdxb);
|
||||
d_beta_pack(iatom_mod,icoeff,iatom_div) += d_coeffi[k]*bvecj;
|
||||
d_beta_pack(iatom_mod,jcoeff,iatom_div) += d_coeffi[k]*bveci;
|
||||
k++;
|
||||
@ -657,8 +656,6 @@ template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeigh>::member_type& team) const {
|
||||
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// extract atom number
|
||||
int ii = team.team_rank() + team.league_rank() * team.team_size();
|
||||
if (ii >= chunk_size) return;
|
||||
@ -725,20 +722,20 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
const F_FLOAT dy = x(j,1) - ytmp;
|
||||
const F_FLOAT dz = x(j,2) - ztmp;
|
||||
const int jelem = d_map[jtype];
|
||||
my_sna.rij(ii,offset,0) = static_cast<real_type>(dx);
|
||||
my_sna.rij(ii,offset,1) = static_cast<real_type>(dy);
|
||||
my_sna.rij(ii,offset,2) = static_cast<real_type>(dz);
|
||||
my_sna.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
|
||||
my_sna.rcutij(ii,offset) = static_cast<real_type>((radi + d_radelem[jelem])*rcutfac);
|
||||
my_sna.inside(ii,offset) = j;
|
||||
snaKK.rij(ii,offset,0) = static_cast<real_type>(dx);
|
||||
snaKK.rij(ii,offset,1) = static_cast<real_type>(dy);
|
||||
snaKK.rij(ii,offset,2) = static_cast<real_type>(dz);
|
||||
snaKK.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
|
||||
snaKK.rcutij(ii,offset) = static_cast<real_type>((radi + d_radelem[jelem])*rcutfac);
|
||||
snaKK.inside(ii,offset) = j;
|
||||
if (switchinnerflag) {
|
||||
my_sna.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
|
||||
my_sna.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
|
||||
snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
|
||||
snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
|
||||
}
|
||||
if (chemflag)
|
||||
my_sna.element(ii,offset) = jelem;
|
||||
snaKK.element(ii,offset) = jelem;
|
||||
else
|
||||
my_sna.element(ii,offset) = 0;
|
||||
snaKK.element(ii,offset) = 0;
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
@ -748,7 +745,6 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeCayleyKlein,const int iatom_mod, const int jnbor, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int ii = iatom_mod + iatom_div * vector_length;
|
||||
if (ii >= chunk_size) return;
|
||||
@ -756,13 +752,12 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
const int ninside = d_ninside(ii);
|
||||
if (jnbor >= ninside) return;
|
||||
|
||||
my_sna.compute_cayley_klein(iatom_mod,jnbor,iatom_div);
|
||||
snaKK.compute_cayley_klein(iatom_mod,jnbor,iatom_div);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPPreUi, const int iatom_mod, const int j, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int ii = iatom_mod + iatom_div * vector_length;
|
||||
if (ii >= chunk_size) return;
|
||||
@ -770,13 +765,12 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
int itype = type(ii);
|
||||
int ielem = d_map[itype];
|
||||
|
||||
my_sna.pre_ui(iatom_mod, j, ielem, iatom_div);
|
||||
snaKK.pre_ui(iatom_mod, j, ielem, iatom_div);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeUiSmall,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiSmall>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// extract flattened atom_div / neighbor number / bend location
|
||||
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_ui;
|
||||
@ -795,7 +789,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
const int ninside = d_ninside(ii);
|
||||
if (jj >= ninside) return;
|
||||
|
||||
my_sna.compute_ui_small(team, iatom_mod, jbend, jj, iatom_div);
|
||||
snaKK.compute_ui_small(team, iatom_mod, jbend, jj, iatom_div);
|
||||
});
|
||||
|
||||
}
|
||||
@ -803,7 +797,6 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeUiLarge,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiLarge>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// extract flattened atom_div / neighbor number / bend location
|
||||
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_ui;
|
||||
@ -820,7 +813,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
const int ninside = d_ninside(ii);
|
||||
if (jj >= ninside) return;
|
||||
|
||||
my_sna.compute_ui_large(team,iatom_mod, jj, iatom_div);
|
||||
snaKK.compute_ui_large(team,iatom_mod, jj, iatom_div);
|
||||
});
|
||||
|
||||
}
|
||||
@ -829,21 +822,20 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformUi,const int iatom_mod, const int idxu, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||
if (iatom >= chunk_size) return;
|
||||
|
||||
if (idxu > my_sna.idxu_max) return;
|
||||
if (idxu > snaKK.idxu_max) return;
|
||||
|
||||
int elem_count = chemflag ? nelements : 1;
|
||||
|
||||
for (int ielem = 0; ielem < elem_count; ielem++) {
|
||||
|
||||
const FullHalfMapper mapper = my_sna.idxu_full_half[idxu];
|
||||
const FullHalfMapper mapper = snaKK.idxu_full_half[idxu];
|
||||
|
||||
auto utot_re = my_sna.ulisttot_re_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div);
|
||||
auto utot_im = my_sna.ulisttot_im_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div);
|
||||
auto utot_re = snaKK.ulisttot_re_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div);
|
||||
auto utot_im = snaKK.ulisttot_im_pack(iatom_mod, mapper.idxu_half, ielem, iatom_div);
|
||||
|
||||
if (mapper.flip_sign == 1) {
|
||||
utot_im = -utot_im;
|
||||
@ -851,11 +843,11 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
utot_re = -utot_re;
|
||||
}
|
||||
|
||||
my_sna.ulisttot_pack(iatom_mod, idxu, ielem, iatom_div) = { utot_re, utot_im };
|
||||
snaKK.ulisttot_pack(iatom_mod, idxu, ielem, iatom_div) = { utot_re, utot_im };
|
||||
|
||||
if (mapper.flip_sign == 0) {
|
||||
my_sna.ylist_pack_re(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.;
|
||||
my_sna.ylist_pack_im(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.;
|
||||
snaKK.ylist_pack_re(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.;
|
||||
snaKK.ylist_pack_im(iatom_mod, mapper.idxu_half, ielem, iatom_div) = 0.;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -863,72 +855,67 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeYi,const int iatom_mod, const int jjz, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||
if (iatom >= chunk_size) return;
|
||||
|
||||
if (jjz >= my_sna.idxz_max) return;
|
||||
if (jjz >= snaKK.idxz_max) return;
|
||||
|
||||
my_sna.compute_yi(iatom_mod,jjz,iatom_div,d_beta_pack);
|
||||
snaKK.compute_yi(iatom_mod,jjz,iatom_div,d_beta_pack);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeYiWithZlist,const int iatom_mod, const int jjz, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||
if (iatom >= chunk_size) return;
|
||||
|
||||
if (jjz >= my_sna.idxz_max) return;
|
||||
if (jjz >= snaKK.idxz_max) return;
|
||||
|
||||
my_sna.compute_yi_with_zlist(iatom_mod,jjz,iatom_div,d_beta_pack);
|
||||
snaKK.compute_yi_with_zlist(iatom_mod,jjz,iatom_div,d_beta_pack);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeZi,const int iatom_mod, const int jjz, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||
if (iatom >= chunk_size) return;
|
||||
|
||||
if (jjz >= my_sna.idxz_max) return;
|
||||
if (jjz >= snaKK.idxz_max) return;
|
||||
|
||||
my_sna.compute_zi(iatom_mod,jjz,iatom_div);
|
||||
snaKK.compute_zi(iatom_mod,jjz,iatom_div);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeBi,const int iatom_mod, const int jjb, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||
if (iatom >= chunk_size) return;
|
||||
|
||||
if (jjb >= my_sna.idxb_max) return;
|
||||
if (jjb >= snaKK.idxb_max) return;
|
||||
|
||||
my_sna.compute_bi(iatom_mod,jjb,iatom_div);
|
||||
snaKK.compute_bi(iatom_mod,jjb,iatom_div);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int iatom = iatom_mod + iatom_div * vector_length;
|
||||
if (iatom >= chunk_size) return;
|
||||
|
||||
if (idxb >= my_sna.idxb_max) return;
|
||||
if (idxb >= snaKK.idxb_max) return;
|
||||
|
||||
const int ntriples = my_sna.ntriples;
|
||||
const int ntriples = snaKK.ntriples;
|
||||
|
||||
for (int itriple = 0; itriple < ntriples; itriple++) {
|
||||
|
||||
const real_type blocal = my_sna.blist_pack(iatom_mod, idxb, itriple, iatom_div);
|
||||
const real_type blocal = snaKK.blist_pack(iatom_mod, idxb, itriple, iatom_div);
|
||||
|
||||
my_sna.blist(iatom, itriple, idxb) = blocal;
|
||||
snaKK.blist(iatom, itriple, idxb) = blocal;
|
||||
}
|
||||
|
||||
}
|
||||
@ -937,7 +924,6 @@ template<class DeviceType, typename real_type, int vector_length>
|
||||
template<int dir>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeFusedDeidrjSmall<dir>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeFusedDeidrjSmall<dir> >::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// extract flattened atom_div / neighbor number / bend location
|
||||
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_fused_deidrj;
|
||||
@ -956,7 +942,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
const int ninside = d_ninside(ii);
|
||||
if (jj >= ninside) return;
|
||||
|
||||
my_sna.template compute_fused_deidrj_small<dir>(team, iatom_mod, jbend, jj, iatom_div);
|
||||
snaKK.template compute_fused_deidrj_small<dir>(team, iatom_mod, jbend, jj, iatom_div);
|
||||
|
||||
});
|
||||
|
||||
@ -966,7 +952,6 @@ template<class DeviceType, typename real_type, int vector_length>
|
||||
template<int dir>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeFusedDeidrjLarge<dir>,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeFusedDeidrjLarge<dir> >::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// extract flattened atom_div / neighbor number / bend location
|
||||
int flattened_idx = team.team_rank() + team.league_rank() * team_size_compute_fused_deidrj;
|
||||
@ -983,7 +968,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
const int ninside = d_ninside(ii);
|
||||
if (jj >= ninside) return;
|
||||
|
||||
my_sna.template compute_fused_deidrj_large<dir>(team, iatom_mod, jj, iatom_div);
|
||||
snaKK.template compute_fused_deidrj_large<dir>(team, iatom_mod, jj, iatom_div);
|
||||
|
||||
});
|
||||
}
|
||||
@ -1003,7 +988,6 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
const int itype = type[i];
|
||||
const int ielem = d_map[itype];
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
auto d_coeffi = Kokkos::subview(d_coeffelem, ielem, Kokkos::ALL);
|
||||
|
||||
@ -1011,18 +995,18 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
d_beta(icoeff,ii) = d_coeffi[icoeff+1];
|
||||
|
||||
if (quadraticflag) {
|
||||
const auto idxb_max = my_sna.idxb_max;
|
||||
const auto idxb_max = snaKK.idxb_max;
|
||||
int k = ncoeff+1;
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
const auto idxb = icoeff % idxb_max;
|
||||
const auto idx_chem = icoeff / idxb_max;
|
||||
real_type bveci = my_sna.blist(ii,idx_chem,idxb);
|
||||
real_type bveci = snaKK.blist(ii,idx_chem,idxb);
|
||||
d_beta(icoeff,ii) += d_coeffi[k]*bveci;
|
||||
k++;
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
const auto jdxb = jcoeff % idxb_max;
|
||||
const auto jdx_chem = jcoeff / idxb_max;
|
||||
real_type bvecj = my_sna.blist(ii,jdx_chem,jdxb);
|
||||
real_type bvecj = snaKK.blist(ii,jdx_chem,jdxb);
|
||||
d_beta(icoeff,ii) += d_coeffi[k]*bvecj;
|
||||
d_beta(jcoeff,ii) += d_coeffi[k]*bveci;
|
||||
k++;
|
||||
@ -1035,10 +1019,8 @@ template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeNeighCPU>::member_type& team) const {
|
||||
|
||||
|
||||
int ii = team.league_rank();
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
const double xtmp = x(i,0);
|
||||
const double ytmp = x(i,1);
|
||||
const double ztmp = x(i,2);
|
||||
@ -1088,20 +1070,20 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
|
||||
if (rsq < rnd_cutsq(itype,jtype)) {
|
||||
if (final) {
|
||||
my_sna.rij(ii,offset,0) = static_cast<real_type>(dx);
|
||||
my_sna.rij(ii,offset,1) = static_cast<real_type>(dy);
|
||||
my_sna.rij(ii,offset,2) = static_cast<real_type>(dz);
|
||||
my_sna.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
|
||||
my_sna.rcutij(ii,offset) = static_cast<real_type>((radi + d_radelem[jelem])*rcutfac);
|
||||
my_sna.inside(ii,offset) = j;
|
||||
snaKK.rij(ii,offset,0) = static_cast<real_type>(dx);
|
||||
snaKK.rij(ii,offset,1) = static_cast<real_type>(dy);
|
||||
snaKK.rij(ii,offset,2) = static_cast<real_type>(dz);
|
||||
snaKK.wj(ii,offset) = static_cast<real_type>(d_wjelem[jelem]);
|
||||
snaKK.rcutij(ii,offset) = static_cast<real_type>((radi + d_radelem[jelem])*rcutfac);
|
||||
snaKK.inside(ii,offset) = j;
|
||||
if (switchinnerflag) {
|
||||
my_sna.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
|
||||
my_sna.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
|
||||
snaKK.sinnerij(ii,offset) = 0.5*(d_sinnerelem[ielem] + d_sinnerelem[jelem]);
|
||||
snaKK.dinnerij(ii,offset) = 0.5*(d_dinnerelem[ielem] + d_dinnerelem[jelem]);
|
||||
}
|
||||
if (chemflag)
|
||||
my_sna.element(ii,offset) = jelem;
|
||||
snaKK.element(ii,offset) = jelem;
|
||||
else
|
||||
my_sna.element(ii,offset) = 0;
|
||||
snaKK.element(ii,offset) = 0;
|
||||
}
|
||||
offset++;
|
||||
}
|
||||
@ -1112,7 +1094,6 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPPreUiCPU>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// Extract the atom number
|
||||
const int ii = team.team_rank() + team.team_size() * team.league_rank();
|
||||
@ -1120,7 +1101,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
int itype = type(ii);
|
||||
int ielem = d_map[itype];
|
||||
|
||||
my_sna.pre_ui_cpu(team,ii,ielem);
|
||||
snaKK.pre_ui_cpu(team,ii,ielem);
|
||||
}
|
||||
|
||||
|
||||
@ -1128,7 +1109,6 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeUiCPU>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// Extract the atom number
|
||||
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
|
||||
@ -1139,13 +1119,12 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
const int ninside = d_ninside(ii);
|
||||
if (jj >= ninside) return;
|
||||
|
||||
my_sna.compute_ui_cpu(team,ii,jj);
|
||||
snaKK.compute_ui_cpu(team,ii,jj);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPTransformUiCPU, const int j, const int iatom) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
if (iatom >= chunk_size) return;
|
||||
|
||||
@ -1156,8 +1135,8 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
// De-symmetrize ulisttot
|
||||
for (int ielem = 0; ielem < elem_count; ielem++) {
|
||||
|
||||
const int jju_half = my_sna.idxu_half_block(j);
|
||||
const int jju = my_sna.idxu_block(j);
|
||||
const int jju_half = snaKK.idxu_half_block(j);
|
||||
const int jju = snaKK.idxu_block(j);
|
||||
|
||||
for (int mb = 0; 2*mb <= j; mb++) {
|
||||
for (int ma = 0; ma <= j; ma++) {
|
||||
@ -1168,13 +1147,13 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
const int idxu = jju + idxu_shift;
|
||||
|
||||
// Load ulist
|
||||
auto utot = my_sna.ulisttot(idxu_half, ielem, iatom);
|
||||
auto utot = snaKK.ulisttot(idxu_half, ielem, iatom);
|
||||
|
||||
// Store
|
||||
my_sna.ulisttot_full(idxu, ielem, iatom) = utot;
|
||||
snaKK.ulisttot_full(idxu, ielem, iatom) = utot;
|
||||
|
||||
// Zero Yi
|
||||
my_sna.ylist(idxu_half, ielem, iatom) = {0., 0.};
|
||||
snaKK.ylist(idxu_half, ielem, iatom) = {0., 0.};
|
||||
|
||||
// Symmetric term
|
||||
const int sign_factor = (((ma+mb)%2==0)?1:-1);
|
||||
@ -1186,7 +1165,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
utot.re = -utot.re;
|
||||
}
|
||||
|
||||
my_sna.ulisttot_full(idxu_flip, ielem, iatom) = utot;
|
||||
snaKK.ulisttot_full(idxu_flip, ielem, iatom) = utot;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1195,29 +1174,25 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeYiCPU,const int& ii) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
my_sna.compute_yi_cpu(ii,d_beta);
|
||||
snaKK.compute_yi_cpu(ii,d_beta);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeZiCPU,const int& ii) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
my_sna.compute_zi_cpu(ii);
|
||||
snaKK.compute_zi_cpu(ii);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeBiCPU>::member_type& team) const {
|
||||
int ii = team.league_rank();
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
my_sna.compute_bi_cpu(team,ii);
|
||||
snaKK.compute_bi_cpu(team,ii);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDuidrjCPU>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// Extract the atom number
|
||||
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
|
||||
@ -1228,13 +1203,12 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
const int ninside = d_ninside(ii);
|
||||
if (jj >= ninside) return;
|
||||
|
||||
my_sna.compute_duidrj_cpu(team,ii,jj);
|
||||
snaKK.compute_duidrj_cpu(team,ii,jj);
|
||||
}
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy<DeviceType,TagPairSNAPComputeDeidrjCPU>::member_type& team) const {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
// Extract the atom number
|
||||
int ii = team.team_rank() + team.team_size() * (team.league_rank() % ((chunk_size+team.team_size()-1)/team.team_size()));
|
||||
@ -1245,7 +1219,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
const int ninside = d_ninside(ii);
|
||||
if (jj >= ninside) return;
|
||||
|
||||
my_sna.compute_deidrj_cpu(team,ii,jj);
|
||||
snaKK.compute_deidrj_cpu(team,ii,jj);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -1265,17 +1239,15 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
|
||||
const int i = d_ilist[ii + chunk_offset];
|
||||
|
||||
SNAKokkos<DeviceType, real_type, vector_length> my_sna = snaKK;
|
||||
|
||||
const int ninside = d_ninside(ii);
|
||||
|
||||
for (int jj = 0; jj < ninside; jj++) {
|
||||
int j = my_sna.inside(ii,jj);
|
||||
int j = snaKK.inside(ii,jj);
|
||||
|
||||
F_FLOAT fij[3];
|
||||
fij[0] = my_sna.dedr(ii,jj,0);
|
||||
fij[1] = my_sna.dedr(ii,jj,1);
|
||||
fij[2] = my_sna.dedr(ii,jj,2);
|
||||
fij[0] = snaKK.dedr(ii,jj,0);
|
||||
fij[1] = snaKK.dedr(ii,jj,1);
|
||||
fij[2] = snaKK.dedr(ii,jj,2);
|
||||
|
||||
a_f(i,0) += fij[0];
|
||||
a_f(i,1) += fij[1];
|
||||
@ -1288,8 +1260,8 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
if (vflag_either) {
|
||||
v_tally_xyz<NEIGHFLAG>(ev,i,j,
|
||||
fij[0],fij[1],fij[2],
|
||||
-my_sna.rij(ii,jj,0),-my_sna.rij(ii,jj,1),
|
||||
-my_sna.rij(ii,jj,2));
|
||||
-snaKK.rij(ii,jj,0),-snaKK.rij(ii,jj,1),
|
||||
-snaKK.rij(ii,jj,2));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1316,7 +1288,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
const auto idxb = icoeff % idxb_max;
|
||||
const auto idx_chem = icoeff / idxb_max;
|
||||
evdwl += d_coeffi[icoeff+1]*my_sna.blist(ii,idx_chem,idxb);
|
||||
evdwl += d_coeffi[icoeff+1]*snaKK.blist(ii,idx_chem,idxb);
|
||||
}
|
||||
|
||||
// quadratic contributions
|
||||
@ -1325,12 +1297,12 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
|
||||
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
|
||||
const auto idxb = icoeff % idxb_max;
|
||||
const auto idx_chem = icoeff / idxb_max;
|
||||
real_type bveci = my_sna.blist(ii,idx_chem,idxb);
|
||||
real_type bveci = snaKK.blist(ii,idx_chem,idxb);
|
||||
evdwl += 0.5*d_coeffi[k++]*bveci*bveci;
|
||||
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
|
||||
auto jdxb = jcoeff % idxb_max;
|
||||
auto jdx_chem = jcoeff / idxb_max;
|
||||
auto bvecj = my_sna.blist(ii,jdx_chem,jdxb);
|
||||
auto bvecj = snaKK.blist(ii,jdx_chem,jdxb);
|
||||
evdwl += d_coeffi[k++]*bveci*bvecj;
|
||||
}
|
||||
}
|
||||
|
||||
@ -187,84 +187,84 @@ class SNAKokkos {
|
||||
|
||||
// functions for bispectrum coefficients, GPU only
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_cayley_klein(const int&, const int&, const int&);
|
||||
void compute_cayley_klein(const int&, const int&, const int&) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void pre_ui(const int&, const int&, const int&, const int&); // ForceSNAP
|
||||
void pre_ui(const int&, const int&, const int&, const int&) const; // ForceSNAP
|
||||
|
||||
// version of the code with parallelism over j_bend
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_ui_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int); // ForceSNAP
|
||||
void compute_ui_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int) const; // ForceSNAP
|
||||
// version of the code without parallelism over j_bend
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_ui_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int); // ForceSNAP
|
||||
void compute_ui_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int) const; // ForceSNAP
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_zi(const int&, const int&, const int&); // ForceSNAP
|
||||
void compute_zi(const int&, const int&, const int&) const; // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi(int,int,int,
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack) const; // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi_with_zlist(int,int,int,
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack) const; // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi(const int&, const int&, const int&); // ForceSNAP
|
||||
void compute_bi(const int&, const int&, const int&) const; // ForceSNAP
|
||||
|
||||
// functions for derivatives, GPU only
|
||||
// version of the code with parallelism over j_bend
|
||||
template<int dir>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_fused_deidrj_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int); //ForceSNAP
|
||||
void compute_fused_deidrj_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int) const; //ForceSNAP
|
||||
// version of the code without parallelism over j_bend
|
||||
template<int dir>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_fused_deidrj_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int); //ForceSNAP
|
||||
void compute_fused_deidrj_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int) const; //ForceSNAP
|
||||
|
||||
// core "evaluation" functions that get plugged into "compute" functions
|
||||
// plugged into compute_ui_small, compute_ui_large
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void evaluate_ui_jbend(const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&, const int&,
|
||||
const int&, const int&, const int&);
|
||||
const int&, const int&, const int&) const;
|
||||
// plugged into compute_zi, compute_yi
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
complex evaluate_zi(const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&,
|
||||
const int&, const int&, const int&, const int&, const real_type*);
|
||||
const int&, const int&, const int&, const int&, const real_type*) const;
|
||||
// plugged into compute_yi, compute_yi_with_zlist
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
real_type evaluate_beta_scaled(const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&,
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &);
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &) const;
|
||||
// plugged into compute_fused_deidrj_small, compute_fused_deidrj_large
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
real_type evaluate_duidrj_jbend(const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&,
|
||||
const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&,
|
||||
const int&, const int&, const int&, const int&);
|
||||
const int&, const int&, const int&, const int&) const;
|
||||
|
||||
// functions for bispectrum coefficients, CPU only
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&,const int&); // ForceSNAP
|
||||
void pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&,const int&) const; // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||
void compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int) const; // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_zi_cpu(const int&); // ForceSNAP
|
||||
void compute_zi_cpu(const int&) const; // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi_cpu(int,
|
||||
const Kokkos::View<real_type**, DeviceType> &beta); // ForceSNAP
|
||||
const Kokkos::View<real_type**, DeviceType> &beta) const; // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||
void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int) const; // ForceSNAP
|
||||
|
||||
// functions for derivatives, CPU only
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); //ForceSNAP
|
||||
void compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int) const; //ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||
void compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int) const; // ForceSNAP
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
real_type compute_sfac(real_type, real_type, real_type, real_type); // add_uarraytot, compute_duarray
|
||||
real_type compute_sfac(real_type, real_type, real_type, real_type) const; // add_uarraytot, compute_duarray
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
real_type compute_dsfac(real_type, real_type, real_type, real_type); // compute_duarray
|
||||
real_type compute_dsfac(real_type, real_type, real_type, real_type) const; // compute_duarray
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_s_dsfac(const real_type, const real_type, const real_type, const real_type, real_type&, real_type&); // compute_cayley_klein
|
||||
void compute_s_dsfac(const real_type, const real_type, const real_type, const real_type, real_type&, real_type&) const; // compute_cayley_klein
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
double* timers;
|
||||
@ -365,12 +365,12 @@ class SNAKokkos {
|
||||
void init_rootpqarray(); // init()
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real_type&, const real_type&, const real_type&, const real_type&, const real_type&, int); // compute_ui
|
||||
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real_type&, const real_type&, const real_type&, const real_type&, const real_type&, int) const; // compute_ui
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
||||
const real_type&, const real_type&, const real_type&,
|
||||
const real_type&, const real_type&); // compute_ui_cpu
|
||||
const real_type&, const real_type&) const; // compute_ui_cpu
|
||||
|
||||
|
||||
inline
|
||||
@ -382,7 +382,7 @@ class SNAKokkos {
|
||||
void compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
||||
const real_type&, const real_type&, const real_type&, // compute_duidrj_cpu
|
||||
const real_type&, const real_type&, const real_type&, const real_type&, const real_type&,
|
||||
const real_type&, const real_type&);
|
||||
const real_type&, const real_type&) const;
|
||||
|
||||
// Sets the style for the switching function
|
||||
// 0 = none
|
||||
|
||||
@ -371,7 +371,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::grow_rij(int newnatom, int
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_cayley_klein(const int& iatom_mod, const int& jnbor, const int& iatom_div)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_cayley_klein(const int& iatom_mod, const int& jnbor, const int& iatom_div) const
|
||||
{
|
||||
const int iatom = iatom_mod + vector_length * iatom_div;
|
||||
const real_type x = rij(iatom,jnbor,0);
|
||||
@ -460,7 +460,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_cayley_klein(const
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui(const int& iatom_mod, const int& j, const int& ielem, const int& iatom_div)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui(const int& iatom_mod, const int& j, const int& ielem, const int& iatom_div) const
|
||||
{
|
||||
|
||||
for (int jelem = 0; jelem < nelements; jelem++) {
|
||||
@ -494,7 +494,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui(const int& iatom_mo
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div) const
|
||||
{
|
||||
|
||||
// get shared memory offset
|
||||
@ -525,7 +525,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_small(const typ
|
||||
// and some amount of load imbalance, at the expense of reducing parallelism
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int jnbor, const int iatom_div)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int jnbor, const int iatom_div) const
|
||||
{
|
||||
// get shared memory offset
|
||||
// scratch size: 32 atoms * (twojmax+1) cached values, no double buffer
|
||||
@ -558,7 +558,7 @@ template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::evaluate_ui_jbend(const WignerWrapper<real_type, vector_length>& ulist_wrapper,
|
||||
const complex& a, const complex& b, const real_type& sfac, const int& jelem,
|
||||
const int& iatom_mod, const int& j_bend, const int& iatom_div)
|
||||
const int& iatom_mod, const int& j_bend, const int& iatom_div) const
|
||||
{
|
||||
|
||||
// utot(j,ma,mb) = 0 for all j,ma,mb
|
||||
@ -664,7 +664,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::evaluate_ui_jbend(const Wi
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi(const int& iatom_mod, const int& jjz, const int& iatom_div)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi(const int& iatom_mod, const int& jjz, const int& iatom_div) const
|
||||
{
|
||||
|
||||
int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, idxcg;
|
||||
@ -692,7 +692,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi(const int& iato
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi(const int& iatom_mod, const int& jjb, const int& iatom_div)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi(const int& iatom_mod, const int& jjb, const int& iatom_div) const
|
||||
{
|
||||
// for j1 = 0,...,twojmax
|
||||
// for j2 = 0,twojmax
|
||||
@ -786,7 +786,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi(const int& iato
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi(int iatom_mod, int jjz, int iatom_div,
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack)
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack) const
|
||||
{
|
||||
|
||||
int j1, j2, j, ma1min, ma2max, mb1min, mb2max, na, nb, jju_half, idxcg;
|
||||
@ -827,7 +827,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi(int iatom_mod,
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi_with_zlist(int iatom_mod, int jjz, int iatom_div,
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack)
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack) const
|
||||
{
|
||||
int j1, j2, j, jju_half;
|
||||
idxz(jjz).get_yi_with_zlist(j1, j2, j, jju_half);
|
||||
@ -859,7 +859,7 @@ template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename SNAKokkos<DeviceType, real_type, vector_length>::complex SNAKokkos<DeviceType, real_type, vector_length>::evaluate_zi(const int& j1, const int& j2, const int& j,
|
||||
const int& ma1min, const int& ma2max, const int& mb1min, const int& mb2max, const int& na, const int& nb,
|
||||
const int& iatom_mod, const int& elem1, const int& elem2, const int& iatom_div, const real_type* cgblock) {
|
||||
const int& iatom_mod, const int& elem1, const int& elem2, const int& iatom_div, const real_type* cgblock) const {
|
||||
|
||||
complex ztmp = complex::zero();
|
||||
|
||||
@ -911,7 +911,7 @@ template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename SNAKokkos<DeviceType, real_type, vector_length>::real_type SNAKokkos<DeviceType, real_type, vector_length>::evaluate_beta_scaled(const int& j1, const int& j2, const int& j,
|
||||
const int& iatom_mod, const int& elem1, const int& elem2, const int& elem3, const int& iatom_div,
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack) {
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack) const {
|
||||
|
||||
real_type betaj = 0;
|
||||
|
||||
@ -951,7 +951,7 @@ typename SNAKokkos<DeviceType, real_type, vector_length>::real_type SNAKokkos<De
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
template<int dir>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_fused_deidrj_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_fused_deidrj_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int j_bend, const int jnbor, const int iatom_div) const
|
||||
{
|
||||
// get shared memory offset
|
||||
// scratch size: 32 atoms * (twojmax+1) cached values, no double buffer
|
||||
@ -988,7 +988,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_fused_deidrj_small
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
template<int dir>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_fused_deidrj_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int jnbor, const int iatom_div)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_fused_deidrj_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int iatom_mod, const int jnbor, const int iatom_div) const
|
||||
{
|
||||
// get shared memory offset
|
||||
// scratch size: 32 atoms * (twojmax+1) cached values, no double buffer
|
||||
@ -1032,7 +1032,7 @@ template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
typename SNAKokkos<DeviceType, real_type, vector_length>::real_type SNAKokkos<DeviceType, real_type, vector_length>::evaluate_duidrj_jbend(const WignerWrapper<real_type, vector_length>& ulist_wrapper, const complex& a, const complex& b, const real_type& sfac,
|
||||
const WignerWrapper<real_type, vector_length>& dulist_wrapper, const complex& da, const complex& db, const real_type& dsfacu,
|
||||
const int& jelem, const int& iatom_mod, const int& j_bend, const int& iatom_div) {
|
||||
const int& jelem, const int& iatom_mod, const int& j_bend, const int& iatom_div) const {
|
||||
|
||||
real_type dedr_full_sum = static_cast<real_type>(0);
|
||||
|
||||
@ -1178,7 +1178,7 @@ typename SNAKokkos<DeviceType, real_type, vector_length>::real_type SNAKokkos<De
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int& iatom, const int& ielem)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int& iatom, const int& ielem) const
|
||||
{
|
||||
for (int jelem = 0; jelem < nelements; jelem++) {
|
||||
for (int j = 0; j <= twojmax; j++) {
|
||||
@ -1212,7 +1212,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui_cpu(const typename
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor) const
|
||||
{
|
||||
real_type rsq, r, x, y, z, z0, theta0;
|
||||
|
||||
@ -1242,7 +1242,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_ui_cpu(const typen
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi_cpu(const int& iter)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi_cpu(const int& iter) const
|
||||
{
|
||||
const int iatom = iter / idxz_max;
|
||||
const int jjz = iter % idxz_max;
|
||||
@ -1305,7 +1305,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_zi_cpu(const int&
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom) const
|
||||
{
|
||||
// for j1 = 0,...,twojmax
|
||||
// for j2 = 0,twojmax
|
||||
@ -1404,7 +1404,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_bi_cpu(const typen
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi_cpu(int iter,
|
||||
const Kokkos::View<real_type**, DeviceType> &beta)
|
||||
const Kokkos::View<real_type**, DeviceType> &beta) const
|
||||
{
|
||||
real_type betaj;
|
||||
const int iatom = iter / idxz_max;
|
||||
@ -1504,7 +1504,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_yi_cpu(int iter,
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor) const
|
||||
{
|
||||
real_type rsq, r, x, y, z, z0, theta0, cs, sn;
|
||||
real_type dz0dr;
|
||||
@ -1536,7 +1536,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_duidrj_cpu(const t
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor)
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor) const
|
||||
{
|
||||
t_scalar3<real_type> final_sum;
|
||||
const int jelem = element(iatom, jnbor);
|
||||
@ -1604,7 +1604,7 @@ template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
||||
const real_type& r, const real_type& wj, const real_type& rcut,
|
||||
const real_type& sinner, const real_type& dinner, int jelem)
|
||||
const real_type& sinner, const real_type& dinner, int jelem) const
|
||||
{
|
||||
const real_type sfac = compute_sfac(r, rcut, sinner, dinner) * wj;
|
||||
|
||||
@ -1634,7 +1634,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::add_uarraytot(const typena
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int iatom, int jnbor,
|
||||
const real_type& x, const real_type& y, const real_type& z, const real_type& z0, const real_type& r)
|
||||
const real_type& x, const real_type& y, const real_type& z, const real_type& z0, const real_type& r) const
|
||||
{
|
||||
real_type r0inv;
|
||||
real_type a_r, b_r, a_i, b_i;
|
||||
@ -1728,7 +1728,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_duarray_cpu(const
|
||||
const real_type& x, const real_type& y, const real_type& z,
|
||||
const real_type& z0, const real_type& r, const real_type& dz0dr,
|
||||
const real_type& wj, const real_type& rcut,
|
||||
const real_type& sinner, const real_type& dinner)
|
||||
const real_type& sinner, const real_type& dinner) const
|
||||
{
|
||||
real_type r0inv;
|
||||
real_type a_r, a_i, b_r, b_i;
|
||||
@ -2206,7 +2206,7 @@ int SNAKokkos<DeviceType, real_type, vector_length>::compute_ncoeff()
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_sfac(real_type r, real_type rcut, real_type sinner, real_type dinner)
|
||||
real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_sfac(real_type r, real_type rcut, real_type sinner, real_type dinner) const
|
||||
{
|
||||
real_type sfac_outer;
|
||||
constexpr real_type one = static_cast<real_type>(1.0);
|
||||
@ -2239,7 +2239,7 @@ real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_sfac(real_typ
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_dsfac(real_type r, real_type rcut, real_type sinner, real_type dinner)
|
||||
real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_dsfac(real_type r, real_type rcut, real_type sinner, real_type dinner) const
|
||||
{
|
||||
real_type sfac_outer, dsfac_outer, sfac_inner, dsfac_inner;
|
||||
constexpr real_type one = static_cast<real_type>(1.0);
|
||||
@ -2287,7 +2287,7 @@ real_type SNAKokkos<DeviceType, real_type, vector_length>::compute_dsfac(real_ty
|
||||
|
||||
template<class DeviceType, typename real_type, int vector_length>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_s_dsfac(const real_type r, const real_type rcut, const real_type sinner, const real_type dinner, real_type& sfac, real_type& dsfac) {
|
||||
void SNAKokkos<DeviceType, real_type, vector_length>::compute_s_dsfac(const real_type r, const real_type rcut, const real_type sinner, const real_type dinner, real_type& sfac, real_type& dsfac) const {
|
||||
|
||||
real_type sfac_outer, dsfac_outer, sfac_inner, dsfac_inner;
|
||||
constexpr real_type one = static_cast<real_type>(1.0);
|
||||
|
||||
Reference in New Issue
Block a user