Unify the CPU and GPU PreUi routines

This commit is contained in:
Evan Weinberg
2024-11-20 10:20:42 -08:00
parent 98b67b8ea0
commit 6e54d9326b
4 changed files with 14 additions and 41 deletions

View File

@ -36,22 +36,21 @@ PairStyle(snap/kk/host,PairSNAPKokkosDevice<LMPHostType>);
namespace LAMMPS_NS {
// Routines for both the CPU and GPU backend
struct TagPairSNAPPreUi{};
struct TagPairSNAPTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero ylist
struct TagPairSNAPComputeZi{};
struct TagPairSNAPComputeBi{};
struct TagPairSNAPBeta{};
struct TagPairSNAPComputeYi{};
struct TagPairSNAPComputeYiWithZlist{};
struct TagPairSNAPBeta{};
template<int NEIGHFLAG, int EVFLAG>
struct TagPairSNAPComputeForce{};
// GPU backend only
struct TagPairSNAPComputeNeigh{};
struct TagPairSNAPComputeCayleyKlein{};
struct TagPairSNAPPreUi{};
struct TagPairSNAPComputeUiSmall{}; // more parallelism, more divergence
struct TagPairSNAPComputeUiLarge{}; // less parallelism, no divergence
struct TagPairSNAPComputeBi{};
template<int dir>
struct TagPairSNAPComputeFusedDeidrjSmall{}; // more parallelism, more divergence
template<int dir>
@ -59,10 +58,7 @@ struct TagPairSNAPComputeFusedDeidrjLarge{}; // less parallelism, no divergence
// CPU backend only
struct TagPairSNAPComputeNeighCPU{};
struct TagPairSNAPPreUiCPU{};
struct TagPairSNAPComputeUiCPU{};
struct TagPairSNAPComputeBiCPU{};
struct TagPairSNAPComputeYiCPU{};
struct TagPairSNAPComputeDuidrjCPU{};
struct TagPairSNAPComputeDeidrjCPU{};
@ -202,7 +198,7 @@ class PairSNAPKokkos : public PairSNAP {
void operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeighCPU>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPPreUiCPU, const int& iatom) const;
void operator() (TagPairSNAPPreUi, const int& ii) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeUiCPU, const int& ii) const;
@ -214,7 +210,7 @@ class PairSNAPKokkos : public PairSNAP {
void operator() (TagPairSNAPComputeZi, const int& ii) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeBiCPU, const int& ii) const;
void operator() (TagPairSNAPComputeBi, const int& ii) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeYi, const int& ii) const;

View File

@ -235,7 +235,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::compute(int eflag_in,
//PreUi
{
typename Kokkos::RangePolicy<DeviceType,TagPairSNAPPreUiCPU> policy_preui_cpu(0, chunk_size);
typename Kokkos::RangePolicy<DeviceType,TagPairSNAPPreUi> policy_preui_cpu(0, chunk_size * (twojmax + 1));
Kokkos::parallel_for("PreUiCPU",policy_preui_cpu,*this);
}
@ -263,7 +263,7 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::compute(int eflag_in,
//ComputeBi
int idxb_max = snaKK.idxb_max;
typename Kokkos::RangePolicy<DeviceType,TagPairSNAPComputeBiCPU> policy_bi_cpu(0, chunk_size * idxb_max);
typename Kokkos::RangePolicy<DeviceType,TagPairSNAPComputeBi> policy_bi_cpu(0, chunk_size * idxb_max);
Kokkos::parallel_for("ComputeBiCPU",policy_bi_cpu,*this);
//Compute beta = dE_i/dB_i for all i in list
@ -807,11 +807,14 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
/* ----------------------------------------------------------------------
   PreUi functor body, dispatched by tag through Kokkos::parallel_for.
   ii is a flattened index: ii / (twojmax+1) selects the atom and
   ii % (twojmax+1) selects j, so the launch range must span
   (number of atoms) * (twojmax+1) entries.
   NOTE(review): this listing shows two signatures and two snaKK calls
   back to back; presumably the TagPairSNAPPreUiCPU / pre_ui_cpu lines are
   the pre-change versions and the TagPairSNAPPreUi / pre_ui lines replace
   them -- confirm against the actual file.
------------------------------------------------------------------------- */
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPPreUiCPU, const int& iatom) const {
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPPreUi, const int& ii) const {
// split the flat work index: quotient -> atom, remainder -> j in [0, twojmax]
const int iatom = ii / (twojmax+1);
const int j = ii % (twojmax+1);
// look up the atom's type, then map it through d_map to the element index
const int itype = type(iatom);
const int ielem = d_map[itype];
// per-atom variant: takes only (iatom, ielem)
snaKK.pre_ui_cpu(iatom, ielem);
// per-(atom, j) variant: takes the j extracted from ii as well
snaKK.pre_ui(iatom, j, ielem);
}
/* ----------------------------------------------------------------------
@ -944,12 +947,12 @@ void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSN
if (iatom >= chunk_size) return;
if (jjb >= snaKK.idxb_max) return;
snaKK.compute_bi(iatom,jjb);
snaKK.compute_bi(iatom, jjb);
}
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeBiCPU, const int& ii) const {
void PairSNAPKokkos<DeviceType, real_type, vector_length>::operator() (TagPairSNAPComputeBi, const int& ii) const {
const int iatom = ii / snaKK.idxb_max;
const int jjb = ii % snaKK.idxb_max;
snaKK.compute_bi(iatom, jjb);

View File

@ -241,8 +241,6 @@ class SNAKokkos {
// functions for bispectrum coefficients, CPU only
KOKKOS_INLINE_FUNCTION
void pre_ui_cpu(const int&, const int&) const; // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_ui_cpu(const int&, const int&) const; // ForceSNAP
// functions for derivatives, CPU only

View File

@ -466,30 +466,6 @@ void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui(const int& iatom, c
}
}
/* ----------------------------------------------------------------------
   Initialize ulisttot_re / ulisttot_im for one atom before accumulation.
   Loops over every jelem in [0, nelements) and every j in [0, twojmax],
   writing wself on the "diagonal" entries and zero everywhere else.
     iatom: first index into ulisttot_re / ulisttot_im
     ielem: compared with jelem to decide whether wself is applied
------------------------------------------------------------------------- */
template<class DeviceType, typename real_type, int vector_length>
KOKKOS_INLINE_FUNCTION
void SNAKokkos<DeviceType, real_type, vector_length>::pre_ui_cpu(const int& iatom, const int& ielem) const
{
for (int jelem = 0; jelem < nelements; jelem++) {
for (int j = 0; j <= twojmax; j++) {
// starting offset of the entries for this j
int jju = idxu_half_block(j); // removed "const" to work around GCC 7 bug
// Every one of the (j+1)*(j/2+1) entries for this j is written;
// only diagonal elements receive wself, the rest are zeroed
for (int m = 0; m < (j+1)*(j/2+1); m++) {
const int jjup = jju + m;
// if m is on the "diagonal", initialize it with the self energy.
// Otherwise zero it out
real_type init = 0;
// diagonal test is m % (j+2) == 0; wself is applied when chem_flag is off,
// when ielem matches jelem, or when wselfall_flag is set
if (m % (j+2) == 0 && (!chem_flag || ielem == jelem || wselfall_flag)) { init = wself; } //need to map iatom to element
ulisttot_re(iatom, jelem, jjup) = init;
// the imaginary part is always cleared
ulisttot_im(iatom, jelem, jjup) = 0;
};
}
}
}
/* ----------------------------------------------------------------------
compute Ui by computing Wigner U-functions for one neighbor and
accumulating to the total. GPU only.