Merge branch 'develop' into pair_d3

This commit is contained in:
Axel Kohlmeyer
2024-12-17 21:22:05 -05:00
4 changed files with 1672 additions and 1889 deletions

View File

@ -30,29 +30,34 @@ PairStyle(snap/kk/host,PairSNAPKokkosDevice<LMPHostType>);
#include "pair_snap.h"
#include "kokkos_type.h"
#include "neigh_list_kokkos.h"
#include "sna_kokkos.h"
#include "pair_kokkos.h"
namespace LAMMPS_NS {
// Forward declaration only: sna_kokkos.h needs to be able to name
// PairSNAPKokkos before the full class definition is available.
template<class DeviceType, typename real_type_, int vector_length_> class PairSNAPKokkos;
}    // namespace LAMMPS_NS
#include "sna_kokkos.h"
namespace LAMMPS_NS {
// Routines for both the CPU and GPU backend
// Each struct below is an empty tag type. PairSNAPKokkos declares operator()
// overloads whose first parameter is one of these tags, so the tag named in a
// launch policy picks the kernel body that runs.
struct TagPairSNAPPreUi{};
struct TagPairSNAPTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero ylist
// NOTE(review): `chemsnap` is a compile-time switch; presumably it selects the
// multi-element ("chemical") SNAP code path -- confirm in sna_kokkos_impl.h.
template <bool chemsnap> struct TagPairSNAPComputeZi{};
template <bool chemsnap> struct TagPairSNAPComputeBi{};
// Separate tags for the linear and the quadratic beta kernels.
struct TagPairSNAPComputeBetaLinear{};
struct TagPairSNAPComputeBetaQuadratic{};
template <bool chemsnap> struct TagPairSNAPComputeYi{};
template <bool chemsnap> struct TagPairSNAPComputeYiWithZlist{};
// NOTE(review): NEIGHFLAG / EVFLAG presumably encode the neighbor-list style and
// whether energy/virial tallying is enabled -- confirm against the launch site.
template<int NEIGHFLAG, int EVFLAG>
struct TagPairSNAPComputeForce{};
// GPU backend only
struct TagPairSNAPComputeNeigh{};
struct TagPairSNAPComputeCayleyKlein{};
struct TagPairSNAPPreUi{};
struct TagPairSNAPComputeUiSmall{}; // more parallelism, more divergence
struct TagPairSNAPComputeUiLarge{}; // less parallelism, no divergence
struct TagPairSNAPTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero ylist
struct TagPairSNAPComputeZi{};
struct TagPairSNAPBeta{};
struct TagPairSNAPComputeBi{};
struct TagPairSNAPTransformBi{}; // re-order blist from AoSoA to AoS
struct TagPairSNAPComputeYi{};
struct TagPairSNAPComputeYiWithZlist{};
template<int dir>
struct TagPairSNAPComputeFusedDeidrjSmall{}; // more parallelism, more divergence
template<int dir>
@ -60,14 +65,7 @@ struct TagPairSNAPComputeFusedDeidrjLarge{}; // less parallelism, no divergence
// CPU backend only
struct TagPairSNAPComputeNeighCPU{};
struct TagPairSNAPPreUiCPU{};
struct TagPairSNAPComputeUiCPU{};
struct TagPairSNAPTransformUiCPU{};
struct TagPairSNAPComputeZiCPU{};
struct TagPairSNAPBetaCPU{};
struct TagPairSNAPComputeBiCPU{};
struct TagPairSNAPZeroYiCPU{};
struct TagPairSNAPComputeYiCPU{};
struct TagPairSNAPComputeDuidrjCPU{};
struct TagPairSNAPComputeDeidrjCPU{};
@ -80,6 +78,8 @@ class PairSNAPKokkos : public PairSNAP {
typedef ArrayTypes<DeviceType> AT;
typedef EV_FLOAT value_type;
static constexpr LAMMPS_NS::ExecutionSpace execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
static constexpr int host_flag = (execution_space == LAMMPS_NS::Host);
static constexpr int vector_length = vector_length_;
using real_type = real_type_;
using complex = SNAComplex<real_type>;
@ -93,9 +93,11 @@ class PairSNAPKokkos : public PairSNAP {
static constexpr int team_size_compute_ui = 2;
static constexpr int tile_size_transform_ui = 2;
static constexpr int tile_size_compute_zi = 2;
static constexpr int min_blocks_compute_zi = 0; // no minimum bound
static constexpr int tile_size_compute_bi = 2;
static constexpr int tile_size_transform_bi = 2;
static constexpr int tile_size_compute_beta = 2;
static constexpr int tile_size_compute_yi = 2;
static constexpr int min_blocks_compute_yi = 0; // no minimum bound
static constexpr int team_size_compute_fused_deidrj = 2;
#elif defined(KOKKOS_ENABLE_SYCL)
static constexpr int team_size_compute_neigh = 4;
@ -104,9 +106,11 @@ class PairSNAPKokkos : public PairSNAP {
static constexpr int team_size_compute_ui = 8;
static constexpr int tile_size_transform_ui = 8;
static constexpr int tile_size_compute_zi = 4;
static constexpr int min_blocks_compute_zi = 0; // no minimum bound
static constexpr int tile_size_compute_bi = 4;
static constexpr int tile_size_transform_bi = 4;
static constexpr int tile_size_compute_beta = 8;
static constexpr int tile_size_compute_yi = 8;
static constexpr int min_blocks_compute_yi = 0; // no minimum bound
static constexpr int team_size_compute_fused_deidrj = 4;
#else
static constexpr int team_size_compute_neigh = 4;
@ -116,17 +120,21 @@ class PairSNAPKokkos : public PairSNAP {
static constexpr int tile_size_transform_ui = 4;
static constexpr int tile_size_compute_zi = 8;
static constexpr int tile_size_compute_bi = 4;
static constexpr int tile_size_transform_bi = 4;
static constexpr int tile_size_compute_beta = 4;
static constexpr int tile_size_compute_yi = 8;
static constexpr int team_size_compute_fused_deidrj = sizeof(real_type) == 4 ? 4 : 2;
// this empirically reduces perf fluctuations from compiler version to compiler version
static constexpr int min_blocks_compute_zi = 4;
static constexpr int min_blocks_compute_yi = 4;
#endif
// Custom MDRangePolicy, Rank3, to reduce verbosity of kernel launches
// This hides the Kokkos::IndexType<int> and Kokkos::Rank<3...>
// and reduces the verbosity of the LaunchBounds by hiding the explicit
// multiplication by vector_length
template <class Device, int num_tiles, class TagPairSNAP>
using Snap3DRangePolicy = typename Kokkos::MDRangePolicy<Device, Kokkos::IndexType<int>, Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, Kokkos::LaunchBounds<vector_length * num_tiles>, TagPairSNAP>;
template <class Device, int num_tiles, class TagPairSNAP, int min_blocks = 0>
using Snap3DRangePolicy = typename Kokkos::MDRangePolicy<Device, Kokkos::IndexType<int>, Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, Kokkos::LaunchBounds<vector_length * num_tiles, min_blocks>, TagPairSNAP>;
// Custom SnapAoSoATeamPolicy to reduce the verbosity of kernel launches
// This hides the LaunchBounds abstraction by hiding the explicit
@ -134,6 +142,29 @@ class PairSNAPKokkos : public PairSNAP {
template <class Device, int num_teams, class TagPairSNAP>
using SnapAoSoATeamPolicy = typename Kokkos::TeamPolicy<Device, Kokkos::LaunchBounds<vector_length * num_teams>, TagPairSNAP>;
// Custom MDRangePolicy, Rank2, on the host, to reduce verbosity of kernel launches. The striding of this launch is intentionally
// different from the tiled 3D range policy on the device.
template <class Device, class TagPairSNAP>
using Snap2DHostRangePolicy = typename Kokkos::MDRangePolicy<Device, Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<int>, Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, TagPairSNAP>;
// Custom 1D RangePolicy on the host, to reduce verbosity of kernel launches
template <class Device, class TagPairSNAP>
using Snap1DHostRangePolicy = typename Kokkos::RangePolicy<Device, Kokkos::Schedule<Kokkos::Dynamic>, TagPairSNAP>;
// Helper routine that returns a CPU or a GPU policy as appropriate
template <class Device, int num_tiles, class TagPairSNAP, int min_blocks = 0>
auto snap_get_policy(const int& chunk_size_div, const int& second_loop) {
if constexpr (host_flag) {
return Snap1DHostRangePolicy<Device, TagPairSNAP>(0, chunk_size_div * vector_length);
// the 2-d policy is still correct but it has atomics so it's slower on the CPU
//return Snap2DHostRangePolicy<Device, TagPairSNAP>({0, 0}, {chunk_size_div * vector_length, second_loop});
} else
return Snap3DRangePolicy<Device, num_tiles, TagPairSNAP, min_blocks>({0, 0, 0},
{vector_length, second_loop, chunk_size_div},
{vector_length, num_tiles, 1});
}
PairSNAPKokkos(class LAMMPS *);
~PairSNAPKokkos() override;
@ -149,6 +180,7 @@ class PairSNAPKokkos : public PairSNAP {
template<class TagStyle>
void check_team_size_reduce(int, int&);
// CPU and GPU backend
template<int NEIGHFLAG, int EVFLAG>
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const int& ii) const;
@ -157,18 +189,23 @@ class PairSNAPKokkos : public PairSNAP {
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const int& ii, EV_FLOAT&) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPBetaCPU,const int& ii) const;
// GPU backend only
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh>::member_type& team) const;
// GPU backend only
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeCayleyKlein, const int iatom_mod, const int jnbor, const int iatom_div) const;
// CPU and GPU
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPPreUi,const int iatom_mod, const int j, const int iatom_div) const;
void operator() (TagPairSNAPPreUi, const int& iatom_mod, const int& j, const int& iatom_div) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPPreUi, const int& iatom, const int& j) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPPreUi, const int& iatom) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeUiSmall,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUiSmall>::member_type& team) const;
@ -177,25 +214,67 @@ class PairSNAPKokkos : public PairSNAP {
void operator() (TagPairSNAPComputeUiLarge,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUiLarge>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPTransformUi,const int iatom_mod, const int j, const int iatom_div) const;
void operator() (TagPairSNAPTransformUi, const int& iatom_mod, const int& idxu, const int& iatom_div) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeZi,const int iatom_mod, const int idxz, const int iatom_div) const;
void operator() (TagPairSNAPTransformUi, const int& iatom, const int& idxu) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPBeta, const int& ii) const;
void operator() (TagPairSNAPTransformUi, const int& iatom) const;
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeZi<chemsnap>, const int& iatom_mod, const int& idxz, const int& iatom_div) const;
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeZi<chemsnap>, const int& iatom, const int& idxz) const;
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeZi<chemsnap>, const int& iatom) const;
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeBi<chemsnap>, const int& iatom_mod, const int& idxb, const int& iatom_div) const;
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeBi<chemsnap>, const int& iatom, const int& idxb) const;
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeBi<chemsnap>, const int& iatom) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeBi,const int iatom_mod, const int idxb, const int iatom_div) const;
void operator() (TagPairSNAPComputeBetaLinear, const int& iatom_mod, const int& idxb, const int& iatom_div) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const;
void operator() (TagPairSNAPComputeBetaLinear, const int& iatom, const int& idxb) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeYi,const int iatom_mod, const int idxz, const int iatom_div) const;
void operator() (TagPairSNAPComputeBetaLinear, const int& iatom) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeYiWithZlist,const int iatom_mod, const int idxz, const int iatom_div) const;
void operator() (TagPairSNAPComputeBetaQuadratic, const int& iatom_mod, const int& idxb, const int& iatom_div) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeBetaQuadratic, const int& iatom, const int& idxb) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeBetaQuadratic, const int& iatom) const;
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeYi<chemsnap>, const int& iatom_mod, const int& idxz, const int& iatom_div) const;
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeYi<chemsnap>, const int& iatom, const int& idxz) const;
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeYi<chemsnap>, const int& iatom) const;
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeYiWithZlist<chemsnap>, const int& iatom_mod, const int& idxz, const int& iatom_div) const;
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeYiWithZlist<chemsnap>, const int& iatom, const int& idxz) const;
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeYiWithZlist<chemsnap>, const int& iatom) const;
template<int dir>
KOKKOS_INLINE_FUNCTION
@ -210,28 +289,22 @@ class PairSNAPKokkos : public PairSNAP {
void operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeighCPU>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUiCPU>::member_type& team) const;
void operator() (TagPairSNAPComputeUiCPU, const int& iatom, const int& jnbor) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUiCPU>::member_type& team) const;
void operator() (TagPairSNAPComputeUiCPU, const int& iatom) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPTransformUiCPU, const int j, const int iatom) const;
void operator() (TagPairSNAPComputeDuidrjCPU, const int& iatom, const int& jnbor) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeZiCPU,const int& ii) const;
void operator() (TagPairSNAPComputeDuidrjCPU, const int& iatom) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBiCPU>::member_type& team) const;
void operator() (TagPairSNAPComputeDeidrjCPU, const int& iatom, const int& jnbor) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeYiCPU,const int& ii) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDuidrjCPU>::member_type& team) const;
KOKKOS_INLINE_FUNCTION
void operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDeidrjCPU>::member_type& team) const;
void operator() (TagPairSNAPComputeDeidrjCPU, const int& iatom) const;
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
@ -252,7 +325,7 @@ class PairSNAPKokkos : public PairSNAP {
SNAKokkos<DeviceType, real_type, vector_length> snaKK;
int inum,max_neighs,chunk_size,chunk_offset;
int host_flag,neighflag;
int neighflag;
int eflag,vflag;
@ -260,13 +333,12 @@ class PairSNAPKokkos : public PairSNAP {
Kokkos::View<real_type*, DeviceType> d_radelem; // element radii
Kokkos::View<real_type*, DeviceType> d_wjelem; // elements weights
Kokkos::View<real_type**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
typename SNAKokkos<DeviceType, real_type, vector_length>::t_sna_2d_lr d_coeffelem; // element bispectrum coefficients
Kokkos::View<real_type*, DeviceType> d_sinnerelem; // element inner cutoff midpoint
Kokkos::View<real_type*, DeviceType> d_dinnerelem; // element inner cutoff half-width
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
Kokkos::View<T_INT*, DeviceType> d_ninside; // ninside for all atoms in list
Kokkos::View<real_type**, DeviceType> d_beta; // betas for all atoms in list
Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> d_beta_pack; // betas for all atoms in list, GPU
typename SNAKokkos<DeviceType, real_type, vector_length>::t_sna_2d d_beta; // betas for all atoms in list
typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;
tdual_fparams k_cutsq;
@ -301,6 +373,9 @@ class PairSNAPKokkos : public PairSNAP {
template <typename scratch_type>
int scratch_size_helper(int values_per_team);
// Make SNAKokkos a friend
friend class SNAKokkos<DeviceType, real_type, vector_length>;
};

File diff suppressed because it is too large Load Diff

View File

@ -134,6 +134,8 @@ class SNAKokkos {
static constexpr int vector_length = vector_length_;
using KKDeviceType = typename KKDevice<DeviceType>::value;
static constexpr LAMMPS_NS::ExecutionSpace execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
static constexpr int host_flag = (execution_space == LAMMPS_NS::Host);
typedef Kokkos::View<int*, DeviceType> t_sna_1i;
typedef Kokkos::View<real_type*, DeviceType> t_sna_1d;
@ -141,6 +143,7 @@ class SNAKokkos {
typedef Kokkos::View<int**, DeviceType> t_sna_2i;
typedef Kokkos::View<real_type**, DeviceType> t_sna_2d;
typedef Kokkos::View<real_type**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll;
typedef Kokkos::View<real_type**, Kokkos::LayoutRight, DeviceType> t_sna_2d_lr;
typedef Kokkos::View<real_type***, DeviceType> t_sna_3d;
typedef Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll;
typedef Kokkos::View<real_type***[3], DeviceType> t_sna_4d;
@ -156,7 +159,7 @@ class SNAKokkos {
typedef Kokkos::View<complex***, DeviceType> t_sna_3c;
typedef Kokkos::View<complex***, Kokkos::LayoutLeft, DeviceType> t_sna_3c_ll;
typedef Kokkos::View<complex***[3], DeviceType> t_sna_4c;
typedef Kokkos::View<complex***[3], Kokkos::LayoutLeft, DeviceType> t_sna_4c3_ll;
typedef Kokkos::View<complex***[3], DeviceType> t_sna_4c3;
typedef Kokkos::View<complex****, Kokkos::LayoutLeft, DeviceType> t_sna_4c_ll;
typedef Kokkos::View<complex**[3], DeviceType> t_sna_3c3;
typedef Kokkos::View<complex*****, DeviceType> t_sna_5c;
@ -168,7 +171,8 @@ class SNAKokkos {
SNAKokkos(const SNAKokkos<DeviceType,real_type,vector_length>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);
inline
SNAKokkos(real_type, int, real_type, int, int, int, int, int, int, int);
//SNAKokkos(real_type, int, real_type, int, int, int, int, int, int, int);
SNAKokkos(const PairSNAPKokkos<DeviceType, real_type, vector_length>&);
KOKKOS_INLINE_FUNCTION
~SNAKokkos();
@ -182,88 +186,87 @@ class SNAKokkos {
double memory_usage();
int ncoeff;
int host_flag;
// functions for bispectrum coefficients, GPU only
KOKKOS_INLINE_FUNCTION
void compute_cayley_klein(const int&, const int&, const int&);
void compute_cayley_klein(const int&, const int&) const;
KOKKOS_INLINE_FUNCTION
void pre_ui(const int&, const int&, const int&, const int&); // ForceSNAP
void pre_ui(const int&, const int&, const int&) const; // ForceSNAP
// version of the code with parallelism over j_bend
KOKKOS_INLINE_FUNCTION
void compute_ui_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int); // ForceSNAP
void compute_ui_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int) const; // ForceSNAP
// version of the code without parallelism over j_bend
KOKKOS_INLINE_FUNCTION
void compute_ui_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int); // ForceSNAP
void compute_ui_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int) const; // ForceSNAP
// desymmetrize ulisttot
KOKKOS_INLINE_FUNCTION
void compute_zi(const int&, const int&, const int&); // ForceSNAP
void transform_ui(const int&, const int&) const;
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
void compute_zi(const int&, const int&) const; // ForceSNAP
template <bool chemsnap, bool need_atomics> KOKKOS_INLINE_FUNCTION
void compute_yi(const int&, const int&) const; // ForceSNAP
template <bool chemsnap, bool need_atomics> KOKKOS_INLINE_FUNCTION
void compute_yi_with_zlist(const int&, const int&) const; // ForceSNAP
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
void compute_bi(const int&, const int&) const; // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_yi(int,int,int,
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_yi_with_zlist(int,int,int,
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_bi(const int&, const int&, const int&); // ForceSNAP
void compute_beta_linear(const int&, const int&, const int&) const;
template <bool need_atomics> KOKKOS_INLINE_FUNCTION
void compute_beta_quadratic(const int&, const int&, const int&) const;
// functions for derivatives, GPU only
// version of the code with parallelism over j_bend
template<int dir>
KOKKOS_INLINE_FUNCTION
void compute_fused_deidrj_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int); //ForceSNAP
void compute_fused_deidrj_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int) const; //ForceSNAP
// version of the code without parallelism over j_bend
template<int dir>
KOKKOS_INLINE_FUNCTION
void compute_fused_deidrj_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int); //ForceSNAP
void compute_fused_deidrj_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int) const; //ForceSNAP
// core "evaluation" functions that get plugged into "compute" functions
// plugged into compute_ui_small, compute_ui_large
KOKKOS_FORCEINLINE_FUNCTION
void evaluate_ui_jbend(const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&, const int&,
const int&, const int&, const int&);
const int&, const int&) const;
// plugged into compute_zi, compute_yi
KOKKOS_FORCEINLINE_FUNCTION
complex evaluate_zi(const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&,
const int&, const int&, const int&, const int&, const real_type*);
// plugged into compute_yi, compute_yi_with_zlist
const int&, const int&, const int&, const real_type*) const;
// plugged into compute_bi
KOKKOS_FORCEINLINE_FUNCTION
real_type evaluate_beta_scaled(const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&,
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &);
real_type evaluate_bi(const int&, const int&, const int&, const int&,
const int&, const int&, const int&) const;
// plugged into compute_yi, compute_yi_with_zlist
template <bool chemsnap> KOKKOS_FORCEINLINE_FUNCTION
real_type evaluate_beta_scaled(const int&, const int&, const int&, const int&, const int&, const int&, const int&) const;
// plugged into compute_fused_deidrj_small, compute_fused_deidrj_large
KOKKOS_FORCEINLINE_FUNCTION
real_type evaluate_duidrj_jbend(const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&,
const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&,
const int&, const int&, const int&, const int&);
const int&, const int&, const int&) const;
// functions for bispectrum coefficients, CPU only
KOKKOS_INLINE_FUNCTION
void pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&,const int&); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_zi_cpu(const int&); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_yi_cpu(int,
const Kokkos::View<real_type**, DeviceType> &beta); // ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
template <bool need_atomics> KOKKOS_INLINE_FUNCTION
void compute_ui_cpu(const int&, const int&) const; // ForceSNAP
// functions for derivatives, CPU only
KOKKOS_INLINE_FUNCTION
void compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); //ForceSNAP
void compute_duidrj_cpu(const int&, const int&) const; //ForceSNAP
KOKKOS_INLINE_FUNCTION
void compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
void compute_deidrj_cpu(const int&, const int&) const; // ForceSNAP
KOKKOS_INLINE_FUNCTION
real_type compute_sfac(real_type, real_type, real_type, real_type); // add_uarraytot, compute_duarray
real_type compute_sfac(real_type, real_type, real_type, real_type) const; // add_uarraytot, compute_duarray
KOKKOS_INLINE_FUNCTION
real_type compute_dsfac(real_type, real_type, real_type, real_type); // compute_duarray
real_type compute_dsfac(real_type, real_type, real_type, real_type) const; // compute_duarray
KOKKOS_INLINE_FUNCTION
void compute_s_dsfac(const real_type, const real_type, const real_type, const real_type, real_type&, real_type&); // compute_cayley_klein
void compute_s_dsfac(const real_type, const real_type, const real_type, const real_type, real_type&, real_type&) const; // compute_cayley_klein
#ifdef TIMING_INFO
double* timers;
@ -283,37 +286,41 @@ class SNAKokkos {
t_sna_2d dinnerij;
t_sna_2i element;
t_sna_3d dedr;
int natom, nmax;
int natom, natom_pad, nmax;
void grow_rij(int, int);
int twojmax, diagonalstyle;
// Input beta coefficients; aliases the object in PairSNAPKokkos
t_sna_2d_lr d_coeffelem;
// Beta for all atoms in list; aliases the object in PairSNAPKokkos
// for qSNAP the quadratic terms get accumulated into it
// in compute_bi
t_sna_2d d_beta;
// Structures for both the CPU, GPU backend
t_sna_3d ulisttot_re;
t_sna_3d ulisttot_im;
t_sna_3c ulisttot; // un-folded ulisttot
t_sna_3c zlist;
t_sna_3d blist;
t_sna_3c_ll ulisttot;
t_sna_3c_ll ulisttot_full; // un-folded ulisttot, cpu only
t_sna_3c_ll zlist;
t_sna_3c_ll ulist;
t_sna_3c_ll ylist;
t_sna_3d ylist_re;
t_sna_3d ylist_im;
// derivatives of data
t_sna_4c3_ll dulist;
// Structures for the CPU backend only
t_sna_3c ulist_cpu;
t_sna_4c3 dulist_cpu;
// Modified structures for GPU backend
t_sna_3c_ll a_pack; // Cayley-Klein `a`
t_sna_3c_ll b_pack; // `b`
t_sna_4c_ll da_pack; // `da`
t_sna_4c_ll db_pack; // `db`
t_sna_4d_ll sfac_pack; // sfac, dsfac_{x,y,z}
t_sna_4d_ll ulisttot_re_pack; // split real,
t_sna_4d_ll ulisttot_im_pack; // imag, AoSoA, flattened
t_sna_4c_ll ulisttot_pack; // AoSoA layout
t_sna_4c_ll zlist_pack; // AoSoA layout
t_sna_4d_ll blist_pack;
t_sna_4d_ll ylist_pack_re; // split real,
t_sna_4d_ll ylist_pack_im; // imag AoSoA layout
t_sna_2c a_gpu; // Cayley-Klein `a`
t_sna_2c b_gpu; // `b`
t_sna_3c da_gpu; // `da`
t_sna_3c db_gpu; // `db`
t_sna_3d sfac_gpu; // sfac, dsfac_{x,y,z}
int idxcg_max, idxu_max, idxu_half_max, idxu_cache_max, idxz_max, idxb_max;
@ -363,25 +370,11 @@ class SNAKokkos {
inline
void init_rootpqarray(); // init()
KOKKOS_INLINE_FUNCTION
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real_type&, const real_type&, const real_type&, const real_type&, const real_type&, int); // compute_ui
KOKKOS_INLINE_FUNCTION
void compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
const real_type&, const real_type&, const real_type&,
const real_type&, const real_type&); // compute_ui_cpu
inline
double deltacg(int, int, int); // init_clebsch_gordan
inline
int compute_ncoeff(); // SNAKokkos()
KOKKOS_INLINE_FUNCTION
void compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
const real_type&, const real_type&, const real_type&, // compute_duidrj_cpu
const real_type&, const real_type&, const real_type&, const real_type&, const real_type&,
const real_type&, const real_type&);
// Sets the style for the switching function
// 0 = none
@ -401,6 +394,9 @@ class SNAKokkos {
real_type wself;
int wselfall_flag;
// quadratic flag
int quadratic_flag;
int bzero_flag; // 1 if bzero subtracted from barray
Kokkos::View<real_type*, DeviceType> bzero; // array of B values for isolated atoms
};
@ -409,4 +405,3 @@ class SNAKokkos {
#include "sna_kokkos_impl.h"
#endif

File diff suppressed because it is too large Load Diff