Merge branch 'develop' into pair_d3
This commit is contained in:
@ -30,29 +30,34 @@ PairStyle(snap/kk/host,PairSNAPKokkosDevice<LMPHostType>);
|
||||
#include "pair_snap.h"
|
||||
#include "kokkos_type.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "sna_kokkos.h"
|
||||
#include "pair_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
// pre-declare so sna_kokkos.h can refer to it
|
||||
template<class DeviceType, typename real_type_, int vector_length_> class PairSNAPKokkos;
|
||||
};
|
||||
|
||||
#include "sna_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Routines for both the CPU and GPU backend
// Each tag below is an empty type. PairSNAPKokkos declares operator() overloads
// that take one of these tags as their first parameter, so the tag type passed
// to a launch policy picks which overload runs.
|
||||
struct TagPairSNAPPreUi{};
|
||||
struct TagPairSNAPTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero ylist
|
||||
// NOTE(review): `chemsnap` is a compile-time bool carried by the tag; its exact
// meaning is not visible in this header -- confirm against the implementation.
template <bool chemsnap> struct TagPairSNAPComputeZi{};
|
||||
template <bool chemsnap> struct TagPairSNAPComputeBi{};
|
||||
struct TagPairSNAPComputeBetaLinear{};
|
||||
struct TagPairSNAPComputeBetaQuadratic{};
|
||||
template <bool chemsnap> struct TagPairSNAPComputeYi{};
|
||||
template <bool chemsnap> struct TagPairSNAPComputeYiWithZlist{};
|
||||
// Tag parameterized on NEIGHFLAG and EVFLAG; matched by the operator() overloads
// taking (tag, const int& ii) with an optional trailing EV_FLOAT& argument.
template<int NEIGHFLAG, int EVFLAG>
|
||||
struct TagPairSNAPComputeForce{};
|
||||
|
||||
|
||||
// GPU backend only
|
||||
struct TagPairSNAPComputeNeigh{};
|
||||
struct TagPairSNAPComputeCayleyKlein{};
|
||||
struct TagPairSNAPPreUi{};
|
||||
struct TagPairSNAPComputeUiSmall{}; // more parallelism, more divergence
|
||||
struct TagPairSNAPComputeUiLarge{}; // less parallelism, no divergence
|
||||
struct TagPairSNAPTransformUi{}; // re-order ulisttot from SoA to AoSoA, zero ylist
|
||||
struct TagPairSNAPComputeZi{};
|
||||
struct TagPairSNAPBeta{};
|
||||
struct TagPairSNAPComputeBi{};
|
||||
struct TagPairSNAPTransformBi{}; // re-order blist from AoSoA to AoS
|
||||
struct TagPairSNAPComputeYi{};
|
||||
struct TagPairSNAPComputeYiWithZlist{};
|
||||
template<int dir>
|
||||
struct TagPairSNAPComputeFusedDeidrjSmall{}; // more parallelism, more divergence
|
||||
template<int dir>
|
||||
@ -60,14 +65,7 @@ struct TagPairSNAPComputeFusedDeidrjLarge{}; // less parallelism, no divergence
|
||||
|
||||
// CPU backend only
|
||||
struct TagPairSNAPComputeNeighCPU{};
|
||||
struct TagPairSNAPPreUiCPU{};
|
||||
struct TagPairSNAPComputeUiCPU{};
|
||||
struct TagPairSNAPTransformUiCPU{};
|
||||
struct TagPairSNAPComputeZiCPU{};
|
||||
struct TagPairSNAPBetaCPU{};
|
||||
struct TagPairSNAPComputeBiCPU{};
|
||||
struct TagPairSNAPZeroYiCPU{};
|
||||
struct TagPairSNAPComputeYiCPU{};
|
||||
struct TagPairSNAPComputeDuidrjCPU{};
|
||||
struct TagPairSNAPComputeDeidrjCPU{};
|
||||
|
||||
@ -80,6 +78,8 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
typedef EV_FLOAT value_type;
|
||||
|
||||
static constexpr LAMMPS_NS::ExecutionSpace execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
static constexpr int host_flag = (execution_space == LAMMPS_NS::Host);
|
||||
static constexpr int vector_length = vector_length_;
|
||||
using real_type = real_type_;
|
||||
using complex = SNAComplex<real_type>;
|
||||
@ -93,9 +93,11 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
static constexpr int team_size_compute_ui = 2;
|
||||
static constexpr int tile_size_transform_ui = 2;
|
||||
static constexpr int tile_size_compute_zi = 2;
|
||||
static constexpr int min_blocks_compute_zi = 0; // no minimum bound
|
||||
static constexpr int tile_size_compute_bi = 2;
|
||||
static constexpr int tile_size_transform_bi = 2;
|
||||
static constexpr int tile_size_compute_beta = 2;
|
||||
static constexpr int tile_size_compute_yi = 2;
|
||||
static constexpr int min_blocks_compute_yi = 0; // no minimum bound
|
||||
static constexpr int team_size_compute_fused_deidrj = 2;
|
||||
#elif defined(KOKKOS_ENABLE_SYCL)
|
||||
static constexpr int team_size_compute_neigh = 4;
|
||||
@ -104,9 +106,11 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
static constexpr int team_size_compute_ui = 8;
|
||||
static constexpr int tile_size_transform_ui = 8;
|
||||
static constexpr int tile_size_compute_zi = 4;
|
||||
static constexpr int min_blocks_compute_zi = 0; // no minimum bound
|
||||
static constexpr int tile_size_compute_bi = 4;
|
||||
static constexpr int tile_size_transform_bi = 4;
|
||||
static constexpr int tile_size_compute_beta = 8;
|
||||
static constexpr int tile_size_compute_yi = 8;
|
||||
static constexpr int min_blocks_compute_yi = 0; // no minimum bound
|
||||
static constexpr int team_size_compute_fused_deidrj = 4;
|
||||
#else
|
||||
static constexpr int team_size_compute_neigh = 4;
|
||||
@ -116,17 +120,21 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
static constexpr int tile_size_transform_ui = 4;
|
||||
static constexpr int tile_size_compute_zi = 8;
|
||||
static constexpr int tile_size_compute_bi = 4;
|
||||
static constexpr int tile_size_transform_bi = 4;
|
||||
static constexpr int tile_size_compute_beta = 4;
|
||||
static constexpr int tile_size_compute_yi = 8;
|
||||
static constexpr int team_size_compute_fused_deidrj = sizeof(real_type) == 4 ? 4 : 2;
|
||||
|
||||
// this empirically reduces perf fluctuations from compiler version to compiler version
|
||||
static constexpr int min_blocks_compute_zi = 4;
|
||||
static constexpr int min_blocks_compute_yi = 4;
|
||||
#endif
|
||||
|
||||
// Custom MDRangePolicy, Rank3, to reduce verbosity of kernel launches
|
||||
// This hides the Kokkos::IndexType<int> and Kokkos::Rank<3...>
|
||||
// and reduces the verbosity of the LaunchBound by hiding the explicit
|
||||
// multiplication by vector_length
|
||||
template <class Device, int num_tiles, class TagPairSNAP>
|
||||
using Snap3DRangePolicy = typename Kokkos::MDRangePolicy<Device, Kokkos::IndexType<int>, Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, Kokkos::LaunchBounds<vector_length * num_tiles>, TagPairSNAP>;
|
||||
template <class Device, int num_tiles, class TagPairSNAP, int min_blocks = 0>
|
||||
using Snap3DRangePolicy = typename Kokkos::MDRangePolicy<Device, Kokkos::IndexType<int>, Kokkos::Rank<3, Kokkos::Iterate::Left, Kokkos::Iterate::Left>, Kokkos::LaunchBounds<vector_length * num_tiles, min_blocks>, TagPairSNAP>;
|
||||
|
||||
// Custom SnapAoSoATeamPolicy to reduce the verbosity of kernel launches
|
||||
// This hides the LaunchBounds abstraction by hiding the explicit
|
||||
@ -134,6 +142,29 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
// Alias for a Kokkos::TeamPolicy on `Device`, tagged with TagPairSNAP, whose
// LaunchBounds argument is vector_length * num_teams, so call sites only pass num_teams.
template <class Device, int num_teams, class TagPairSNAP>
|
||||
using SnapAoSoATeamPolicy = typename Kokkos::TeamPolicy<Device, Kokkos::LaunchBounds<vector_length * num_teams>, TagPairSNAP>;
|
||||
|
||||
// Custom MDRangePolicy, Rank2, on the host, to reduce verbosity of kernel launches. The striding of this launch is intentionally
|
||||
// different from the tiled 3D range policy on the device.
// Fixed choices hidden by the alias: Kokkos::Schedule<Kokkos::Dynamic>, an int index type,
// and Kokkos::Iterate::Right for both iteration arguments of Kokkos::Rank<2, ...>.
|
||||
template <class Device, class TagPairSNAP>
|
||||
using Snap2DHostRangePolicy = typename Kokkos::MDRangePolicy<Device, Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<int>, Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Right>, TagPairSNAP>;
|
||||
|
||||
// Custom 1D RangePolicy on the host, to reduce verbosity of kernel launches.
// Wraps Kokkos::RangePolicy with Kokkos::Schedule<Kokkos::Dynamic>, tagged with TagPairSNAP.
|
||||
template <class Device, class TagPairSNAP>
|
||||
using Snap1DHostRangePolicy = typename Kokkos::RangePolicy<Device, Kokkos::Schedule<Kokkos::Dynamic>, TagPairSNAP>;
|
||||
|
||||
// Helper routine that returns a CPU or a GPU policy as appropriate
//
// Template parameters:
//   Device      - forwarded to the policy alias that is returned
//   num_tiles   - device path only: forwarded to Snap3DRangePolicy and used as the second tile extent
//   TagPairSNAP - work tag forwarded to the returned policy
//   min_blocks  - device path only: forwarded to Snap3DRangePolicy (defaults to 0)
// Parameters:
//   chunk_size_div - host: the 1D range is [0, chunk_size_div * vector_length);
//                    device: upper bound of the third range dimension
//   second_loop    - device: upper bound of the second range dimension; not used on the host path
// Returns: a Snap1DHostRangePolicy when host_flag is set, otherwise a Snap3DRangePolicy.
// The two branches return different types; this works because the branch is selected
// with `if constexpr` and the return type is deduced with `auto`.
|
||||
template <class Device, int num_tiles, class TagPairSNAP, int min_blocks = 0>
|
||||
auto snap_get_policy(const int& chunk_size_div, const int& second_loop) {
|
||||
if constexpr (host_flag) {
|
||||
// host: a single flat range; the second loop index is not part of the policy
return Snap1DHostRangePolicy<Device, TagPairSNAP>(0, chunk_size_div * vector_length);
|
||||
|
||||
// the 2-d policy is still correct but it has atomics so it's slower on the CPU
|
||||
//return Snap2DHostRangePolicy<Device, TagPairSNAP>({0, 0}, {chunk_size_div * vector_length, second_loop});
|
||||
} else
|
||||
// device: lower bounds {0, 0, 0}, upper bounds on the next line, tile extents on the last line
return Snap3DRangePolicy<Device, num_tiles, TagPairSNAP, min_blocks>({0, 0, 0},
|
||||
{vector_length, second_loop, chunk_size_div},
|
||||
{vector_length, num_tiles, 1});
|
||||
}
|
||||
|
||||
PairSNAPKokkos(class LAMMPS *);
|
||||
~PairSNAPKokkos() override;
|
||||
|
||||
@ -149,6 +180,7 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
template<class TagStyle>
|
||||
void check_team_size_reduce(int, int&);
|
||||
|
||||
// CPU and GPU backend
|
||||
template<int NEIGHFLAG, int EVFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const int& ii) const;
|
||||
@ -157,18 +189,23 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeForce<NEIGHFLAG,EVFLAG>,const int& ii, EV_FLOAT&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPBetaCPU,const int& ii) const;
|
||||
|
||||
// GPU backend only
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeNeigh,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeigh>::member_type& team) const;
|
||||
|
||||
// GPU backend only
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeCayleyKlein, const int iatom_mod, const int jnbor, const int iatom_div) const;
|
||||
|
||||
// CPU and GPU
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPPreUi,const int iatom_mod, const int j, const int iatom_div) const;
|
||||
void operator() (TagPairSNAPPreUi, const int& iatom_mod, const int& j, const int& iatom_div) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPPreUi, const int& iatom, const int& j) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPPreUi, const int& iatom) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeUiSmall,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUiSmall>::member_type& team) const;
|
||||
@ -177,25 +214,67 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
void operator() (TagPairSNAPComputeUiLarge,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUiLarge>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPTransformUi,const int iatom_mod, const int j, const int iatom_div) const;
|
||||
void operator() (TagPairSNAPTransformUi, const int& iatom_mod, const int& idxu, const int& iatom_div) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeZi,const int iatom_mod, const int idxz, const int iatom_div) const;
|
||||
void operator() (TagPairSNAPTransformUi, const int& iatom, const int& idxu) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPBeta, const int& ii) const;
|
||||
void operator() (TagPairSNAPTransformUi, const int& iatom) const;
|
||||
|
||||
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeZi<chemsnap>, const int& iatom_mod, const int& idxz, const int& iatom_div) const;
|
||||
|
||||
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeZi<chemsnap>, const int& iatom, const int& idxz) const;
|
||||
|
||||
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeZi<chemsnap>, const int& iatom) const;
|
||||
|
||||
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeBi<chemsnap>, const int& iatom_mod, const int& idxb, const int& iatom_div) const;
|
||||
|
||||
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeBi<chemsnap>, const int& iatom, const int& idxb) const;
|
||||
|
||||
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeBi<chemsnap>, const int& iatom) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeBi,const int iatom_mod, const int idxb, const int iatom_div) const;
|
||||
void operator() (TagPairSNAPComputeBetaLinear, const int& iatom_mod, const int& idxb, const int& iatom_div) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPTransformBi,const int iatom_mod, const int idxb, const int iatom_div) const;
|
||||
void operator() (TagPairSNAPComputeBetaLinear, const int& iatom, const int& idxb) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYi,const int iatom_mod, const int idxz, const int iatom_div) const;
|
||||
void operator() (TagPairSNAPComputeBetaLinear, const int& iatom) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYiWithZlist,const int iatom_mod, const int idxz, const int iatom_div) const;
|
||||
void operator() (TagPairSNAPComputeBetaQuadratic, const int& iatom_mod, const int& idxb, const int& iatom_div) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeBetaQuadratic, const int& iatom, const int& idxb) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeBetaQuadratic, const int& iatom) const;
|
||||
|
||||
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYi<chemsnap>, const int& iatom_mod, const int& idxz, const int& iatom_div) const;
|
||||
|
||||
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYi<chemsnap>, const int& iatom, const int& idxz) const;
|
||||
|
||||
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYi<chemsnap>, const int& iatom) const;
|
||||
|
||||
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYiWithZlist<chemsnap>, const int& iatom_mod, const int& idxz, const int& iatom_div) const;
|
||||
|
||||
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYiWithZlist<chemsnap>, const int& iatom, const int& idxz) const;
|
||||
|
||||
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYiWithZlist<chemsnap>, const int& iatom) const;
|
||||
|
||||
template<int dir>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -210,28 +289,22 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
void operator() (TagPairSNAPComputeNeighCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeNeighCPU>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPPreUiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPPreUiCPU>::member_type& team) const;
|
||||
void operator() (TagPairSNAPComputeUiCPU, const int& iatom, const int& jnbor) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeUiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeUiCPU>::member_type& team) const;
|
||||
void operator() (TagPairSNAPComputeUiCPU, const int& iatom) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPTransformUiCPU, const int j, const int iatom) const;
|
||||
void operator() (TagPairSNAPComputeDuidrjCPU, const int& iatom, const int& jnbor) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeZiCPU,const int& ii) const;
|
||||
void operator() (TagPairSNAPComputeDuidrjCPU, const int& iatom) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeBiCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeBiCPU>::member_type& team) const;
|
||||
void operator() (TagPairSNAPComputeDeidrjCPU, const int& iatom, const int& jnbor) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeYiCPU,const int& ii) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeDuidrjCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDuidrjCPU>::member_type& team) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (TagPairSNAPComputeDeidrjCPU,const typename Kokkos::TeamPolicy<DeviceType, TagPairSNAPComputeDeidrjCPU>::member_type& team) const;
|
||||
void operator() (TagPairSNAPComputeDeidrjCPU, const int& iatom) const;
|
||||
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -252,7 +325,7 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
SNAKokkos<DeviceType, real_type, vector_length> snaKK;
|
||||
|
||||
int inum,max_neighs,chunk_size,chunk_offset;
|
||||
int host_flag,neighflag;
|
||||
int neighflag;
|
||||
|
||||
int eflag,vflag;
|
||||
|
||||
@ -260,13 +333,12 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
|
||||
Kokkos::View<real_type*, DeviceType> d_radelem; // element radii
|
||||
Kokkos::View<real_type*, DeviceType> d_wjelem; // elements weights
|
||||
Kokkos::View<real_type**, Kokkos::LayoutRight, DeviceType> d_coeffelem; // element bispectrum coefficients
|
||||
typename SNAKokkos<DeviceType, real_type, vector_length>::t_sna_2d_lr d_coeffelem; // element bispectrum coefficients
|
||||
Kokkos::View<real_type*, DeviceType> d_sinnerelem; // element inner cutoff midpoint
|
||||
Kokkos::View<real_type*, DeviceType> d_dinnerelem; // element inner cutoff half-width
|
||||
Kokkos::View<T_INT*, DeviceType> d_map; // mapping from atom types to elements
|
||||
Kokkos::View<T_INT*, DeviceType> d_ninside; // ninside for all atoms in list
|
||||
Kokkos::View<real_type**, DeviceType> d_beta; // betas for all atoms in list
|
||||
Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> d_beta_pack; // betas for all atoms in list, GPU
|
||||
typename SNAKokkos<DeviceType, real_type, vector_length>::t_sna_2d d_beta; // betas for all atoms in list
|
||||
|
||||
typedef Kokkos::DualView<F_FLOAT**, DeviceType> tdual_fparams;
|
||||
tdual_fparams k_cutsq;
|
||||
@ -301,6 +373,9 @@ class PairSNAPKokkos : public PairSNAP {
|
||||
template <typename scratch_type>
|
||||
int scratch_size_helper(int values_per_team);
|
||||
|
||||
// Make SNAKokkos a friend
|
||||
friend class SNAKokkos<DeviceType, real_type, vector_length>;
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -134,6 +134,8 @@ class SNAKokkos {
|
||||
static constexpr int vector_length = vector_length_;
|
||||
|
||||
using KKDeviceType = typename KKDevice<DeviceType>::value;
|
||||
static constexpr LAMMPS_NS::ExecutionSpace execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
static constexpr int host_flag = (execution_space == LAMMPS_NS::Host);
|
||||
|
||||
typedef Kokkos::View<int*, DeviceType> t_sna_1i;
|
||||
typedef Kokkos::View<real_type*, DeviceType> t_sna_1d;
|
||||
@ -141,6 +143,7 @@ class SNAKokkos {
|
||||
typedef Kokkos::View<int**, DeviceType> t_sna_2i;
|
||||
typedef Kokkos::View<real_type**, DeviceType> t_sna_2d;
|
||||
typedef Kokkos::View<real_type**, Kokkos::LayoutLeft, DeviceType> t_sna_2d_ll;
|
||||
typedef Kokkos::View<real_type**, Kokkos::LayoutRight, DeviceType> t_sna_2d_lr;
|
||||
typedef Kokkos::View<real_type***, DeviceType> t_sna_3d;
|
||||
typedef Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> t_sna_3d_ll;
|
||||
typedef Kokkos::View<real_type***[3], DeviceType> t_sna_4d;
|
||||
@ -156,7 +159,7 @@ class SNAKokkos {
|
||||
typedef Kokkos::View<complex***, DeviceType> t_sna_3c;
|
||||
typedef Kokkos::View<complex***, Kokkos::LayoutLeft, DeviceType> t_sna_3c_ll;
|
||||
typedef Kokkos::View<complex***[3], DeviceType> t_sna_4c;
|
||||
typedef Kokkos::View<complex***[3], Kokkos::LayoutLeft, DeviceType> t_sna_4c3_ll;
|
||||
typedef Kokkos::View<complex***[3], DeviceType> t_sna_4c3;
|
||||
typedef Kokkos::View<complex****, Kokkos::LayoutLeft, DeviceType> t_sna_4c_ll;
|
||||
typedef Kokkos::View<complex**[3], DeviceType> t_sna_3c3;
|
||||
typedef Kokkos::View<complex*****, DeviceType> t_sna_5c;
|
||||
@ -168,7 +171,8 @@ class SNAKokkos {
|
||||
SNAKokkos(const SNAKokkos<DeviceType,real_type,vector_length>& sna, const typename Kokkos::TeamPolicy<DeviceType>::member_type& team);
|
||||
|
||||
inline
|
||||
SNAKokkos(real_type, int, real_type, int, int, int, int, int, int, int);
|
||||
//SNAKokkos(real_type, int, real_type, int, int, int, int, int, int, int);
|
||||
SNAKokkos(const PairSNAPKokkos<DeviceType, real_type, vector_length>&);
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
~SNAKokkos();
|
||||
@ -182,88 +186,87 @@ class SNAKokkos {
|
||||
double memory_usage();
|
||||
|
||||
int ncoeff;
|
||||
int host_flag;
|
||||
|
||||
// functions for bispectrum coefficients, GPU only
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_cayley_klein(const int&, const int&, const int&);
|
||||
void compute_cayley_klein(const int&, const int&) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void pre_ui(const int&, const int&, const int&, const int&); // ForceSNAP
|
||||
void pre_ui(const int&, const int&, const int&) const; // ForceSNAP
|
||||
|
||||
// version of the code with parallelism over j_bend
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_ui_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int); // ForceSNAP
|
||||
void compute_ui_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int) const; // ForceSNAP
|
||||
// version of the code without parallelism over j_bend
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_ui_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int); // ForceSNAP
|
||||
void compute_ui_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int) const; // ForceSNAP
|
||||
|
||||
// desymmetrize ulisttot
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_zi(const int&, const int&, const int&); // ForceSNAP
|
||||
void transform_ui(const int&, const int&) const;
|
||||
|
||||
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
|
||||
void compute_zi(const int&, const int&) const; // ForceSNAP
|
||||
template <bool chemsnap, bool need_atomics> KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi(const int&, const int&) const; // ForceSNAP
|
||||
template <bool chemsnap, bool need_atomics> KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi_with_zlist(const int&, const int&) const; // ForceSNAP
|
||||
template <bool chemsnap> KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi(const int&, const int&) const; // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi(int,int,int,
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi_with_zlist(int,int,int,
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &beta_pack); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi(const int&, const int&, const int&); // ForceSNAP
|
||||
void compute_beta_linear(const int&, const int&, const int&) const;
|
||||
template <bool need_atomics> KOKKOS_INLINE_FUNCTION
|
||||
void compute_beta_quadratic(const int&, const int&, const int&) const;
|
||||
|
||||
// functions for derivatives, GPU only
|
||||
// version of the code with parallelism over j_bend
|
||||
template<int dir>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_fused_deidrj_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int); //ForceSNAP
|
||||
void compute_fused_deidrj_small(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int, const int) const; //ForceSNAP
|
||||
// version of the code without parallelism over j_bend
|
||||
template<int dir>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_fused_deidrj_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int); //ForceSNAP
|
||||
void compute_fused_deidrj_large(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, const int, const int, const int) const; //ForceSNAP
|
||||
|
||||
// core "evaluation" functions that get plugged into "compute" functions
|
||||
// plugged into compute_ui_small, compute_ui_large
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
void evaluate_ui_jbend(const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&, const int&,
|
||||
const int&, const int&, const int&);
|
||||
const int&, const int&) const;
|
||||
// plugged into compute_zi, compute_yi
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
complex evaluate_zi(const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&,
|
||||
const int&, const int&, const int&, const int&, const real_type*);
|
||||
// plugged into compute_yi, compute_yi_with_zlist
|
||||
const int&, const int&, const int&, const real_type*) const;
|
||||
// plugged into compute_bi
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
real_type evaluate_beta_scaled(const int&, const int&, const int&, const int&, const int&, const int&, const int&, const int&,
|
||||
const Kokkos::View<real_type***, Kokkos::LayoutLeft, DeviceType> &);
|
||||
real_type evaluate_bi(const int&, const int&, const int&, const int&,
|
||||
const int&, const int&, const int&) const;
|
||||
// plugged into compute_yi, compute_yi_with_zlist
|
||||
template <bool chemsnap> KOKKOS_FORCEINLINE_FUNCTION
|
||||
real_type evaluate_beta_scaled(const int&, const int&, const int&, const int&, const int&, const int&, const int&) const;
|
||||
// plugged into compute_fused_deidrj_small, compute_fused_deidrj_large
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
real_type evaluate_duidrj_jbend(const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&,
|
||||
const WignerWrapper<real_type, vector_length>&, const complex&, const complex&, const real_type&,
|
||||
const int&, const int&, const int&, const int&);
|
||||
const int&, const int&, const int&) const;
|
||||
|
||||
// functions for bispectrum coefficients, CPU only
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void pre_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team,const int&,const int&); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_ui_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_zi_cpu(const int&); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_yi_cpu(int,
|
||||
const Kokkos::View<real_type**, DeviceType> &beta); // ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_bi_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int); // ForceSNAP
|
||||
template <bool need_atomics> KOKKOS_INLINE_FUNCTION
|
||||
void compute_ui_cpu(const int&, const int&) const; // ForceSNAP
|
||||
|
||||
// functions for derivatives, CPU only
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_duidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); //ForceSNAP
|
||||
void compute_duidrj_cpu(const int&, const int&) const; //ForceSNAP
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_deidrj_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int); // ForceSNAP
|
||||
void compute_deidrj_cpu(const int&, const int&) const; // ForceSNAP
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
real_type compute_sfac(real_type, real_type, real_type, real_type); // add_uarraytot, compute_duarray
|
||||
real_type compute_sfac(real_type, real_type, real_type, real_type) const; // add_uarraytot, compute_duarray
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
real_type compute_dsfac(real_type, real_type, real_type, real_type); // compute_duarray
|
||||
real_type compute_dsfac(real_type, real_type, real_type, real_type) const; // compute_duarray
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_s_dsfac(const real_type, const real_type, const real_type, const real_type, real_type&, real_type&); // compute_cayley_klein
|
||||
void compute_s_dsfac(const real_type, const real_type, const real_type, const real_type, real_type&, real_type&) const; // compute_cayley_klein
|
||||
|
||||
#ifdef TIMING_INFO
|
||||
double* timers;
|
||||
@ -283,37 +286,41 @@ class SNAKokkos {
|
||||
t_sna_2d dinnerij;
|
||||
t_sna_2i element;
|
||||
t_sna_3d dedr;
|
||||
int natom, nmax;
|
||||
int natom, natom_pad, nmax;
|
||||
|
||||
void grow_rij(int, int);
|
||||
|
||||
int twojmax, diagonalstyle;
|
||||
|
||||
// Input beta coefficients; aliases the object in PairSNAPKokkos
|
||||
t_sna_2d_lr d_coeffelem;
|
||||
|
||||
// Beta for all atoms in list; aliases the object in PairSNAPKokkos
|
||||
// for qSNAP the quadratic terms get accumulated into it
|
||||
// in compute_bi
|
||||
t_sna_2d d_beta;
|
||||
|
||||
// Structures for both the CPU, GPU backend
|
||||
t_sna_3d ulisttot_re;
|
||||
t_sna_3d ulisttot_im;
|
||||
t_sna_3c ulisttot; // un-folded ulisttot
|
||||
|
||||
t_sna_3c zlist;
|
||||
t_sna_3d blist;
|
||||
t_sna_3c_ll ulisttot;
|
||||
t_sna_3c_ll ulisttot_full; // un-folded ulisttot, cpu only
|
||||
t_sna_3c_ll zlist;
|
||||
|
||||
t_sna_3c_ll ulist;
|
||||
t_sna_3c_ll ylist;
|
||||
t_sna_3d ylist_re;
|
||||
t_sna_3d ylist_im;
|
||||
|
||||
// derivatives of data
|
||||
t_sna_4c3_ll dulist;
|
||||
// Structures for the CPU backend only
|
||||
t_sna_3c ulist_cpu;
|
||||
t_sna_4c3 dulist_cpu;
|
||||
|
||||
// Modified structures for GPU backend
|
||||
t_sna_3c_ll a_pack; // Cayley-Klein `a`
|
||||
t_sna_3c_ll b_pack; // `b`
|
||||
t_sna_4c_ll da_pack; // `da`
|
||||
t_sna_4c_ll db_pack; // `db`
|
||||
t_sna_4d_ll sfac_pack; // sfac, dsfac_{x,y,z}
|
||||
|
||||
t_sna_4d_ll ulisttot_re_pack; // split real,
|
||||
t_sna_4d_ll ulisttot_im_pack; // imag, AoSoA, flattened
|
||||
t_sna_4c_ll ulisttot_pack; // AoSoA layout
|
||||
t_sna_4c_ll zlist_pack; // AoSoA layout
|
||||
t_sna_4d_ll blist_pack;
|
||||
t_sna_4d_ll ylist_pack_re; // split real,
|
||||
t_sna_4d_ll ylist_pack_im; // imag AoSoA layout
|
||||
t_sna_2c a_gpu; // Cayley-Klein `a`
|
||||
t_sna_2c b_gpu; // `b`
|
||||
t_sna_3c da_gpu; // `da`
|
||||
t_sna_3c db_gpu; // `db`
|
||||
t_sna_3d sfac_gpu; // sfac, dsfac_{x,y,z}
|
||||
|
||||
int idxcg_max, idxu_max, idxu_half_max, idxu_cache_max, idxz_max, idxb_max;
|
||||
|
||||
@ -363,25 +370,11 @@ class SNAKokkos {
|
||||
inline
|
||||
void init_rootpqarray(); // init()
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void add_uarraytot(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int, const real_type&, const real_type&, const real_type&, const real_type&, const real_type&, int); // compute_ui
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_uarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
||||
const real_type&, const real_type&, const real_type&,
|
||||
const real_type&, const real_type&); // compute_ui_cpu
|
||||
|
||||
|
||||
inline
|
||||
double deltacg(int, int, int); // init_clebsch_gordan
|
||||
|
||||
inline
|
||||
int compute_ncoeff(); // SNAKokkos()
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_duarray_cpu(const typename Kokkos::TeamPolicy<DeviceType>::member_type& team, int, int,
|
||||
const real_type&, const real_type&, const real_type&, // compute_duidrj_cpu
|
||||
const real_type&, const real_type&, const real_type&, const real_type&, const real_type&,
|
||||
const real_type&, const real_type&);
|
||||
|
||||
// Sets the style for the switching function
|
||||
// 0 = none
|
||||
@ -401,6 +394,9 @@ class SNAKokkos {
|
||||
real_type wself;
|
||||
int wselfall_flag;
|
||||
|
||||
// quadratic flag
|
||||
int quadratic_flag;
|
||||
|
||||
int bzero_flag; // 1 if bzero subtracted from barray
|
||||
Kokkos::View<real_type*, DeviceType> bzero; // array of B values for isolated atoms
|
||||
};
|
||||
@ -409,4 +405,3 @@ class SNAKokkos {
|
||||
|
||||
#include "sna_kokkos_impl.h"
|
||||
#endif
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user